[med-svn] r1124 - in trunk/packages/muscle: branches trunk trunk/debian
charles-guest at alioth.debian.org
charles-guest at alioth.debian.org
Sat Jan 12 07:30:43 UTC 2008
Author: charles-guest
Date: 2008-01-12 07:30:41 +0000 (Sat, 12 Jan 2008)
New Revision: 1124
Removed:
trunk/packages/muscle/branches/upstream/
trunk/packages/muscle/trunk/Makefile
trunk/packages/muscle/trunk/aligngivenpath.cpp
trunk/packages/muscle/trunk/aligngivenpathsw.cpp
trunk/packages/muscle/trunk/aligntwomsas.cpp
trunk/packages/muscle/trunk/aligntwoprofs.cpp
trunk/packages/muscle/trunk/aln.cpp
trunk/packages/muscle/trunk/alpha.cpp
trunk/packages/muscle/trunk/alpha.h
trunk/packages/muscle/trunk/anchors.cpp
trunk/packages/muscle/trunk/bittraceback.cpp
trunk/packages/muscle/trunk/blosumla.cpp
trunk/packages/muscle/trunk/clust.cpp
trunk/packages/muscle/trunk/clust.h
trunk/packages/muscle/trunk/cluster.cpp
trunk/packages/muscle/trunk/cluster.h
trunk/packages/muscle/trunk/clustset.h
trunk/packages/muscle/trunk/clustsetdf.h
trunk/packages/muscle/trunk/clustsetmsa.h
trunk/packages/muscle/trunk/clwwt.cpp
trunk/packages/muscle/trunk/color.cpp
trunk/packages/muscle/trunk/cons.cpp
trunk/packages/muscle/trunk/diaglist.cpp
trunk/packages/muscle/trunk/diaglist.h
trunk/packages/muscle/trunk/diffobjscore.cpp
trunk/packages/muscle/trunk/diffpaths.cpp
trunk/packages/muscle/trunk/difftrees.cpp
trunk/packages/muscle/trunk/difftreese.cpp
trunk/packages/muscle/trunk/distcalc.cpp
trunk/packages/muscle/trunk/distcalc.h
trunk/packages/muscle/trunk/distfunc.cpp
trunk/packages/muscle/trunk/distfunc.h
trunk/packages/muscle/trunk/distpwkimura.cpp
trunk/packages/muscle/trunk/domuscle.cpp
trunk/packages/muscle/trunk/dosp.cpp
trunk/packages/muscle/trunk/dpregionlist.h
trunk/packages/muscle/trunk/dpreglist.cpp
trunk/packages/muscle/trunk/dpreglist.h
trunk/packages/muscle/trunk/drawtree.cpp
trunk/packages/muscle/trunk/edgelist.cpp
trunk/packages/muscle/trunk/edgelist.h
trunk/packages/muscle/trunk/enumopts.cpp
trunk/packages/muscle/trunk/enumopts.h
trunk/packages/muscle/trunk/enums.h
trunk/packages/muscle/trunk/enumtostr.cpp
trunk/packages/muscle/trunk/estring.cpp
trunk/packages/muscle/trunk/estring.h
trunk/packages/muscle/trunk/fasta.cpp
trunk/packages/muscle/trunk/fasta2.cpp
trunk/packages/muscle/trunk/fastclust.cpp
trunk/packages/muscle/trunk/fastdist.cpp
trunk/packages/muscle/trunk/fastdistjones.cpp
trunk/packages/muscle/trunk/fastdistkbit.cpp
trunk/packages/muscle/trunk/fastdistkmer.cpp
trunk/packages/muscle/trunk/fastdistmafft.cpp
trunk/packages/muscle/trunk/fastdistnuc.cpp
trunk/packages/muscle/trunk/fastscorepath2.cpp
trunk/packages/muscle/trunk/finddiags.cpp
trunk/packages/muscle/trunk/finddiagsn.cpp
trunk/packages/muscle/trunk/gapscoredimer.h
trunk/packages/muscle/trunk/glbalign.cpp
trunk/packages/muscle/trunk/glbalign352.cpp
trunk/packages/muscle/trunk/glbaligndiag.cpp
trunk/packages/muscle/trunk/glbalignle.cpp
trunk/packages/muscle/trunk/glbalignsimple.cpp
trunk/packages/muscle/trunk/glbalignsp.cpp
trunk/packages/muscle/trunk/glbalignspn.cpp
trunk/packages/muscle/trunk/glbalignss.cpp
trunk/packages/muscle/trunk/glbalndimer.cpp
trunk/packages/muscle/trunk/globals.cpp
trunk/packages/muscle/trunk/globalslinux.cpp
trunk/packages/muscle/trunk/globalswin32.cpp
trunk/packages/muscle/trunk/gonnet.cpp
trunk/packages/muscle/trunk/gonnet.h
trunk/packages/muscle/trunk/gotowt.cpp
trunk/packages/muscle/trunk/henikoffweight.cpp
trunk/packages/muscle/trunk/henikoffweightpb.cpp
trunk/packages/muscle/trunk/html.cpp
trunk/packages/muscle/trunk/hydro.cpp
trunk/packages/muscle/trunk/intmath.cpp
trunk/packages/muscle/trunk/intmath.h
trunk/packages/muscle/trunk/local.cpp
trunk/packages/muscle/trunk/main.cpp
trunk/packages/muscle/trunk/makerootmsa.cpp
trunk/packages/muscle/trunk/makerootmsab.cpp
trunk/packages/muscle/trunk/mhack.cpp
trunk/packages/muscle/trunk/mk
trunk/packages/muscle/trunk/mpam200.cpp
trunk/packages/muscle/trunk/msa.cpp
trunk/packages/muscle/trunk/msa.h
trunk/packages/muscle/trunk/msa2.cpp
trunk/packages/muscle/trunk/msadist.h
trunk/packages/muscle/trunk/msadistkimura.cpp
trunk/packages/muscle/trunk/msf.cpp
trunk/packages/muscle/trunk/muscle.cpp
trunk/packages/muscle/trunk/muscle.h
trunk/packages/muscle/trunk/muscle.html
trunk/packages/muscle/trunk/muscleout.cpp
trunk/packages/muscle/trunk/nucmx.cpp
trunk/packages/muscle/trunk/nwdasimple.cpp
trunk/packages/muscle/trunk/nwdasimple2.cpp
trunk/packages/muscle/trunk/nwdasmall.cpp
trunk/packages/muscle/trunk/nwrec.cpp
trunk/packages/muscle/trunk/nwsmall.cpp
trunk/packages/muscle/trunk/objscore.cpp
trunk/packages/muscle/trunk/objscore.h
trunk/packages/muscle/trunk/objscore2.cpp
trunk/packages/muscle/trunk/objscoreda.cpp
trunk/packages/muscle/trunk/onexception.cpp
trunk/packages/muscle/trunk/options.cpp
trunk/packages/muscle/trunk/outweights.cpp
trunk/packages/muscle/trunk/pam200mafft.cpp
trunk/packages/muscle/trunk/params.cpp
trunk/packages/muscle/trunk/params.h
trunk/packages/muscle/trunk/phy.cpp
trunk/packages/muscle/trunk/phy2.cpp
trunk/packages/muscle/trunk/phy3.cpp
trunk/packages/muscle/trunk/phy4.cpp
trunk/packages/muscle/trunk/phyfromclust.cpp
trunk/packages/muscle/trunk/phyfromfile.cpp
trunk/packages/muscle/trunk/physeq.cpp
trunk/packages/muscle/trunk/phytofile.cpp
trunk/packages/muscle/trunk/posgap.cpp
trunk/packages/muscle/trunk/ppscore.cpp
trunk/packages/muscle/trunk/profdb.cpp
trunk/packages/muscle/trunk/profile.cpp
trunk/packages/muscle/trunk/profile.h
trunk/packages/muscle/trunk/profilefrommsa.cpp
trunk/packages/muscle/trunk/progalign.cpp
trunk/packages/muscle/trunk/progress.cpp
trunk/packages/muscle/trunk/progressivealign.cpp
trunk/packages/muscle/trunk/pwpath.cpp
trunk/packages/muscle/trunk/pwpath.h
trunk/packages/muscle/trunk/readmx.cpp
trunk/packages/muscle/trunk/realigndiffs.cpp
trunk/packages/muscle/trunk/realigndiffse.cpp
trunk/packages/muscle/trunk/refine.cpp
trunk/packages/muscle/trunk/refinehoriz.cpp
trunk/packages/muscle/trunk/refinesubfams.cpp
trunk/packages/muscle/trunk/refinetree.cpp
trunk/packages/muscle/trunk/refinetreee.cpp
trunk/packages/muscle/trunk/refinevert.cpp
trunk/packages/muscle/trunk/refinew.cpp
trunk/packages/muscle/trunk/savebest.cpp
trunk/packages/muscle/trunk/scoregaps.cpp
trunk/packages/muscle/trunk/scorehistory.cpp
trunk/packages/muscle/trunk/scorehistory.h
trunk/packages/muscle/trunk/scorepp.cpp
trunk/packages/muscle/trunk/seq.cpp
trunk/packages/muscle/trunk/seq.h
trunk/packages/muscle/trunk/seqvect.cpp
trunk/packages/muscle/trunk/seqvect.h
trunk/packages/muscle/trunk/setblosumweights.cpp
trunk/packages/muscle/trunk/setgscweights.cpp
trunk/packages/muscle/trunk/setnewhandler.cpp
trunk/packages/muscle/trunk/spfast.cpp
trunk/packages/muscle/trunk/sptest.cpp
trunk/packages/muscle/trunk/stabilize.cpp
trunk/packages/muscle/trunk/subfam.cpp
trunk/packages/muscle/trunk/subfams.cpp
trunk/packages/muscle/trunk/sw.cpp
trunk/packages/muscle/trunk/termgaps.cpp
trunk/packages/muscle/trunk/textfile.cpp
trunk/packages/muscle/trunk/textfile.h
trunk/packages/muscle/trunk/threewaywt.cpp
trunk/packages/muscle/trunk/timing.h
trunk/packages/muscle/trunk/traceback.cpp
trunk/packages/muscle/trunk/tracebackopt.cpp
trunk/packages/muscle/trunk/tracebacksw.cpp
trunk/packages/muscle/trunk/tree.h
trunk/packages/muscle/trunk/treefrommsa.cpp
trunk/packages/muscle/trunk/types.h
trunk/packages/muscle/trunk/typetostr.cpp
trunk/packages/muscle/trunk/unixio.h
trunk/packages/muscle/trunk/upgma2.cpp
trunk/packages/muscle/trunk/usage.cpp
trunk/packages/muscle/trunk/validateids.cpp
trunk/packages/muscle/trunk/vtml2.cpp
trunk/packages/muscle/trunk/writescorefile.cpp
Modified:
trunk/packages/muscle/trunk/debian/
Log:
we do not store upstream sources in our repository anymore
Deleted: trunk/packages/muscle/trunk/Makefile
===================================================================
--- trunk/packages/muscle/trunk/Makefile 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/Makefile 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,53 +0,0 @@
-# Porting notes:
-# For Solaris and other platforms where the logf function
-# is missing from the math library, add the following line
-# to the end of muscle.h:
-# #define logf(x) ((float) log(x))
-# Using -static increases the executable size and thus gives a very
-# small increase in start time, but is more portable (the binding
-# to dynamic libraries often breaks when a new library is released).
-# On OSX, using -static gives the error "ld: can't locate file for: -lcrt0.o",
-# this is fixed by deleting "-static" from the LDLIBS line.
-
-CFLAGS = -O2 -funroll-loops -Winline -DNDEBUG=1
-# LDLIBS = -lm -static
-LDLIBS = -lm
-
-OBJ = .o
-EXE =
-
-RM = rm -f
-CP = cp
-
-GPP = g++
-LD = $(GPP) $(CFLAGS)
-CPP = $(GPP) -c $(CFLAGS)
-
-all: muscle
-
-CPPSRC = $(sort $(wildcard *.cpp))
-CPPOBJ = $(subst .cpp,.o,$(CPPSRC))
-
-$(CPPOBJ): %.o: %.cpp
- $(CPP) $< -o $@
-
-muscle: $(CPPOBJ)
- $(LD) -o muscle $(CPPOBJ) $(LDLIBS)
-# dh_strip takes care of stripping
-# strip muscle
-
-DESTDIR=""
-
-install: muscle
- if [ ! -x $(DESTDIR)/usr/bin ]; then mkdir -p $(DESTDIR)/usr/bin; fi
- cp muscle $(DESTDIR)/usr/bin/muscle
-
-install-doc: muscle.html
- if [ ! -x $(DESTDIR)/usr/share/doc/muscle ]; then mkdir -p $(DESTDIR)/usr/share/doc/muscle; fi
- if [ ! -x $(DESTDIR)/usr/share/doc/muscle-doc ]; then mkdir -p $(DESTDIR)/usr/share/doc/muscle-doc; fi
- cp muscle.html $(DESTDIR)/usr/share/doc/muscle/
- (cd $(DESTDIR)/usr/share/doc/muscle-doc/ && ln -s ../muscle/muscle.html* . )
-
-clean:
- $(RM) *.o make.err make.out
- $(RM) muscle
Deleted: trunk/packages/muscle/trunk/aligngivenpath.cpp
===================================================================
--- trunk/packages/muscle/trunk/aligngivenpath.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/aligngivenpath.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,802 +0,0 @@
-#include "muscle.h"
-#include "msa.h"
-#include "pwpath.h"
-#include "profile.h"
-
-#define TRACE 0
-
-static void LogPP(const ProfPos &PP)
- {
- Log("ResidueGroup %u\n", PP.m_uResidueGroup);
- Log("AllGaps %d\n", PP.m_bAllGaps);
- Log("Occ %.3g\n", PP.m_fOcc);
- Log("LL=%.3g LG=%.3g GL=%.3g GG=%.3g\n", PP.m_LL, PP.m_LG, PP.m_GL, PP.m_GG);
- Log("Freqs ");
- for (unsigned i = 0; i < 20; ++i)
- if (PP.m_fcCounts[i] > 0)
- Log("%c=%.3g ", LetterToChar(i), PP.m_fcCounts[i]);
- Log("\n");
- }
-
-static void AssertProfPosEq(const ProfPos *PA, const ProfPos *PB, unsigned i)
- {
- const ProfPos &PPA = PA[i];
- const ProfPos &PPB = PB[i];
-#define eq(x) if (PPA.m_##x != PPB.m_##x) { LogPP(PPA); LogPP(PPB); Quit("AssertProfPosEq." #x); }
-#define be(x) if (!BTEq(PPA.m_##x, PPB.m_##x)) { LogPP(PPA); LogPP(PPB); Quit("AssertProfPosEq." #x); }
- eq(bAllGaps)
- eq(uResidueGroup)
-
- be(LL)
- be(LG)
- be(GL)
- be(GG)
- be(fOcc)
- be(scoreGapOpen)
- be(scoreGapClose)
-
- for (unsigned j = 0; j < 20; ++j)
- {
-#define eqj(x) if (PPA.m_##x != PPB.m_##x) Quit("AssertProfPosEq j=%u " #x, j);
-#define bej(x) if (!BTEq(PPA.m_##x, PPB.m_##x)) Quit("AssertProfPosEq j=%u " #x, j);
- bej(fcCounts[j]);
-// eqj(uSortOrder[j]) // may differ due to ties, don't check?
- bej(AAScores[j])
-#undef eqj
-#undef bej
- }
-#undef eq
-#undef be
- }
-
-void AssertProfsEq(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
- unsigned uLengthB)
- {
- if (uLengthA != uLengthB)
- Quit("AssertProfsEq: lengths differ %u %u", uLengthA, uLengthB);
- for (unsigned i = 0; i < uLengthB; ++i)
- AssertProfPosEq(PA, PB, i);
- }
-
-#if DEBUG
-static void ValidateProf(const ProfPos *Prof, unsigned uLength)
- {
- for (unsigned i = 0; i < uLength; ++i)
- {
- const ProfPos &PP = Prof[i];
-
- FCOUNT s1 = PP.m_LL + PP.m_LG + PP.m_GL + PP.m_GG;
- assert(BTEq(s1, 1.0));
-
- if (i > 0)
- {
- const ProfPos &PPPrev = Prof[i-1];
- FCOUNT s2 = PPPrev.m_LL + PPPrev.m_GL;
- FCOUNT s3 = PP.m_LL + PP.m_LG;
- assert(BTEq(s2, s3));
- }
- if (i < uLength - 1)
- {
- const ProfPos &PPNext = Prof[i+1];
- FCOUNT s4 = PP.m_LL + PP.m_GL;
- FCOUNT s5 = PPNext.m_LL + PPNext.m_LG;
- assert(BTEq(s4, s5));
- }
- }
- }
-#else
-#define ValidateProf(Prof, Length) /* empty */
-#endif
-
-static void ScoresFromFreqsPos(ProfPos *Prof, unsigned uLength, unsigned uPos)
- {
- ProfPos &PP = Prof[uPos];
- SortCounts(PP.m_fcCounts, PP.m_uSortOrder);
- PP.m_uResidueGroup = ResidueGroupFromFCounts(PP.m_fcCounts);
-
-// "Occupancy"
- PP.m_fOcc = PP.m_LL + PP.m_GL;
-
-// Frequency of gap-opens in this position (i)
-// Gap open = letter in i-1 and gap in i
-// = iff LG in i
- FCOUNT fcOpen = PP.m_LG;
-
-// Frequency of gap-closes in this position
-// Gap close = gap in i and letter in i+1
-// = iff GL in i+1
- FCOUNT fcClose;
- if (uPos + 1 < uLength)
- fcClose = Prof[uPos + 1].m_GL;
- else
- fcClose = PP.m_GG + PP.m_LG;
-
- PP.m_scoreGapOpen = (SCORE) ((1.0 - fcOpen)*g_scoreGapOpen/2.0);
- PP.m_scoreGapClose = (SCORE) ((1.0 - fcClose)*g_scoreGapOpen/2.0);
-#if DOUBLE_AFFINE
- PP.m_scoreGapOpen2 = (SCORE) ((1.0 - fcOpen)*g_scoreGapOpen2/2.0);
- PP.m_scoreGapClose2 = (SCORE) ((1.0 - fcClose)*g_scoreGapOpen2/2.0);
-#endif
-
- for (unsigned i = 0; i < g_AlphaSize; ++i)
- {
- SCORE scoreSum = 0;
- for (unsigned j = 0; j < g_AlphaSize; ++j)
- scoreSum += PP.m_fcCounts[j]*(*g_ptrScoreMatrix)[i][j];
- PP.m_AAScores[i] = scoreSum;
- }
- }
-
-void ProfScoresFromFreqs(ProfPos *Prof, unsigned uLength)
- {
- for (unsigned i = 0; i < uLength; ++i)
- ScoresFromFreqsPos(Prof, uLength, i);
- }
-
-static void AppendDelete(const MSA &msaA, unsigned &uColIndexA,
- unsigned uSeqCountA, unsigned uSeqCountB, MSA &msaCombined,
- unsigned &uColIndexCombined)
- {
-#if TRACE
- Log("AppendDelete ColIxA=%u ColIxCmb=%u\n",
- uColIndexA, uColIndexCombined);
-#endif
- for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
- {
- char c = msaA.GetChar(uSeqIndexA, uColIndexA);
- msaCombined.SetChar(uSeqIndexA, uColIndexCombined, c);
- }
-
- for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
- msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, '-');
-
- ++uColIndexCombined;
- ++uColIndexA;
- }
-
-static void AppendInsert(const MSA &msaB, unsigned &uColIndexB,
- unsigned uSeqCountA, unsigned uSeqCountB, MSA &msaCombined,
- unsigned &uColIndexCombined)
- {
-#if TRACE
- Log("AppendInsert ColIxB=%u ColIxCmb=%u\n",
- uColIndexB, uColIndexCombined);
-#endif
- for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
- msaCombined.SetChar(uSeqIndexA, uColIndexCombined, '-');
-
- for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
- {
- char c = msaB.GetChar(uSeqIndexB, uColIndexB);
- msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, c);
- }
-
- ++uColIndexCombined;
- ++uColIndexB;
- }
-
-static void AppendTplInserts(const MSA &msaA, unsigned &uColIndexA, unsigned uColCountA,
- const MSA &msaB, unsigned &uColIndexB, unsigned uColCountB, unsigned uSeqCountA,
- unsigned uSeqCountB, MSA &msaCombined, unsigned &uColIndexCombined)
- {
-#if TRACE
- Log("AppendTplInserts ColIxA=%u ColIxB=%u ColIxCmb=%u\n",
- uColIndexA, uColIndexB, uColIndexCombined);
-#endif
- const unsigned uLengthA = msaA.GetColCount();
- const unsigned uLengthB = msaB.GetColCount();
-
- unsigned uNewColCount = uColCountA;
- if (uColCountB > uNewColCount)
- uNewColCount = uColCountB;
-
- for (unsigned n = 0; n < uColCountA; ++n)
- {
- for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
- {
- char c = msaA.GetChar(uSeqIndexA, uColIndexA + n);
- c = UnalignChar(c);
- msaCombined.SetChar(uSeqIndexA, uColIndexCombined + n, c);
- }
- }
- for (unsigned n = uColCountA; n < uNewColCount; ++n)
- {
- for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
- msaCombined.SetChar(uSeqIndexA, uColIndexCombined + n, '.');
- }
-
- for (unsigned n = 0; n < uColCountB; ++n)
- {
- for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
- {
- char c = msaB.GetChar(uSeqIndexB, uColIndexB + n);
- c = UnalignChar(c);
- msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined + n, c);
- }
- }
- for (unsigned n = uColCountB; n < uNewColCount; ++n)
- {
- for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
- msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined + n, '.');
- }
-
- uColIndexCombined += uNewColCount;
- uColIndexA += uColCountA;
- uColIndexB += uColCountB;
- }
-
-static void AppendMatch(const MSA &msaA, unsigned &uColIndexA, const MSA &msaB,
- unsigned &uColIndexB, unsigned uSeqCountA, unsigned uSeqCountB,
- MSA &msaCombined, unsigned &uColIndexCombined)
- {
-#if TRACE
- Log("AppendMatch ColIxA=%u ColIxB=%u ColIxCmb=%u\n",
- uColIndexA, uColIndexB, uColIndexCombined);
-#endif
-
- for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
- {
- char c = msaA.GetChar(uSeqIndexA, uColIndexA);
- msaCombined.SetChar(uSeqIndexA, uColIndexCombined, c);
- }
-
- for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
- {
- char c = msaB.GetChar(uSeqIndexB, uColIndexB);
- msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, c);
- }
-
- ++uColIndexA;
- ++uColIndexB;
- ++uColIndexCombined;
- }
-
-void AlignTwoMSAsGivenPath(const PWPath &Path, const MSA &msaA, const MSA &msaB,
- MSA &msaCombined)
- {
- msaCombined.Clear();
-
-#if TRACE
- Log("FastAlignProfiles\n");
- Log("Template A:\n");
- msaA.LogMe();
- Log("Template B:\n");
- msaB.LogMe();
-#endif
-
- const unsigned uColCountA = msaA.GetColCount();
- const unsigned uColCountB = msaB.GetColCount();
-
- const unsigned uSeqCountA = msaA.GetSeqCount();
- const unsigned uSeqCountB = msaB.GetSeqCount();
-
- msaCombined.SetSeqCount(uSeqCountA + uSeqCountB);
-
-// Copy sequence names into combined MSA
- for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
- {
- msaCombined.SetSeqName(uSeqIndexA, msaA.GetSeqName(uSeqIndexA));
- msaCombined.SetSeqId(uSeqIndexA, msaA.GetSeqId(uSeqIndexA));
- }
-
- for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
- {
- msaCombined.SetSeqName(uSeqCountA + uSeqIndexB, msaB.GetSeqName(uSeqIndexB));
- msaCombined.SetSeqId(uSeqCountA + uSeqIndexB, msaB.GetSeqId(uSeqIndexB));
- }
-
- unsigned uColIndexA = 0;
- unsigned uColIndexB = 0;
- unsigned uColIndexCombined = 0;
- const unsigned uEdgeCount = Path.GetEdgeCount();
- for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
- {
- const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
-#if TRACE
- Log("\nEdge %u %c%u.%u\n",
- uEdgeIndex,
- Edge.cType,
- Edge.uPrefixLengthA,
- Edge.uPrefixLengthB);
-#endif
- const char cType = Edge.cType;
- const unsigned uPrefixLengthA = Edge.uPrefixLengthA;
- unsigned uColCountA = 0;
- if (uPrefixLengthA > 0)
- {
- const unsigned uNodeIndexA = uPrefixLengthA - 1;
- const unsigned uTplColIndexA = uNodeIndexA;
- if (uTplColIndexA > uColIndexA)
- uColCountA = uTplColIndexA - uColIndexA;
- }
-
- const unsigned uPrefixLengthB = Edge.uPrefixLengthB;
- unsigned uColCountB = 0;
- if (uPrefixLengthB > 0)
- {
- const unsigned uNodeIndexB = uPrefixLengthB - 1;
- const unsigned uTplColIndexB = uNodeIndexB;
- if (uTplColIndexB > uColIndexB)
- uColCountB = uTplColIndexB - uColIndexB;
- }
-
-// TODO: This code looks like a hangover from HMM estimation -- can we delete it?
- assert(uColCountA == 0);
- assert(uColCountB == 0);
- AppendTplInserts(msaA, uColIndexA, uColCountA, msaB, uColIndexB, uColCountB,
- uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
-
- switch (cType)
- {
- case 'M':
- {
- assert(uPrefixLengthA > 0);
- assert(uPrefixLengthB > 0);
- const unsigned uColA = uPrefixLengthA - 1;
- const unsigned uColB = uPrefixLengthB - 1;
- assert(uColIndexA == uColA);
- assert(uColIndexB == uColB);
- AppendMatch(msaA, uColIndexA, msaB, uColIndexB, uSeqCountA, uSeqCountB,
- msaCombined, uColIndexCombined);
- break;
- }
- case 'D':
- {
- assert(uPrefixLengthA > 0);
- const unsigned uColA = uPrefixLengthA - 1;
- assert(uColIndexA == uColA);
- AppendDelete(msaA, uColIndexA, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
- break;
- }
- case 'I':
- {
- assert(uPrefixLengthB > 0);
- const unsigned uColB = uPrefixLengthB - 1;
- assert(uColIndexB == uColB);
- AppendInsert(msaB, uColIndexB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
- break;
- }
- default:
- assert(false);
- }
- }
- unsigned uInsertColCountA = uColCountA - uColIndexA;
- unsigned uInsertColCountB = uColCountB - uColIndexB;
-
-// TODO: This code looks like a hangover from HMM estimation -- can we delete it?
- assert(uInsertColCountA == 0);
- assert(uInsertColCountB == 0);
- AppendTplInserts(msaA, uColIndexA, uInsertColCountA, msaB, uColIndexB,
- uInsertColCountB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
-
- assert(msaCombined.GetColCount() == uEdgeCount);
- }
-
-static const ProfPos PPStart =
- {
- false, //m_bAllGaps;
- { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // m_uSortOrder[21];
- { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // m_fcCounts[20];
- 1.0, // m_LL;
- 0.0, // m_LG;
- 0.0, // m_GL;
- 0.0, // m_GG;
- { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // m_ALScores
- 0, // m_uResidueGroup;
- 1.0, // m_fOcc;
- 0.0, // m_fcStartOcc;
- 0.0, // m_fcEndOcc;
- 0.0, // m_scoreGapOpen;
- 0.0, // m_scoreGapClose;
- };
-
-// MM
-// Ai1 Ai Out
-// X X LL LL
-// X - LG LG
-// - X GL GL
-// - - GG GG
-//
-// Bj1 Bj
-// X X LL LL
-// X - LG LG
-// - X GL GL
-// - - GG GG
-static void SetGapsMM(
- const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
- const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
- ProfPos *POut, unsigned uColIndexOut)
- {
- const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
- const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
- ProfPos &PPO = POut[uColIndexOut];
-
- PPO.m_LL = wA*PPA.m_LL + wB*PPB.m_LL;
- PPO.m_LG = wA*PPA.m_LG + wB*PPB.m_LG;
- PPO.m_GL = wA*PPA.m_GL + wB*PPB.m_GL;
- PPO.m_GG = wA*PPA.m_GG + wB*PPB.m_GG;
- }
-
-// MD
-// Ai1 Ai Out
-// X X LL LL
-// X - LG LG
-// - X GL GL
-// - - GG GG
-//
-// Bj (-)
-// X - ?L LG
-// - - ?G GG
-static void SetGapsMD(
- const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
- const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
- ProfPos *POut, unsigned uColIndexOut)
- {
- const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
- const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
- ProfPos &PPO = POut[uColIndexOut];
-
- PPO.m_LL = wA*PPA.m_LL;
- PPO.m_LG = wA*PPA.m_LG + wB*(PPB.m_LL + PPB.m_GL);
- PPO.m_GL = wA*PPA.m_GL;
- PPO.m_GG = wA*PPA.m_GG + wB*(PPB.m_LG + PPB.m_GG);
- }
-
-// DD
-// Ai1 Ai Out
-// X X LL LL
-// X - LG LG
-// - X GL GL
-// - - GG GG
-//
-// (-) (-)
-// - - ?? GG
-static void SetGapsDD(
- const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
- const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
- ProfPos *POut, unsigned uColIndexOut)
- {
- const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
- ProfPos &PPO = POut[uColIndexOut];
-
- PPO.m_LL = wA*PPA.m_LL;
- PPO.m_LG = wA*PPA.m_LG;
- PPO.m_GL = wA*PPA.m_GL;
- PPO.m_GG = wA*PPA.m_GG + wB;
- }
-
-// MI
-// Ai (-) Out
-// X - ?L LG
-// - - ?G GG
-
-// Bj1 Bj
-// X X LL LL
-// X - LG LG
-// - X GL GL
-// - - GG GG
-static void SetGapsMI(
- const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
- const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
- ProfPos *POut, unsigned uColIndexOut)
- {
- const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
- const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
- ProfPos &PPO = POut[uColIndexOut];
-
- PPO.m_LL = wB*PPB.m_LL;
- PPO.m_LG = wB*PPB.m_LG + wA*(PPA.m_LL + PPA.m_GL);
- PPO.m_GL = wB*PPB.m_GL;
- PPO.m_GG = wB*PPB.m_GG + wA*(PPA.m_LG + PPA.m_GG);
- }
-
-// DM
-// Ai1 Ai Out
-// X X LL LL
-// X - LG LG
-// - X GL GL
-// - - GG GG
-//
-// (-) Bj
-// - X ?L GL
-// - - ?G GG
-static void SetGapsDM(
- const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
- const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
- ProfPos *POut, unsigned uColIndexOut)
- {
- const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
- const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
- ProfPos &PPO = POut[uColIndexOut];
-
- PPO.m_LL = wA*PPA.m_LL;
- PPO.m_LG = wA*PPA.m_LG;
- PPO.m_GL = wA*PPA.m_GL + wB*(PPB.m_LL + PPB.m_GL);
- PPO.m_GG = wA*PPA.m_GG + wB*(PPB.m_LG + PPB.m_GG);
- }
-
-// IM
-// (-) Ai Out
-// - X ?L GL
-// - - ?G GG
-
-// Bj1 Bj
-// X X LL LL
-// X - LG LG
-// - X GL GL
-// - - GG GG
-static void SetGapsIM(
- const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
- const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
- ProfPos *POut, unsigned uColIndexOut)
- {
- const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
- const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
- ProfPos &PPO = POut[uColIndexOut];
-
- PPO.m_LL = wB*PPB.m_LL;
- PPO.m_LG = wB*PPB.m_LG;
- PPO.m_GL = wB*PPB.m_GL + wA*(PPA.m_LL + PPA.m_GL);
- PPO.m_GG = wB*PPB.m_GG + wA*(PPA.m_LG + PPA.m_GG);
- }
-
-// ID
-// (-) Ai Out
-// - X ?L GL
-// - - ?G GG
-
-// Bj (-)
-// X - ?L LG
-// - - ?G GG
-static void SetGapsID(
- const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
- const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
- ProfPos *POut, unsigned uColIndexOut)
- {
- const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
- const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
- ProfPos &PPO = POut[uColIndexOut];
-
- PPO.m_LL = 0;
- PPO.m_LG = wB*PPB.m_GL + wB*PPB.m_LL;
- PPO.m_GL = wA*PPA.m_GL + wA*PPA.m_LL;
- PPO.m_GG = wA*(PPA.m_LG + PPA.m_GG) + wB*(PPB.m_LG + PPB.m_GG);
- }
-
-// DI
-// Ai (-) Out
-// X - ?L LG
-// - - ?G GG
-
-// (-) Bj
-// - X ?L GL
-// - - ?G GG
-static void SetGapsDI(
- const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
- const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
- ProfPos *POut, unsigned uColIndexOut)
- {
- const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
- const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
- ProfPos &PPO = POut[uColIndexOut];
-
- PPO.m_LL = 0;
- PPO.m_LG = wA*PPA.m_GL + wA*PPA.m_LL;
- PPO.m_GL = wB*PPB.m_GL + wB*PPB.m_LL;
- PPO.m_GG = wA*(PPA.m_LG + PPA.m_GG) + wB*(PPB.m_LG + PPB.m_GG);
- }
-
-// II
-// (-) (-) Out
-// - - ?? GG
-
-// Bj1 Bj
-// X X LL LL
-// X - LG LG
-// - X GL GL
-// - - GG GG
-static void SetGapsII(
- const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
- const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
- ProfPos *POut, unsigned uColIndexOut)
- {
- const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
- ProfPos &PPO = POut[uColIndexOut];
-
- PPO.m_LL = wB*PPB.m_LL;
- PPO.m_LG = wB*PPB.m_LG;
- PPO.m_GL = wB*PPB.m_GL;
- PPO.m_GG = wB*PPB.m_GG + wA;
- }
-
-static void SetFreqs(
- const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
- const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
- ProfPos *POut, unsigned uColIndexOut)
- {
- const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
- const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
- ProfPos &PPO = POut[uColIndexOut];
-
- if (g_bNormalizeCounts)
- {
- const FCOUNT fA = PPA.m_fOcc*wA/(wA + wB);
- const FCOUNT fB = PPB.m_fOcc*wB/(wA + wB);
- FCOUNT fTotal = 0;
- for (unsigned i = 0; i < 20; ++i)
- {
- const FCOUNT f = fA*PPA.m_fcCounts[i] + fB*PPB.m_fcCounts[i];
- PPO.m_fcCounts[i] = f;
- fTotal += f;
- }
- if (fTotal > 0)
- for (unsigned i = 0; i < 20; ++i)
- PPO.m_fcCounts[i] /= fTotal;
- }
- else
- {
- for (unsigned i = 0; i < 20; ++i)
- PPO.m_fcCounts[i] = wA*PPA.m_fcCounts[i] + wB*PPB.m_fcCounts[i];
- }
- }
-
-void AlignTwoProfsGivenPath(const PWPath &Path,
- const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
- const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
- ProfPos **ptrPOut, unsigned *ptruLengthOut)
- {
-#if TRACE
- Log("AlignTwoProfsGivenPath wA=%.3g wB=%.3g Path=\n", wA, wB);
- Path.LogMe();
-#endif
- assert(BTEq(wA + wB, 1.0));
-
- unsigned uColIndexA = 0;
- unsigned uColIndexB = 0;
- unsigned uColIndexOut = 0;
- const unsigned uEdgeCount = Path.GetEdgeCount();
- ProfPos *POut = new ProfPos[uEdgeCount];
- char cPrevType = 'M';
- for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
- {
- const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
- const char cType = Edge.cType;
-
- const unsigned uPrefixLengthA = Edge.uPrefixLengthA;
- const unsigned uPrefixLengthB = Edge.uPrefixLengthB;
-
-#if TRACE
- Log("\nEdge %u %c%u.%u ColA=%u ColB=%u\n",
- uEdgeIndex,
- Edge.cType,
- Edge.uPrefixLengthA,
- Edge.uPrefixLengthB,
- uColIndexA,
- uColIndexB);
-#endif
-
- POut[uColIndexOut].m_bAllGaps = false;
- switch (cType)
- {
- case 'M':
- {
- assert(uPrefixLengthA > 0);
- assert(uPrefixLengthB > 0);
- SetFreqs(
- PA, uPrefixLengthA, wA,
- PB, uPrefixLengthB, wB,
- POut, uColIndexOut);
- switch (cPrevType)
- {
- case 'M':
- SetGapsMM(
- PA, uPrefixLengthA, wA,
- PB, uPrefixLengthB, wB,
- POut, uColIndexOut);
- break;
- case 'D':
- SetGapsDM(
- PA, uPrefixLengthA, wA,
- PB, uPrefixLengthB, wB,
- POut, uColIndexOut);
- break;
- case 'I':
- SetGapsIM(
- PA, uPrefixLengthA, wA,
- PB, uPrefixLengthB, wB,
- POut, uColIndexOut);
- break;
- default:
- Quit("Bad cPrevType");
- }
- ++uColIndexA;
- ++uColIndexB;
- ++uColIndexOut;
- break;
- }
- case 'D':
- {
- assert(uPrefixLengthA > 0);
- SetFreqs(
- PA, uPrefixLengthA, wA,
- PB, uPrefixLengthB, 0,
- POut, uColIndexOut);
- switch (cPrevType)
- {
- case 'M':
- SetGapsMD(
- PA, uPrefixLengthA, wA,
- PB, uPrefixLengthB, wB,
- POut, uColIndexOut);
- break;
- case 'D':
- SetGapsDD(
- PA, uPrefixLengthA, wA,
- PB, uPrefixLengthB, wB,
- POut, uColIndexOut);
- break;
- case 'I':
- SetGapsID(
- PA, uPrefixLengthA, wA,
- PB, uPrefixLengthB, wB,
- POut, uColIndexOut);
- break;
- default:
- Quit("Bad cPrevType");
- }
- ++uColIndexA;
- ++uColIndexOut;
- break;
- }
- case 'I':
- {
- assert(uPrefixLengthB > 0);
- SetFreqs(
- PA, uPrefixLengthA, 0,
- PB, uPrefixLengthB, wB,
- POut, uColIndexOut);
- switch (cPrevType)
- {
- case 'M':
- SetGapsMI(
- PA, uPrefixLengthA, wA,
- PB, uPrefixLengthB, wB,
- POut, uColIndexOut);
- break;
- case 'D':
- SetGapsDI(
- PA, uPrefixLengthA, wA,
- PB, uPrefixLengthB, wB,
- POut, uColIndexOut);
- break;
- case 'I':
- SetGapsII(
- PA, uPrefixLengthA, wA,
- PB, uPrefixLengthB, wB,
- POut, uColIndexOut);
- break;
- default:
- Quit("Bad cPrevType");
- }
- ++uColIndexB;
- ++uColIndexOut;
- break;
- }
- default:
- assert(false);
- }
- cPrevType = cType;
- }
- assert(uColIndexOut == uEdgeCount);
-
- ProfScoresFromFreqs(POut, uEdgeCount);
- ValidateProf(POut, uEdgeCount);
-
- *ptrPOut = POut;
- *ptruLengthOut = uEdgeCount;
-
-#if TRACE
- Log("AlignTwoProfsGivenPath:\n");
- ListProfile(POut, uEdgeCount, 0);
-#endif
- }
Deleted: trunk/packages/muscle/trunk/aligngivenpathsw.cpp
===================================================================
--- trunk/packages/muscle/trunk/aligngivenpathsw.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/aligngivenpathsw.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,237 +0,0 @@
-#include "muscle.h"
-#include "msa.h"
-#include "pwpath.h"
-#include "profile.h"
-
-#define TRACE 0
-
-// Emits one delete ('D') column into msaCombined: the rows belonging to A
-// receive column uColIndexA of msaA, the rows belonging to B receive '-'.
-// Both column cursors (uColIndexA, uColIndexCombined) are advanced by one.
-static void AppendDelete(const MSA &msaA, unsigned &uColIndexA,
-  unsigned uSeqCountA, unsigned uSeqCountB, MSA &msaCombined,
-  unsigned &uColIndexCombined)
-	{
-#if	TRACE
-	Log("AppendDelete ColIxA=%u ColIxCmb=%u\n",
-	  uColIndexA, uColIndexCombined);
-#endif
-	const unsigned uSrcCol = uColIndexA;
-	const unsigned uDstCol = uColIndexCombined;
-
-	// Rows of A keep their characters.
-	for (unsigned uRowA = 0; uRowA < uSeqCountA; ++uRowA)
-		msaCombined.SetChar(uRowA, uDstCol, msaA.GetChar(uRowA, uSrcCol));
-
-	// Rows of B are stored after the rows of A and are gapped here.
-	for (unsigned uRowB = 0; uRowB < uSeqCountB; ++uRowB)
-		msaCombined.SetChar(uSeqCountA + uRowB, uDstCol, '-');
-
-	uColIndexCombined = uDstCol + 1;
-	uColIndexA = uSrcCol + 1;
-	}
-
-// Emits one insert ('I') column into msaCombined: the rows belonging to A
-// receive '-', the rows belonging to B receive column uColIndexB of msaB.
-// Both column cursors (uColIndexB, uColIndexCombined) are advanced by one.
-static void AppendInsert(const MSA &msaB, unsigned &uColIndexB,
-  unsigned uSeqCountA, unsigned uSeqCountB, MSA &msaCombined,
-  unsigned &uColIndexCombined)
-	{
-#if	TRACE
-	Log("AppendInsert ColIxB=%u ColIxCmb=%u\n",
-	  uColIndexB, uColIndexCombined);
-#endif
-	const unsigned uSrcCol = uColIndexB;
-	const unsigned uDstCol = uColIndexCombined;
-
-	// Rows of A are gapped in an insert column.
-	for (unsigned uRowA = 0; uRowA < uSeqCountA; ++uRowA)
-		msaCombined.SetChar(uRowA, uDstCol, '-');
-
-	// Rows of B are stored after the rows of A and keep their characters.
-	for (unsigned uRowB = 0; uRowB < uSeqCountB; ++uRowB)
-		msaCombined.SetChar(uSeqCountA + uRowB, uDstCol, msaB.GetChar(uRowB, uSrcCol));
-
-	uColIndexCombined = uDstCol + 1;
-	uColIndexB = uSrcCol + 1;
-	}
-
-// Copies uColCount columns of msa, starting at uColIndex, into rows
-// [uRowOffset, uRowOffset + uSeqCount) of msaCombined, starting at column
-// uColIndexCombined. Every character goes through UnalignChar. Columns from
-// uColCount up to uPaddedColCount are filled with '.'.
-static void CopyUnalignedRows(const MSA &msa, unsigned uColIndex,
-  unsigned uColCount, unsigned uPaddedColCount, unsigned uSeqCount,
-  unsigned uRowOffset, MSA &msaCombined, unsigned uColIndexCombined)
-	{
-	for (unsigned n = 0; n < uColCount; ++n)
-		{
-		for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
-			{
-			char c = msa.GetChar(uSeqIndex, uColIndex + n);
-			c = UnalignChar(c);
-			msaCombined.SetChar(uRowOffset + uSeqIndex, uColIndexCombined + n, c);
-			}
-		}
-
-	for (unsigned n = uColCount; n < uPaddedColCount; ++n)
-		{
-		for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
-			msaCombined.SetChar(uRowOffset + uSeqIndex, uColIndexCombined + n, '.');
-		}
-	}
-
-// Appends a block of columns that are not part of the alignment path.
-// uColCountA columns of msaA (from uColIndexA) and uColCountB columns of msaB
-// (from uColIndexB) are written side by side; the shorter side is padded with
-// '.' so both occupy max(uColCountA, uColCountB) combined columns.
-// On return uColIndexA, uColIndexB and uColIndexCombined have been advanced
-// past the block.
-static void AppendUnalignedTerminals(const MSA &msaA, unsigned &uColIndexA, unsigned uColCountA,
-  const MSA &msaB, unsigned &uColIndexB, unsigned uColCountB, unsigned uSeqCountA,
-  unsigned uSeqCountB, MSA &msaCombined, unsigned &uColIndexCombined)
-	{
-#if	TRACE
-	Log("AppendUnalignedTerminals ColIxA=%u ColIxB=%u ColIxCmb=%u\n",
-	  uColIndexA, uColIndexB, uColIndexCombined);
-#endif
-	// Width of the block in the combined alignment.
-	const unsigned uNewColCount =
-	  uColCountB > uColCountA ? uColCountB : uColCountA;
-
-	// A occupies the first uSeqCountA rows, B the rows after them.
-	CopyUnalignedRows(msaA, uColIndexA, uColCountA, uNewColCount, uSeqCountA,
-	  0, msaCombined, uColIndexCombined);
-	CopyUnalignedRows(msaB, uColIndexB, uColCountB, uNewColCount, uSeqCountB,
-	  uSeqCountA, msaCombined, uColIndexCombined);
-
-	uColIndexCombined += uNewColCount;
-	uColIndexA += uColCountA;
-	uColIndexB += uColCountB;
-	}
-
-// Emits one match ('M') column into msaCombined: the rows belonging to A
-// receive column uColIndexA of msaA and the rows belonging to B receive column
-// uColIndexB of msaB. All three column cursors are advanced by one.
-static void AppendMatch(const MSA &msaA, unsigned &uColIndexA, const MSA &msaB,
-  unsigned &uColIndexB, unsigned uSeqCountA, unsigned uSeqCountB,
-  MSA &msaCombined, unsigned &uColIndexCombined)
-	{
-#if	TRACE
-	Log("AppendMatch ColIxA=%u ColIxB=%u ColIxCmb=%u\n",
-	  uColIndexA, uColIndexB, uColIndexCombined);
-#endif
-	const unsigned uSrcColA = uColIndexA;
-	const unsigned uSrcColB = uColIndexB;
-	const unsigned uDstCol = uColIndexCombined;
-
-	for (unsigned uRowA = 0; uRowA < uSeqCountA; ++uRowA)
-		msaCombined.SetChar(uRowA, uDstCol, msaA.GetChar(uRowA, uSrcColA));
-
-	// Rows of B are stored after the rows of A.
-	for (unsigned uRowB = 0; uRowB < uSeqCountB; ++uRowB)
-		msaCombined.SetChar(uSeqCountA + uRowB, uDstCol, msaB.GetChar(uRowB, uSrcColB));
-
-	uColIndexA = uSrcColA + 1;
-	uColIndexB = uSrcColB + 1;
-	uColIndexCombined = uDstCol + 1;
-	}
-
-// Builds msaCombined from msaA and msaB following Path, where the path may
-// cover only part of each input. Columns that lie before the column named by
-// the next edge, and columns left over after the last edge, are appended as
-// unaligned blocks via AppendUnalignedTerminals. Rows of A come first in
-// msaCombined, followed by the rows of B; names and ids are copied across.
-void AlignTwoMSAsGivenPathSW(const PWPath &Path, const MSA &msaA, const MSA &msaB,
-  MSA &msaCombined)
-	{
-	msaCombined.Clear();
-
-#if	TRACE
-	Log("AlignTwoMSAsGivenPathSW\n");
-	Log("Template A:\n");
-	msaA.LogMe();
-	Log("Template B:\n");
-	msaB.LogMe();
-#endif
-
-	const unsigned uColCountA = msaA.GetColCount();
-	const unsigned uColCountB = msaB.GetColCount();
-
-	const unsigned uSeqCountA = msaA.GetSeqCount();
-	const unsigned uSeqCountB = msaB.GetSeqCount();
-
-	msaCombined.SetSeqCount(uSeqCountA + uSeqCountB);
-
-// Carry labels and ids over: A fills rows [0, uSeqCountA), B the rest.
-	for (unsigned uRowA = 0; uRowA < uSeqCountA; ++uRowA)
-		{
-		msaCombined.SetSeqName(uRowA, msaA.GetSeqName(uRowA));
-		msaCombined.SetSeqId(uRowA, msaA.GetSeqId(uRowA));
-		}
-
-	for (unsigned uRowB = 0; uRowB < uSeqCountB; ++uRowB)
-		{
-		const unsigned uRow = uSeqCountA + uRowB;
-		msaCombined.SetSeqName(uRow, msaB.GetSeqName(uRowB));
-		msaCombined.SetSeqId(uRow, msaB.GetSeqId(uRowB));
-		}
-
-	unsigned uColIndexA = 0;
-	unsigned uColIndexB = 0;
-	unsigned uColIndexCombined = 0;
-	const unsigned uEdgeCount = Path.GetEdgeCount();
-	for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
-		{
-		const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
-#if	TRACE
-		Log("\nEdge %u %c%u.%u\n",
-		  uEdgeIndex,
-		  Edge.cType,
-		  Edge.uPrefixLengthA,
-		  Edge.uPrefixLengthB);
-#endif
-		const char cType = Edge.cType;
-		const unsigned uPrefixLengthA = Edge.uPrefixLengthA;
-		const unsigned uPrefixLengthB = Edge.uPrefixLengthB;
-
-		// Number of columns between the cursor and the column (prefix
-		// length - 1) that this edge refers to; zero if there is no gap.
-		unsigned uSkippedColsA = 0;
-		if (uPrefixLengthA > 0 && uPrefixLengthA - 1 > uColIndexA)
-			uSkippedColsA = (uPrefixLengthA - 1) - uColIndexA;
-
-		unsigned uSkippedColsB = 0;
-		if (uPrefixLengthB > 0 && uPrefixLengthB - 1 > uColIndexB)
-			uSkippedColsB = (uPrefixLengthB - 1) - uColIndexB;
-
-		AppendUnalignedTerminals(msaA, uColIndexA, uSkippedColsA, msaB, uColIndexB,
-		  uSkippedColsB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
-
-		if ('M' == cType)
-			{
-			assert(uPrefixLengthA > 0);
-			assert(uPrefixLengthB > 0);
-			assert(uColIndexA == uPrefixLengthA - 1);
-			assert(uColIndexB == uPrefixLengthB - 1);
-			AppendMatch(msaA, uColIndexA, msaB, uColIndexB, uSeqCountA, uSeqCountB,
-			  msaCombined, uColIndexCombined);
-			}
-		else if ('D' == cType)
-			{
-			assert(uPrefixLengthA > 0);
-			assert(uColIndexA == uPrefixLengthA - 1);
-			AppendDelete(msaA, uColIndexA, uSeqCountA, uSeqCountB, msaCombined,
-			  uColIndexCombined);
-			}
-		else if ('I' == cType)
-			{
-			assert(uPrefixLengthB > 0);
-			assert(uColIndexB == uPrefixLengthB - 1);
-			AppendInsert(msaB, uColIndexB, uSeqCountA, uSeqCountB, msaCombined,
-			  uColIndexCombined);
-			}
-		else
-			assert(false);
-		}
-
-	// Whatever remains to the right of the path is appended unaligned.
-	unsigned uTailColCountA = uColCountA - uColIndexA;
-	unsigned uTailColCountB = uColCountB - uColIndexB;
-
-	AppendUnalignedTerminals(msaA, uColIndexA, uTailColCountA, msaB, uColIndexB,
-	  uTailColCountB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
-	}
Deleted: trunk/packages/muscle/trunk/aligntwomsas.cpp
===================================================================
--- trunk/packages/muscle/trunk/aligntwomsas.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/aligntwomsas.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,41 +0,0 @@
-#include "muscle.h"
-#include "msa.h"
-#include "profile.h"
-#include "pwpath.h"
-#include "textfile.h"
-#include "timing.h"
-
-// Aligns two multiple alignments profile-to-profile.
-//
-//	msa1, msa2	input alignments
-//	msaOut		receives the combined alignment built from the path
-//	Path		receives the path computed by GlobalAlign
-//	bLockLeft	set the gap-open score of the first column of both
-//			profiles to MINUS_INFINITY
-//	bLockRight	set the gap-close score of the last column of both
-//			profiles to MINUS_INFINITY
-//
-// Returns the score reported by GlobalAlign. The two profiles obtained from
-// ProfileFromMSA are released with delete[] before returning.
-SCORE AlignTwoMSAs(const MSA &msa1, const MSA &msa2, MSA &msaOut, PWPath &Path,
-  bool bLockLeft, bool bLockRight)
-	{
-	const unsigned uLengthA = msa1.GetColCount();
-	const unsigned uLengthB = msa2.GetColCount();
-
-	ProfPos *PA = ProfileFromMSA(msa1);
-	ProfPos *PB = ProfileFromMSA(msa2);
-
-	// A profile with no columns has no first or last position to lock;
-	// uLength - 1 would wrap around and index far outside the array.
-	if (bLockLeft)
-		{
-		if (uLengthA > 0)
-			PA[0].m_scoreGapOpen = MINUS_INFINITY;
-		if (uLengthB > 0)
-			PB[0].m_scoreGapOpen = MINUS_INFINITY;
-		}
-
-	if (bLockRight)
-		{
-		if (uLengthA > 0)
-			PA[uLengthA-1].m_scoreGapClose = MINUS_INFINITY;
-		if (uLengthB > 0)
-			PB[uLengthB-1].m_scoreGapClose = MINUS_INFINITY;
-		}
-
-	SCORE Score = GlobalAlign(PA, uLengthA, PB, uLengthB, Path);
-
-	AlignTwoMSAsGivenPath(Path, msa1, msa2, msaOut);
-
-	delete[] PA;
-	delete[] PB;
-
-	return Score;
-	}
Deleted: trunk/packages/muscle/trunk/aligntwoprofs.cpp
===================================================================
--- trunk/packages/muscle/trunk/aligntwoprofs.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/aligntwoprofs.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,31 +0,0 @@
-#include "muscle.h"
-#include "msa.h"
-#include "profile.h"
-#include "pwpath.h"
-
-SCORE GlobalAlign4(ProfPos *PA, unsigned uLengthA, ProfPos *PB,
- unsigned uLengthB, PWPath &Path);
-
-// Aligns profile PA (uLengthA positions, weight wA) with profile PB
-// (uLengthB positions, weight wB).
-//
-//	Path		receives the path computed by GlobalAlign
-//	ptrPout		receives the merged profile built along that path
-//	ptruLengthOut	receives the length of the merged profile
-//
-// The weights handed to AlignTwoProfsGivenPath are normalised to
-// wA/(wA + wB) and wB/(wA + wB). Returns the score reported by GlobalAlign.
-SCORE AlignTwoProfs(
-  const ProfPos *PA, unsigned uLengthA, WEIGHT wA,
-  const ProfPos *PB, unsigned uLengthB, WEIGHT wB,
-  PWPath &Path, ProfPos **ptrPout, unsigned *ptruLengthOut)
-	{
-	assert(uLengthA < 100000);
-	assert(uLengthB < 100000);
-
-	SCORE Score = GlobalAlign(PA, uLengthA, PB, uLengthB, Path);
-
-	// PA must be described by its own length; the previous code passed
-	// uLengthB for both profiles.
-	AlignTwoProfsGivenPath(Path, PA, uLengthA, wA/(wA + wB), PB, uLengthB, wB/(wA + wB),
-	  ptrPout, ptruLengthOut);
-
-#if	HYDRO
-	if (ALPHA_Amino == g_Alpha)
-		Hydro(*ptrPout, *ptruLengthOut);
-#endif
-	return Score;
-	}
Deleted: trunk/packages/muscle/trunk/aln.cpp
===================================================================
--- trunk/packages/muscle/trunk/aln.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/aln.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,170 +0,0 @@
-#include "muscle.h"
-#include <stdio.h>
-#include <ctype.h>
-#include "msa.h"
-#include "textfile.h"
-
-const unsigned uCharsPerLine = 60;
-const int MIN_NAME = 10;
-const int MAX_NAME = 32;
-
-static char GetAlnConsensusChar(const MSA &a, unsigned uColIndex);
-
-// Length of a sequence label up to, but not including, its first blank;
-// the full length if the label contains no blank.
-static int GetLabelLengthToBlank(const char *ptrName)
-	{
-	const char *ptrBlank = strchr(ptrName, ' ');
-	if (0 != ptrBlank)
-		return (int) (ptrBlank - ptrName);
-	return (int) strlen(ptrName);
-	}
-
-// Writes the alignment to File in CLUSTAL (.aln) layout.
-//
-// A header line is written first (the CLUSTAL W banner when g_bClwStrict is
-// set, otherwise a MUSCLE banner). The columns are then written in blocks of
-// uCharsPerLine. Each block has one line per sequence (label cut at the first
-// blank, padded or truncated to a common width between MIN_NAME and MAX_NAME,
-// residues upper-cased) followed by a line of consensus symbols.
-// An alignment with no columns produces the header only.
-void MSA::ToAlnFile(TextFile &File) const
-	{
-	if (g_bClwStrict)
-		File.PutString("CLUSTAL W (1.81) multiple sequence alignment\n");
-	else
-		{
-		File.PutString("MUSCLE ("
-		  MUSCLE_MAJOR_VERSION "." MUSCLE_MINOR_VERSION ")"
-		  " multiple sequence alignment\n");
-		File.PutString("\n");
-		}
-
-	const unsigned uSeqCount = GetSeqCount();
-	const unsigned uColCount = GetColCount();
-
-	// Without this guard (uColCount - 1) wraps around for an empty
-	// alignment and the block loop below reads columns that do not exist.
-	if (0 == uColCount)
-		return;
-
-	// Common label width: longest label, clamped to [MIN_NAME, MAX_NAME].
-	int iLongestNameLength = 0;
-	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
-		{
-		const int iLength = GetLabelLengthToBlank(GetSeqName(uSeqIndex));
-		if (iLength > iLongestNameLength)
-			iLongestNameLength = iLength;
-		}
-	if (iLongestNameLength > MAX_NAME)
-		iLongestNameLength = MAX_NAME;
-	if (iLongestNameLength < MIN_NAME)
-		iLongestNameLength = MIN_NAME;
-
-	const unsigned uLineCount = (uColCount - 1)/uCharsPerLine + 1;
-	for (unsigned uLineIndex = 0; uLineIndex < uLineCount; ++uLineIndex)
-		{
-		File.PutString("\n");
-		const unsigned uStartColIndex = uLineIndex*uCharsPerLine;
-		unsigned uEndColIndex = uStartColIndex + uCharsPerLine - 1;
-		if (uEndColIndex >= uColCount)
-			uEndColIndex = uColCount - 1;
-
-		char Name[MAX_NAME+1];
-		for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
-			{
-			const char *ptrName = GetSeqName(uSeqIndex);
-			int iLength = GetLabelLengthToBlank(ptrName);
-			if (iLength > MAX_NAME)
-				iLength = MAX_NAME;
-
-			// Blank-fill, overlay the label, cut at the common width.
-			memset(Name, ' ', MAX_NAME);
-			memcpy(Name, ptrName, iLength);
-			Name[iLongestNameLength] = 0;
-
-			File.PutFormat("%s ", Name);
-			for (unsigned uColIndex = uStartColIndex; uColIndex <= uEndColIndex;
-			  ++uColIndex)
-				{
-				const char c = GetChar(uSeqIndex, uColIndex);
-				// toupper requires a value representable as unsigned char.
-				File.PutFormat("%c", toupper((unsigned char) c));
-				}
-			File.PutString("\n");
-			}
-
-		// Consensus line: blank label followed by one symbol per column.
-		memset(Name, ' ', MAX_NAME);
-		Name[iLongestNameLength] = 0;
-		File.PutFormat("%s ", Name);
-		for (unsigned uColIndex = uStartColIndex; uColIndex <= uEndColIndex;
-		  ++uColIndex)
-			{
-			const char c = GetAlnConsensusChar(*this, uColIndex);
-			File.PutChar(c);
-			}
-		File.PutString("\n");
-		}
-	}
-
-// True if BitMap is non-empty and all of its bits fall inside at least one
-// of the uGroupCount masks in Groups.
-static bool IsWithinAnyGroup(unsigned BitMap, const unsigned Groups[],
-  unsigned uGroupCount)
-	{
-	if (0 == BitMap)
-		return false;
-	for (unsigned i = 0; i < uGroupCount; ++i)
-		if (0 == (BitMap & ~Groups[i]))
-			return true;
-	return false;
-	}
-
-// Returns the CLUSTAL consensus symbol for column uColIndex of a:
-//	'*'	exactly one distinct letter occurs in the column
-//	':'	amino alphabet only: every letter belongs to one 'strong' group
-//	'.'	amino alphabet only: every letter belongs to one 'weaker' group
-//	' '	otherwise
-// Letters are taken from GetLetterEx, so gaps and wildcards count as letters
-// of their own and prevent a column from matching any group.
-static char GetAlnConsensusChar(const MSA &a, unsigned uColIndex)
-	{
-	const unsigned uSeqCount = a.GetSeqCount();
-	unsigned BitMap = 0;
-	unsigned Count = 0;
-	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
-		{
-		unsigned uLetter = a.GetLetterEx(uSeqIndex, uColIndex);
-		assert(uLetter < 32);
-		unsigned Bit = (1u << uLetter);
-		if (!(BitMap & Bit))
-			++Count;
-		BitMap |= Bit;
-		}
-
-// '*' indicates positions which have a single, fully conserved residue
-	if (1 == Count)
-		return '*';
-
-	if (ALPHA_Amino != g_Alpha)
-		return ' ';
-
-#define	B(x)	(1u << AX_##x)
-
-// ':' indicates that one of the following 'strong'
-// groups is fully conserved
-	static const unsigned StrongGroups[] =
-		{
-		B(S) | B(T) | B(A),			// STA
-		B(N) | B(E) | B(Q) | B(K),		// NEQK
-		B(N) | B(H) | B(Q) | B(K),		// NHQK
-		B(N) | B(D) | B(E) | B(Q),		// NDEQ
-		B(Q) | B(H) | B(R) | B(K),		// QHRK (was listed but never tested)
-		B(M) | B(I) | B(L) | B(V),		// MILV
-		B(M) | B(I) | B(L) | B(F),		// MILF
-		B(H) | B(Y),				// HY
-		B(F) | B(Y) | B(W),			// FYW
-		};
-
-// '.' indicates that one of the following 'weaker'
-// groups is fully conserved
-	static const unsigned WeakGroups[] =
-		{
-		B(C) | B(S) | B(A),				// CSA
-		B(A) | B(T) | B(V),				// ATV
-		B(S) | B(A) | B(G),				// SAG
-		B(S) | B(T) | B(N) | B(K),			// STNK
-		B(S) | B(T) | B(P) | B(A),			// STPA
-		B(S) | B(G) | B(N) | B(D),			// SGND
-		B(S) | B(N) | B(D) | B(E) | B(Q) | B(K),	// SNDEQK
-		B(N) | B(D) | B(E) | B(Q) | B(H) | B(K),	// NDEQHK (was listed but never tested)
-		B(N) | B(E) | B(Q) | B(H) | B(R) | B(K),	// NEQHRK (was coded with W instead of E)
-		B(F) | B(V) | B(L) | B(I) | B(M),		// FVLIM
-		B(H) | B(F) | B(Y),				// HFY
-		};
-
-#undef	B
-
-	// Strong groups take precedence over weak ones.
-	if (IsWithinAnyGroup(BitMap, StrongGroups,
-	  sizeof(StrongGroups)/sizeof(StrongGroups[0])))
-		return ':';
-
-	if (IsWithinAnyGroup(BitMap, WeakGroups,
-	  sizeof(WeakGroups)/sizeof(WeakGroups[0])))
-		return '.';
-
-	return ' ';
-	}
Deleted: trunk/packages/muscle/trunk/alpha.cpp
===================================================================
--- trunk/packages/muscle/trunk/alpha.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/alpha.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,283 +0,0 @@
-#include "muscle.h"
-#include <ctype.h>
-
-/***
-From Bioperl docs:
-Extended DNA / RNA alphabet
-------------------------------------------
-Symbol Meaning Nucleic Acid
-------------------------------------------
- A A Adenine
- C C Cytosine
- G G Guanine
- T T Thymine
- U U Uracil
- M A or C
- R A or G
- W A or T
- S C or G
- Y C or T
- K G or T
- V A or C or G
- H A or C or T
- D A or G or T
- B C or G or T
- X G or A or T or C
- N G or A or T or C
-
-IUPAC-IUB SYMBOLS FOR NUCLEOTIDE NOMENCLATURE:
- Cornish-Bowden (1985) Nucl. Acids Res. 13: 3021-3030.
-***/
-
-unsigned g_CharToLetter[MAX_CHAR];
-unsigned g_CharToLetterEx[MAX_CHAR];
-
-char g_LetterToChar[MAX_ALPHA];
-char g_LetterExToChar[MAX_ALPHA_EX];
-
-char g_UnalignChar[MAX_CHAR];
-char g_AlignChar[MAX_CHAR];
-
-bool g_IsWildcardChar[MAX_CHAR];
-bool g_IsResidueChar[MAX_CHAR];
-
-ALPHA g_Alpha = ALPHA_Undefined;
-unsigned g_AlphaSize = 0;
-
-#define Res(c, Letter) \
- { \
- const unsigned char Upper = (unsigned char) toupper(c); \
- const unsigned char Lower = (unsigned char) tolower(c); \
- g_CharToLetter[Upper] = Letter; \
- g_CharToLetter[Lower] = Letter; \
- g_CharToLetterEx[Upper] = Letter; \
- g_CharToLetterEx[Lower] = Letter; \
- g_LetterToChar[Letter] = Upper; \
- g_LetterExToChar[Letter] = Upper; \
- g_IsResidueChar[Upper] = true; \
- g_IsResidueChar[Lower] = true; \
- g_AlignChar[Upper] = Upper; \
- g_AlignChar[Lower] = Upper; \
- g_UnalignChar[Upper] = Lower; \
- g_UnalignChar[Lower] = Lower; \
- }
-
-#define Wild(c, Letter) \
- { \
- const unsigned char Upper = (unsigned char) toupper(c); \
- const unsigned char Lower = (unsigned char) tolower(c); \
- g_CharToLetterEx[Upper] = Letter; \
- g_CharToLetterEx[Lower] = Letter; \
- g_LetterExToChar[Letter] = Upper; \
- g_IsResidueChar[Upper] = true; \
- g_IsResidueChar[Lower] = true; \
- g_AlignChar[Upper] = Upper; \
- g_AlignChar[Lower] = Upper; \
- g_UnalignChar[Upper] = Lower; \
- g_UnalignChar[Lower] = Lower; \
- g_IsWildcardChar[Lower] = true; \
- g_IsWildcardChar[Upper] = true; \
- }
-
-// Number of letters in the core (non-extended) alphabet: 20 for amino acids,
-// 4 for DNA and RNA. Any other value is reported through Quit.
-static unsigned GetAlphaSize(ALPHA Alpha)
-	{
-	if (ALPHA_Amino == Alpha)
-		return 20;
-
-	if (ALPHA_RNA == Alpha || ALPHA_DNA == Alpha)
-		return 4;
-
-	Quit("Invalid Alpha=%d", Alpha);
-	return 0;
-	}
-
-// Resets every character/letter lookup table to its "unknown" state so that
-// SetAlpha can fill them for one alphabet:
-//	char -> letter tables	all bits set (0xff in every byte)
-//	letter -> char tables	'?'
-//	align/unalign tables	'?'
-//	wildcard/residue flags	false
-static void InitArrays()
-	{
-	memset(g_CharToLetter, 0xff, sizeof(g_CharToLetter));
-	memset(g_CharToLetterEx, 0xff, sizeof(g_CharToLetterEx));
-
-	memset(g_LetterToChar, '?', sizeof(g_LetterToChar));
-	memset(g_LetterExToChar, '?', sizeof(g_LetterExToChar));
-
-	// Each table is sized by itself; g_AlignChar used to be cleared with
-	// sizeof(g_UnalignChar), which only worked because both have MAX_CHAR
-	// entries.
-	memset(g_AlignChar, '?', sizeof(g_AlignChar));
-	memset(g_UnalignChar, '?', sizeof(g_UnalignChar));
-
-	// g_IsResidueChar must be cleared as well, otherwise residue flags set
-	// for a previous alphabet survive a second call to SetAlpha.
-	memset(g_IsWildcardChar, 0, sizeof(g_IsWildcardChar));
-	memset(g_IsResidueChar, 0, sizeof(g_IsResidueChar));
-	}
-
-// Registers c as a gap symbol: it maps to AX_GAP in the extended table,
-// becomes the character reported for AX_GAP, and is left unchanged by both
-// AlignChar and UnalignChar.
-static void SetGapChar(char c)
-	{
-	const unsigned char ucGap = (unsigned char) c;
-
-	g_AlignChar[ucGap] = ucGap;
-	g_UnalignChar[ucGap] = ucGap;
-
-	g_CharToLetterEx[ucGap] = AX_GAP;
-	g_LetterExToChar[AX_GAP] = ucGap;
-	}
-
-static void SetAlphaDNA()	// Fills the global lookup tables for the DNA alphabet.
- {
- Res('A', NX_A)	// Res: both cases of the character map to the letter in the plain and extended tables and are flagged as residues
- Res('C', NX_C)
- Res('G', NX_G)
- Res('T', NX_T)
- Wild('M', NX_M)	// Wild: extended table only (g_CharToLetter is left untouched); flagged as residue and as wildcard
- Wild('R', NX_R)
- Wild('W', NX_W)
- Wild('S', NX_S)
- Wild('Y', NX_Y)
- Wild('K', NX_K)
- Wild('V', NX_V)
- Wild('H', NX_H)
- Wild('D', NX_D)
- Wild('B', NX_B)
- Wild('X', NX_X)
- Wild('N', NX_N)
- }
-
-static void SetAlphaRNA()	// Fills the global lookup tables for the RNA alphabet; same entries as DNA plus 'U'.
- {
- Res('A', NX_A)	// Res: both cases of the character map to the letter in the plain and extended tables and are flagged as residues
- Res('C', NX_C)
- Res('G', NX_G)
- Res('U', NX_U)	// NX_U is an alias of NX_T, so 'U' and 'T' share one letter index
- Res('T', NX_T)	// registered after 'U', so the letter-to-char tables end up reporting 'T' for that index
- Wild('M', NX_M)	// Wild: extended table only (g_CharToLetter is left untouched); flagged as residue and as wildcard
- Wild('R', NX_R)
- Wild('W', NX_W)
- Wild('S', NX_S)
- Wild('Y', NX_Y)
- Wild('K', NX_K)
- Wild('V', NX_V)
- Wild('H', NX_H)
- Wild('D', NX_D)
- Wild('B', NX_B)
- Wild('X', NX_X)
- Wild('N', NX_N)
- }
-
-static void SetAlphaAmino()	// Fills the global lookup tables for the amino-acid alphabet.
- {
- Res('A', AX_A)	// Res: both cases of the character map to the letter in the plain and extended tables and are flagged as residues
- Res('C', AX_C)
- Res('D', AX_D)
- Res('E', AX_E)
- Res('F', AX_F)
- Res('G', AX_G)
- Res('H', AX_H)
- Res('I', AX_I)
- Res('K', AX_K)
- Res('L', AX_L)
- Res('M', AX_M)
- Res('N', AX_N)
- Res('P', AX_P)
- Res('Q', AX_Q)
- Res('R', AX_R)
- Res('S', AX_S)
- Res('T', AX_T)
- Res('V', AX_V)
- Res('W', AX_W)
- Res('Y', AX_Y)
-
- Wild('B', AX_B)	// Wild: extended table only (g_CharToLetter is left untouched); flagged as residue and as wildcard
- Wild('X', AX_X)
- Wild('Z', AX_Z)
- }
-
-// Selects the active alphabet. All lookup tables are reset, '.' and '-' are
-// registered as gap symbols ('-' last, so it is the character reported for
-// the gap letter), the tables for Alpha are filled, and g_AlphaSize and
-// g_Alpha are updated. An unknown Alpha is reported through Quit.
-void SetAlpha(ALPHA Alpha)
-	{
-	InitArrays();
-
-	SetGapChar('.');
-	SetGapChar('-');
-
-	switch (Alpha)
-		{
-	case ALPHA_Amino:
-		SetAlphaAmino();
-		break;
-
-	case ALPHA_DNA:
-		SetAlphaDNA();
-		// This case used to fall through into ALPHA_RNA, which also
-		// registered 'U' as a residue for DNA input.
-		break;
-
-	case ALPHA_RNA:
-		SetAlphaRNA();
-		break;
-
-	default:
-		Quit("Invalid Alpha=%d", Alpha);
-		}
-
-	g_AlphaSize = GetAlphaSize(Alpha);
-	g_Alpha = Alpha;
-
-	if (g_bVerbose)
-		Log("Alphabet %s\n", ALPHAToStr(g_Alpha));
-	}
-
-// Character that stands for "any letter" in the active alphabet (g_Alpha):
-// 'X' for amino acids, 'N' for DNA and RNA. If no valid alphabet is active
-// the problem is reported through Quit and '?' is returned.
-char GetWildcardChar()
-	{
-	if (ALPHA_Amino == g_Alpha)
-		return 'X';
-
-	if (ALPHA_DNA == g_Alpha || ALPHA_RNA == g_Alpha)
-		return 'N';
-
-	Quit("Invalid Alpha=%d", g_Alpha);
-	return '?';
-	}
-
-// True if c is one of the letters ACGTURYN in either case.
-bool IsNucleo(char c)
-	{
-	// strchr also finds the terminating NUL, so '\0' must be excluded
-	// explicitly or it would be classified as a nucleotide.
-	return c != 0 && strchr("ACGTURYNacgturyn", c) != 0;
-	}
-
-// True if c is one of the letters AGCTN in either case.
-bool IsDNA(char c)
-	{
-	// strchr also finds the terminating NUL, so '\0' must be excluded
-	// explicitly or it would be classified as DNA.
-	return c != 0 && strchr("AGCTNagctn", c) != 0;
-	}
-
-// True if c is one of the letters AGCUN in either case.
-bool IsRNA(char c)
-	{
-	// strchr also finds the terminating NUL, so '\0' must be excluded
-	// explicitly or it would be classified as RNA.
-	return c != 0 && strchr("AGCUNagcun", c) != 0;
-	}
-
-static char InvalidLetters[256];
-static int InvalidLetterCount = 0;
-
-// Forgets every invalid letter recorded so far. The counter is reset together
-// with the flags; otherwise ReportInvalidLetters would still emit a warning
-// (with an empty letter list) after a clear.
-void ClearInvalidLetterWarning()
-	{
-	memset(InvalidLetters, 0, sizeof(InvalidLetters));
-	InvalidLetterCount = 0;
-	}
-
-// Records that character c was seen and is not valid; the warning itself is
-// issued later by ReportInvalidLetters. The second argument is not used here.
-void InvalidLetterWarning(char c, char w)
-	{
-	const unsigned uSlot = (unsigned char) c;
-	InvalidLetterCount += 1;
-	InvalidLetters[uSlot] = 1;
-	}
-
-// Issues a single warning listing every distinct character recorded by
-// InvalidLetterWarning, in ascending character-code order. Does nothing if
-// no invalid letter has been counted.
-void ReportInvalidLetters()
-	{
-	if (InvalidLetterCount == 0)
-		return;
-
-	// One slot per possible character value plus the terminator.
-	char Str[257] = { 0 };
-
-	char *ptrOut = Str;
-	for (int iChar = 0; iChar < 256; ++iChar)
-		if (InvalidLetters[iChar] != 0)
-			*ptrOut++ = (char) iChar;
-
-	Warning("Assuming %s (see -seqtype option), invalid letters found: %s",
-	  ALPHAToStr(g_Alpha), Str);
-	}
Deleted: trunk/packages/muscle/trunk/alpha.h
===================================================================
--- trunk/packages/muscle/trunk/alpha.h 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/alpha.h 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,106 +0,0 @@
-#ifndef alpha_h
-#define alpha_h
-
-bool StrHasAmino(const char *Str);
-bool StrHasGap(const char *Str);
-void ClearInvalidLetterWarning();
-void InvalidLetterWarning(char c, char w);
-void ReportInvalidLetters();
-
-extern unsigned g_CharToLetter[];
-extern unsigned g_CharToLetterEx[];
-
-extern char g_LetterToChar[];
-extern char g_LetterExToChar[];
-
-extern char g_UnalignChar[];
-extern char g_AlignChar[];
-
-extern bool g_IsWildcardChar[];
-extern bool g_IsResidueChar[];
-
-#define CharToLetter(c) (g_CharToLetter[(unsigned char) (c)])
-#define CharToLetterEx(c) (g_CharToLetterEx[(unsigned char) (c)])
-
-#define LetterToChar(u) (g_LetterToChar[u])
-#define LetterExToChar(u) (g_LetterExToChar[u])
-
-#define IsResidueChar(c) (g_IsResidueChar[(unsigned char) (c)])
-#define IsGapChar(c) ('-' == (c) || '.' == (c))
-#define IsWildcardChar(c) (g_IsWildcardChar[(unsigned char) (c)])
-
-#define AlignChar(c) (g_AlignChar[(unsigned char) (c)])
-#define UnalignChar(c) (g_UnalignChar[(unsigned char) (c)])
-
-// AX=Amino alphabet with eXtensions (B, Z and X)
-enum AX
- {
- AX_A,	// AX_A .. AX_Y: the 20 core letters, numbered from 0 in declaration order
- AX_C,
- AX_D,
- AX_E,
- AX_F,
- AX_G,
- AX_H,
- AX_I,
- AX_K,
- AX_L,
- AX_M,
- AX_N,
- AX_P,
- AX_Q,
- AX_R,
- AX_S,
- AX_T,
- AX_V,
- AX_W,
- AX_Y,
-
- AX_X, // Any
-
- AX_B, // D or N
- AX_Z, // E or Q
-
- AX_GAP,	// keep last: AX_COUNT is defined as AX_GAP + 1
- };
-const unsigned AX_COUNT = AX_GAP + 1;
-
-// NX=Nucleotide alphabet with extensions
-enum NX
- {
- NX_A,	// NX_A .. NX_T: the 4 core letters, numbered from 0 in declaration order
- NX_C,
- NX_G,
- NX_T,
- NX_U = NX_T,	// alias: U and T share one letter index
-
- NX_M, // AC
- NX_R, // AG
- NX_W, // AT
- NX_S, // CG
- NX_Y, // CT
- NX_K, // GT
- NX_V, // ACG
- NX_H, // ACT
- NX_D, // AGT
- NX_B, // CGT
- NX_X, // GATC
- NX_N, // GATC
- NX_GAP	// keep last: NX_COUNT is defined as NX_GAP + 1
- };
-const unsigned NX_COUNT = NX_GAP + 1;
-
-const unsigned MAX_ALPHA = 20;
-const unsigned MAX_ALPHA_EX = AX_COUNT;
-const unsigned MAX_CHAR = 256;
-
-extern ALPHA g_Alpha;
-extern unsigned g_AlphaSize;
-
-void SetAlpha(ALPHA Alpha);
-char GetWildcardChar();
-bool IsNucleo(char c);
-bool IsDNA(char c);
-bool IsRNA(char c);
-
-#endif // alpha_h
Deleted: trunk/packages/muscle/trunk/anchors.cpp
===================================================================
--- trunk/packages/muscle/trunk/anchors.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/anchors.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,218 +0,0 @@
-#include "muscle.h"
-#include "msa.h"
-#include "objscore.h"
-
-#define TRACE 0
-
-// Moving-average smoothing of Score[0..uCount) into SmoothScore.
-//
-// Each score is first capped at dCeil. SmoothScore[i] is the mean of the
-// capped scores in the window of uWindowLength entries centred on i. The
-// first and last uWindowLength/2 entries, which have no full window, are set
-// to 0. If uCount <= uWindowLength every entry is set to 0.
-// uWindowLength must be odd; an even value is reported through Quit.
-static void WindowSmooth(const SCORE Score[], unsigned uCount, unsigned uWindowLength,
-  SCORE SmoothScore[], double dCeil)
-	{
-	if (1 != uWindowLength%2)
-		Quit("WindowSmooth, length=%u", uWindowLength);
-
-	// Too short for even one full window.
-	if (uCount <= uWindowLength)
-		{
-		for (unsigned k = 0; k < uCount; ++k)
-			SmoothScore[k] = 0;
-		return;
-		}
-
-	const unsigned uHalf = uWindowLength/2;
-	const unsigned uLastCenter = uCount - uHalf - 1;
-
-	// Margins on both ends have no full window.
-	for (unsigned k = 0; k < uHalf; ++k)
-		{
-		SmoothScore[k] = 0;
-		SmoothScore[uCount - k - 1] = 0;
-		}
-
-	// Sum of the first full window.
-	SCORE scoreSum = 0;
-	for (unsigned k = 0; k < uWindowLength; ++k)
-		{
-		const SCORE s = Score[k];
-		scoreSum += (SCORE) (s > dCeil ? dCeil : s);
-		}
-	SmoothScore[uHalf] = scoreSum/uWindowLength;
-
-	// Slide one position at a time: drop the entry that leaves the window,
-	// then add the entry that enters it.
-	for (unsigned uCenter = uHalf + 1; uCenter <= uLastCenter; ++uCenter)
-		{
-		const SCORE sOut = Score[uCenter - uHalf - 1];
-		const SCORE sIn = Score[uCenter + uHalf];
-		scoreSum -= (SCORE) (sOut > dCeil ? dCeil : sOut);
-		scoreSum += (SCORE) (sIn > dCeil ? dCeil : sIn);
-		SmoothScore[uCenter] = scoreSum/uWindowLength;
-		}
-	}
-
-// Find columns that score above the given threshold.
-// A range of scores is defined between the average
-// and the maximum. The threshold is a fraction 0.0 .. 1.0
-// within that range, where 0.0 is the average score
-// and 1.0 is the maximum score.
-// "Grade" is by analogy with grading on a curve.
-//
-// The indexes of the selected columns are written to BestCols in ascending
-// order and their number to *ptruBestColCount. No column is selected when
-// there are no scores, or when the maximum does not exceed the average (for
-// example when all scores are equal), because the range is then empty.
-static void FindBestColsGrade(const SCORE Score[], unsigned uCount,
-  double dThreshold, unsigned BestCols[], unsigned *ptruBestColCount)
-	{
-	*ptruBestColCount = 0;
-
-	// Guard the division by uCount below.
-	if (0 == uCount)
-		return;
-
-	SCORE scoreTotal = 0;
-	SCORE scoreMax = MINUS_INFINITY;
-	for (unsigned uIndex = 0; uIndex < uCount; ++uIndex)
-		{
-		scoreTotal += Score[uIndex];
-		if (Score[uIndex] > scoreMax)
-			scoreMax = Score[uIndex];
-		}
-	const SCORE scoreAvg = scoreTotal / uCount;
-
-	// Guard the division by the range below.
-	const SCORE scoreRange = scoreMax - scoreAvg;
-	if (!(scoreRange > 0))
-		return;
-
-	unsigned uBestColCount = 0;
-	for (unsigned uIndex = 0; uIndex < uCount; ++uIndex)
-		{
-		const double dHeight = (Score[uIndex] - scoreAvg)/scoreRange;
-		if (dHeight >= dThreshold)
-			{
-			BestCols[uBestColCount] = uIndex;
-			++uBestColCount;
-			}
-		}
-	*ptruBestColCount = uBestColCount;
-	}
-
-// Selects the columns of msa that pass all three tests, in this order:
-// (1) Score is not below dMinScore
-// (2) SmoothScore is not below dMinSmoothScore
-// (3) the column contains no gap.
-// Selected column indexes go to BestCols in ascending order; their number
-// goes to *ptruBestColCount.
-static void FindBestColsCombo(const MSA &msa, const SCORE Score[],
-  const SCORE SmoothScore[], double dMinScore, double dMinSmoothScore,
-  unsigned BestCols[], unsigned *ptruBestColCount)
-	{
-	const unsigned uColCount = msa.GetColCount();
-
-	unsigned uKept = 0;
-	for (unsigned uCol = 0; uCol < uColCount; ++uCol)
-		{
-		// The gap test runs only when both score tests have passed.
-		const bool bKeep =
-		  !(Score[uCol] < dMinScore) &&
-		  !(SmoothScore[uCol] < dMinSmoothScore) &&
-		  !msa.ColumnHasGap(uCol);
-		if (bKeep)
-			BestCols[uKept++] = uCol;
-		}
-	*ptruBestColCount = uKept;
-	}
-
-// Debug listing, written with Log: a header of sequence indexes modulo 10,
-// then one line per column with the column index, the characters of all
-// sequences, the raw and the smoothed score, and a " <-- Best" marker for
-// each entry of BestCols that names the column.
-static void ListBestCols(const MSA &msa, const SCORE Score[], const SCORE SmoothScore[],
-  unsigned BestCols[], unsigned uBestColCount)
-	{
-	const unsigned uSeqCount = msa.GetSeqCount();
-	const unsigned uColCount = msa.GetColCount();
-
-	Log("Col ");
-	unsigned uSeq = 0;
-	while (uSeq < uSeqCount)
-		{
-		Log("%u", uSeq%10);
-		++uSeq;
-		}
-	Log(" ");
-
-	for (unsigned uCol = 0; uCol < uColCount; ++uCol)
-		{
-		Log("%3u ", uCol);
-		for (uSeq = 0; uSeq < uSeqCount; ++uSeq)
-			Log("%c", msa.GetChar(uSeq, uCol));
-
-		Log(" %10.3f", Score[uCol]);
-		Log(" %10.3f", SmoothScore[uCol]);
-
-		for (unsigned k = 0; k < uBestColCount; ++k)
-			{
-			if (uCol == BestCols[k])
-				Log(" <-- Best");
-			}
-		Log("\n");
-		}
-	}
-
-// Thins out BestCols so that at most one anchor is kept per window.
-//
-// Starting from each not yet consumed best column, all following best
-// columns less than uWindowLength away are grouped with it.
-// If two best columns are found within a window, choose
-// the highest-scoring (the later one on a tie). If more than two, choose
-// the one closest to the center of the window (the earliest one on a tie).
-// The chosen columns are written to AnchorCols and their number to
-// *ptruAnchorColCount.
-//
-// NOTE(review): the window test subtracts unsigned column indexes, so
-// BestCols is assumed to be in ascending order - confirm against callers.
-static void MergeBestCols(const SCORE Scores[], const unsigned BestCols[],
-  unsigned uBestColCount, unsigned uWindowLength, unsigned AnchorCols[],
-  unsigned *ptruAnchorColCount)
-	{
-	unsigned uAnchorColCount = 0;
-	for (unsigned n = 0; n < uBestColCount; /* update inside loop */)
-		{
-		const unsigned uBestColIndex = BestCols[n];
-
-		// Count the best columns after n that share its window.
-		unsigned uCountWithinWindow = 0;
-		for (unsigned i = n + 1; i < uBestColCount; ++i)
-			{
-			if (BestCols[i] - uBestColIndex >= uWindowLength)
-				break;
-			++uCountWithinWindow;
-			}
-
-		unsigned uAnchorCol = uBestColIndex;
-		if (1 == uCountWithinWindow)
-			{
-			const unsigned uBestColIndex2 = BestCols[n+1];
-			if (!(Scores[uBestColIndex] > Scores[uBestColIndex2]))
-				uAnchorCol = uBestColIndex2;
-			}
-		else if (uCountWithinWindow > 1)
-			{
-			// Distances are measured from the window center. The
-			// previous code measured them from the window start and
-			// never looked at the last column of the window.
-			const int iWindowCenter = (int) (uBestColIndex + uWindowLength/2);
-			int iClosestDist = (int) uWindowLength;
-			for (unsigned i = n; i <= n + uCountWithinWindow; ++i)
-				{
-				const unsigned uColIndex = BestCols[i];
-				int iDist = (int) uColIndex - iWindowCenter;
-				if (iDist < 0)
-					iDist = -iDist;
-				if (iDist < iClosestDist)
-					{
-					uAnchorCol = uColIndex;
-					iClosestDist = iDist;
-					}
-				}
-			}
-
-		AnchorCols[uAnchorColCount] = uAnchorCol;
-		++uAnchorColCount;
-
-		// Skip the whole group that was just merged.
-		n += uCountWithinWindow + 1;
-		}
-	*ptruAnchorColCount = uAnchorColCount;
-	}
-
-// Chooses anchor columns of msa.
-//
-// Per-column letter scores are computed and smoothed, columns passing the
-// score, smoothed-score and no-gap tests are collected, and candidates
-// closer together than g_uAnchorSpacing are merged. The result is written to
-// AnchorCols and its size to *ptruAnchorColCount. Alignments with fewer than
-// 16 columns get no anchors.
-void FindAnchorCols(const MSA &msa, unsigned AnchorCols[],
-  unsigned *ptruAnchorColCount)
-	{
-	const unsigned uColCount = msa.GetColCount();
-	if (uColCount < 16)
-		{
-		*ptruAnchorColCount = 0;
-		return;
-		}
-
-	// Scratch arrays, one entry per column; released at the end.
-	SCORE *LetterScores = new SCORE[uColCount];
-	SCORE *SmoothedScores = new SCORE[uColCount];
-	unsigned *Candidates = new unsigned[uColCount];
-
-	GetLetterScores(msa, LetterScores);
-	WindowSmooth(LetterScores, uColCount, g_uSmoothWindowLength, SmoothedScores,
-	  g_dSmoothScoreCeil);
-
-	unsigned uCandidateCount;
-	FindBestColsCombo(msa, LetterScores, SmoothedScores, g_dMinBestColScore,
-	  g_dMinSmoothScore, Candidates, &uCandidateCount);
-
-#if	TRACE
-	ListBestCols(msa, LetterScores, SmoothedScores, Candidates, uCandidateCount);
-#endif
-
-	MergeBestCols(LetterScores, Candidates, uCandidateCount, g_uAnchorSpacing,
-	  AnchorCols, ptruAnchorColCount);
-
-	delete[] LetterScores;
-	delete[] SmoothedScores;
-	delete[] Candidates;
-	}
Deleted: trunk/packages/muscle/trunk/bittraceback.cpp
===================================================================
--- trunk/packages/muscle/trunk/bittraceback.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/bittraceback.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,206 +0,0 @@
-#include "muscle.h"
-#include "pwpath.h"
-
-#define TRACE 0
-
-// Maps the edge types 'E' and 'J' onto 'D' and 'I' respectively; every
-// other character is returned unchanged.
-static char XlatEdgeType(char c)
-	{
-	switch (c)
-		{
-	case 'E':
-		return 'D';
-	case 'J':
-		return 'I';
-	default:
-		return c;
-		}
-	}
-
-// Formats the predecessor information packed in Bits as "aM bD cI", where
-// a, b and c name the state stored for reaching M, D and I respectively. A
-// position whose bit field matches none of the known values is shown as 'x'.
-// The result lives in a static buffer that is overwritten by the next call.
-static const char *BitsToStr(char Bits)
-	{
-	static char Str[] = "xM xD xI";
-
-	// Start from the placeholders on every call; the buffer is static, so
-	// an unmatched field would otherwise show the previous call's letter.
-	Str[0] = 'x';
-	Str[3] = 'x';
-	Str[6] = 'x';
-
-	switch (Bits & BIT_xM)
-		{
-	case BIT_MM:
-		Str[0] = 'M';
-		break;
-	case BIT_DM:
-		Str[0] = 'D';
-		break;
-	case BIT_IM:
-		Str[0] = 'I';
-		break;
-		}
-
-	switch (Bits & BIT_xD)
-		{
-	case BIT_MD:
-		Str[3] = 'M';
-		break;
-	case BIT_DD:
-		Str[3] = 'D';
-		break;
-		}
-
-	switch (Bits & BIT_xI)
-		{
-	case BIT_MI:
-		Str[6] = 'M';
-		break;
-	case BIT_II:
-		Str[6] = 'I';
-		break;
-		}
-
-	return Str;
-	}
-
-// Decodes from Bits the type of the edge that precedes an edge of type cType.
-// The bit field belonging to cType is masked out and translated to the
-// letter of the predecessor state. An unknown cType is reported with
-// Quit("Huh?"), an unknown bit pattern for a known cType with Quit("Huh!?");
-// '?' is returned in both cases should Quit return.
-static inline char XChar(char Bits, char cType)
-	{
-	switch (cType)
-		{
-	case 'M':
-		switch (Bits & BIT_xM)
-			{
-		case BIT_MM:
-			return 'M';
-		case BIT_DM:
-			return 'D';
-		case BIT_IM:
-			return 'I';
-#if	DOUBLE_AFFINE
-		case BIT_EM:
-			return 'E';
-		case BIT_JM:
-			return 'J';
-#endif
-			}
-		break;
-
-	case 'D':
-		switch (Bits & BIT_xD)
-			{
-		case BIT_MD:
-			return 'M';
-		case BIT_DD:
-			return 'D';
-			}
-		break;
-
-	case 'I':
-		switch (Bits & BIT_xI)
-			{
-		case BIT_MI:
-			return 'M';
-		case BIT_II:
-			return 'I';
-			}
-		break;
-
-#if	DOUBLE_AFFINE
-	case 'E':
-		switch (Bits & BIT_xE)
-			{
-		case BIT_ME:
-			return 'M';
-		case BIT_EE:
-			return 'E';
-			}
-		break;
-
-	case 'J':
-		switch (Bits & BIT_xJ)
-			{
-		case BIT_MJ:
-			return 'M';
-		case BIT_JJ:
-			return 'J';
-			}
-		break;
-#endif
-
-	default:
-		Quit("Huh?");
-		return '?';
-		}
-
-	// Known edge type, but its bit field matched no predecessor.
-	Quit("Huh!?");
-	return '?';
-	}
-
-// Rebuilds the alignment path from a bit-packed trace-back matrix.
-//
-//	TraceBack	matrix indexed [prefix length A][prefix length B]; each
-//			cell packs the predecessor of every edge type
-//	uLengthA/B	prefix lengths at which the trace-back starts
-//	LastEdge	type of the final edge of the path
-//	Path		cleared, then filled from the last edge to the first
-//
-// Edges are prepended until both prefix lengths reach zero. 'E' and 'J'
-// edges are stored in Path as 'D' and 'I' (see XlatEdgeType) but keep their
-// own type while the matrix is decoded. Inconsistent input (a step that would
-// take a prefix length below zero, or an unknown edge type) is reported
-// through Quit.
-void BitTraceBack(char **TraceBack, unsigned uLengthA, unsigned uLengthB,
-  char LastEdge, PWPath &Path)
-	{
-#if	TRACE
-	Log("BitTraceBack\n");
-#endif
-	Path.Clear();
-
-	PWEdge Edge;
-	Edge.uPrefixLengthA = uLengthA;
-	Edge.uPrefixLengthB = uLengthB;
-	Edge.cType = LastEdge;
-	for (;;)
-		{
-#if	TRACE
-		Log("Prepend %c%d.%d\n", Edge.cType, Edge.uPrefixLengthA, Edge.uPrefixLengthB);
-#endif
-		// Store the translated type, then restore the raw one, which is
-		// still needed to pick the right bit field below.
-		const char cSave = Edge.cType;
-		Edge.cType = XlatEdgeType(cSave);
-		Path.PrependEdge(Edge);
-		Edge.cType = cSave;
-
-		const unsigned PLA = Edge.uPrefixLengthA;
-		const unsigned PLB = Edge.uPrefixLengthB;
-		const char Bits = TraceBack[PLA][PLB];
-		const char NextEdgeType = XChar(Bits, Edge.cType);
-#if	TRACE
-		Log("XChar(%s, %c) = %c\n", BitsToStr(Bits), Edge.cType, NextEdgeType);
-#endif
-		switch (Edge.cType)
-			{
-		case 'M':
-			{
-			if (Edge.uPrefixLengthA == 0)
-				Quit("BitTraceBack MA=0");
-			if (Edge.uPrefixLengthB == 0)
-				Quit("BitTraceBack MB=0");
-			--(Edge.uPrefixLengthA);
-			--(Edge.uPrefixLengthB);
-			break;
-			}
-		case 'D':
-		case 'E':
-			{
-			if (Edge.uPrefixLengthA == 0)
-				Quit("BitTraceBack DA=0");
-			--(Edge.uPrefixLengthA);
-			break;
-			}
-		case 'I':
-		case 'J':
-			{
-			if (Edge.uPrefixLengthB == 0)
-				Quit("BitTraceBack IB=0");
-			--(Edge.uPrefixLengthB);
-			break;
-			}
-		default:
-			// %c needs the character, not the whole PWEdge object.
-			Quit("BitTraceBack: Invalid edge %c", Edge.cType);
-			}
-
-		if (0 == Edge.uPrefixLengthA && 0 == Edge.uPrefixLengthB)
-			break;
-
-		Edge.cType = NextEdgeType;
-		}
-
-#if	TRACE
-	Path.LogMe();
-#endif
-	}
Deleted: trunk/packages/muscle/trunk/blosumla.cpp
===================================================================
--- trunk/packages/muscle/trunk/blosumla.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/blosumla.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,118 +0,0 @@
-#include "muscle.h"
-
-#define GAPVAL 0.3
-#define GAPGAPVAL 5.0
-
-// Blosum62 log-average factor matrix
-static float Blosum62LA[20][20] =
- {
-#define v(x) ((float) x)
-#define S_ROW(n, c, A, C, D, E, F, G, H, I, K, L, M, N, P, Q, R, S, T, V, W, Y) \
- { v(A), v(C), v(D), v(E), v(F), v(G), v(H), v(I), v(K), v(L), v(M), v(N), v(P), v(Q), \
- v(R), v(S), v(T), v(V), v(W), v(Y) },
-
-// Blosum62 log average matrix
-// A C D E F
-// G H I K L
-// M N P Q R
-// S T V W Y
-S_ROW( 0, 'A', 3.9029401, 0.8679881, 0.5446049, 0.7412640, 0.4648942,
- 1.0568696, 0.5693654, 0.6324813, 0.7753898, 0.6019460,
- 0.7231498, 0.5883077, 0.7541214, 0.7568035, 0.6126988,
- 1.4721037, 0.9844022, 0.9364584, 0.4165484, 0.5426125)
-
-S_ROW( 1, 'C', 0.8679881, 19.5765802, 0.3014542, 0.2859347, 0.4389910,
- 0.4203886, 0.3550472, 0.6534589, 0.3491296, 0.6422760,
- 0.6113537, 0.3978026, 0.3795628, 0.3657796, 0.3089379,
- 0.7384148, 0.7405530, 0.7558448, 0.4499807, 0.4342013)
-
-S_ROW( 2, 'D', 0.5446049, 0.3014542, 7.3979253, 1.6878109, 0.2989696,
- 0.6343015, 0.6785593, 0.3390155, 0.7840905, 0.2866128,
- 0.3464547, 1.5538520, 0.5987177, 0.8970811, 0.5732000,
- 0.9135051, 0.6947898, 0.3365004, 0.2321050, 0.3456829)
-
-S_ROW( 3, 'E', 0.7412640, 0.2859347, 1.6878109, 5.4695276, 0.3307441,
- 0.4812675, 0.9600400, 0.3305223, 1.3082782, 0.3728734,
- 0.5003421, 0.9112983, 0.6792027, 1.9017376, 0.9607983,
- 0.9503570, 0.7414260, 0.4289431, 0.3743021, 0.4964664)
-
-S_ROW( 4, 'F', 0.4648942, 0.4389910, 0.2989696, 0.3307441, 8.1287983,
- 0.3406407, 0.6519893, 0.9457698, 0.3440433, 1.1545978,
- 1.0043715, 0.3542882, 0.2874440, 0.3339729, 0.3807263,
- 0.4399736, 0.4816930, 0.7450894, 1.3743775, 2.7693817)
-
-S_ROW( 5, 'G', 1.0568696, 0.4203886, 0.6343015, 0.4812675, 0.3406407,
- 6.8763075, 0.4929663, 0.2750096, 0.5888716, 0.2845039,
- 0.3954865, 0.8637114, 0.4773858, 0.5386498, 0.4499840,
- 0.9035965, 0.5792712, 0.3369551, 0.4216898, 0.3487141)
-
-S_ROW( 6, 'H', 0.5693654, 0.3550472, 0.6785593, 0.9600400, 0.6519893,
- 0.4929663, 13.5060070, 0.3262878, 0.7788884, 0.3806759,
- 0.5841316, 1.2220028, 0.4728797, 1.1679835, 0.9170473,
- 0.7367319, 0.5575021, 0.3394474, 0.4440859, 1.7979036)
-
-S_ROW( 7, 'I', 0.6324813, 0.6534589, 0.3390155, 0.3305223, 0.9457698,
- 0.2750096, 0.3262878, 3.9979299, 0.3963730, 1.6944349,
- 1.4777449, 0.3279345, 0.3846629, 0.3829375, 0.3547509,
- 0.4431634, 0.7798163, 2.4175121, 0.4088732, 0.6303898)
-
-S_ROW( 8, 'K', 0.7753898, 0.3491296, 0.7840905, 1.3082782, 0.3440433,
- 0.5888716, 0.7788884, 0.3963730, 4.7643359, 0.4282702,
- 0.6253033, 0.9398419, 0.7037741, 1.5543233, 2.0768092,
- 0.9319192, 0.7929060, 0.4565429, 0.3589319, 0.5321784)
-
-S_ROW( 9, 'L', 0.6019460, 0.6422760, 0.2866128, 0.3728734, 1.1545978,
- 0.2845039, 0.3806759, 1.6944349, 0.4282702, 3.7966214,
- 1.9942957, 0.3100430, 0.3711219, 0.4773261, 0.4739194,
- 0.4288939, 0.6603292, 1.3142355, 0.5680359, 0.6920589)
-
-S_ROW(10, 'M', 0.7231498, 0.6113537, 0.3464547, 0.5003421, 1.0043715,
- 0.3954865, 0.5841316, 1.4777449, 0.6253033, 1.9942957,
- 6.4814549, 0.4745299, 0.4238960, 0.8642486, 0.6226249,
- 0.5985578, 0.7938018, 1.2689365, 0.6103022, 0.7083636)
-
-S_ROW(11, 'N', 0.5883077, 0.3978026, 1.5538520, 0.9112983, 0.3542882,
- 0.8637114, 1.2220028, 0.3279345, 0.9398419, 0.3100430,
- 0.4745299, 7.0940964, 0.4999337, 1.0005835, 0.8586298,
- 1.2315289, 0.9841525, 0.3690340, 0.2777841, 0.4860309)
-
-S_ROW(12, 'P', 0.7541214, 0.3795628, 0.5987177, 0.6792027, 0.2874440,
- 0.4773858, 0.4728797, 0.3846629, 0.7037741, 0.3711219,
- 0.4238960, 0.4999337, 12.8375452, 0.6412803, 0.4815348,
- 0.7555033, 0.6888962, 0.4430825, 0.2818321, 0.3635216)
-
-S_ROW(13, 'Q', 0.7568035, 0.3657796, 0.8970811, 1.9017376, 0.3339729,
- 0.5386498, 1.1679835, 0.3829375, 1.5543233, 0.4773261,
- 0.8642486, 1.0005835, 0.6412803, 6.2444210, 1.4057958,
- 0.9655559, 0.7913219, 0.4667781, 0.5093584, 0.6110951)
-
-S_ROW(14, 'R', 0.6126988, 0.3089379, 0.5732000, 0.9607983, 0.3807263,
- 0.4499840, 0.9170473, 0.3547509, 2.0768092, 0.4739194,
- 0.6226249, 0.8586298, 0.4815348, 1.4057958, 6.6655769,
- 0.7671661, 0.6777544, 0.4200721, 0.3951049, 0.5559652)
-
-S_ROW(15, 'S', 1.4721037, 0.7384148, 0.9135051, 0.9503570, 0.4399736,
- 0.9035965, 0.7367319, 0.4431634, 0.9319192, 0.4288939,
- 0.5985578, 1.2315289, 0.7555033, 0.9655559, 0.7671661,
- 3.8428476, 1.6139205, 0.5652240, 0.3853031, 0.5575206)
-
-S_ROW(16, 'T', 0.9844022, 0.7405530, 0.6947898, 0.7414260, 0.4816930,
- 0.5792712, 0.5575021, 0.7798163, 0.7929060, 0.6603292,
- 0.7938018, 0.9841525, 0.6888962, 0.7913219, 0.6777544,
- 1.6139205, 4.8321048, 0.9809432, 0.4309317, 0.5731577)
-
-S_ROW(17, 'V', 0.9364584, 0.7558448, 0.3365004, 0.4289431, 0.7450894,
- 0.3369551, 0.3394474, 2.4175121, 0.4565429, 1.3142355,
- 1.2689365, 0.3690340, 0.4430825, 0.4667781, 0.4200721,
- 0.5652240, 0.9809432, 3.6921553, 0.3744576, 0.6580390)
-
-S_ROW(18, 'W', 0.4165484, 0.4499807, 0.2321050, 0.3743021, 1.3743775,
- 0.4216898, 0.4440859, 0.4088732, 0.3589319, 0.5680359,
- 0.6103022, 0.2777841, 0.2818321, 0.5093584, 0.3951049,
- 0.3853031, 0.4309317, 0.3744576, 38.1077830, 2.1098056)
-
-S_ROW(19, 'Y', 0.5426125, 0.4342013, 0.3456829, 0.4964664, 2.7693817,
- 0.3487141, 1.7979036, 0.6303898, 0.5321784, 0.6920589,
- 0.7083636, 0.4860309, 0.3635216, 0.6110951, 0.5559652,
- 0.5575206, 0.5731577, 0.6580390, 2.1098056, 9.8322054)
- };
Deleted: trunk/packages/muscle/trunk/clust.cpp
===================================================================
--- trunk/packages/muscle/trunk/clust.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/clust.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,666 +0,0 @@
-#include "muscle.h"
-#include "clust.h"
-#include "clustset.h"
-#include <stdio.h>
-
-#define TRACE 0
-
-Clust::Clust()
- {
- m_Nodes = 0;
- m_uNodeCount = 0;
- m_uLeafCount = 0;
- m_uClusterCount = 0;
- m_JoinStyle = JOIN_Undefined;
- m_dDist = 0;
- m_uLeafCount = 0;
- m_ptrSet = 0;
- }
-
-Clust::~Clust()
- {
- delete[] m_Nodes;
- delete[] m_dDist;
- delete[] m_ClusterIndexToNodeIndex;
- }
-
-void Clust::Create(ClustSet &Set, CLUSTER Method)
- {
- m_ptrSet = &Set;
-
- SetLeafCount(Set.GetLeafCount());
-
- switch (Method)
- {
- case CLUSTER_UPGMA:
- m_JoinStyle = JOIN_NearestNeighbor;
- m_CentroidStyle = LINKAGE_Avg;
- break;
-
- case CLUSTER_UPGMAMax:
- m_JoinStyle = JOIN_NearestNeighbor;
- m_CentroidStyle = LINKAGE_Max;
- break;
-
- case CLUSTER_UPGMAMin:
- m_JoinStyle = JOIN_NearestNeighbor;
- m_CentroidStyle = LINKAGE_Min;
- break;
-
- case CLUSTER_UPGMB:
- m_JoinStyle = JOIN_NearestNeighbor;
- m_CentroidStyle = LINKAGE_Biased;
- break;
-
- case CLUSTER_NeighborJoining:
- m_JoinStyle = JOIN_NeighborJoining;
- m_CentroidStyle = LINKAGE_NeighborJoining;
- break;
-
- default:
- Quit("Clust::Create, invalid method %d", Method);
- }
-
- if (m_uLeafCount <= 1)
- Quit("Clust::Create: no leaves");
-
- m_uNodeCount = 2*m_uLeafCount - 1;
- m_Nodes = new ClustNode[m_uNodeCount];
- m_ClusterIndexToNodeIndex = new unsigned[m_uLeafCount];
-
- m_ptrClusterList = 0;
- for (unsigned uNodeIndex = 0; uNodeIndex < m_uNodeCount; ++uNodeIndex)
- {
- ClustNode &Node = m_Nodes[uNodeIndex];
- Node.m_uIndex = uNodeIndex;
- if (uNodeIndex < m_uLeafCount)
- {
- Node.m_uSize = 1;
- Node.m_uLeafIndexes = new unsigned[1];
- Node.m_uLeafIndexes[0] = uNodeIndex;
- AddToClusterList(uNodeIndex);
- }
- else
- Node.m_uSize = 0;
- }
-
-// Compute initial distance matrix between leaves
- SetProgressDesc("Build dist matrix");
- unsigned uPairIndex = 0;
- const unsigned uPairCount = (m_uLeafCount*(m_uLeafCount - 1))/2;
- for (unsigned i = 0; i < m_uLeafCount; ++i)
- for (unsigned j = 0; j < i; ++j)
- {
- const float dDist = (float) m_ptrSet->ComputeDist(*this, i, j);
- SetDist(i, j, dDist);
- if (0 == uPairIndex%10000)
- Progress(uPairIndex, uPairCount);
- ++uPairIndex;
- }
- ProgressStepsDone();
-
-// Call CreateCluster once for each internal node in the tree
- SetProgressDesc("Build guide tree");
- m_uClusterCount = m_uLeafCount;
- const unsigned uInternalNodeCount = m_uNodeCount - m_uLeafCount;
- for (unsigned uNodeIndex = m_uLeafCount; uNodeIndex < m_uNodeCount; ++uNodeIndex)
- {
- unsigned i = uNodeIndex + 1 - m_uLeafCount;
- Progress(i, uInternalNodeCount);
- CreateCluster();
- }
- ProgressStepsDone();
- }
-
-void Clust::CreateCluster()
- {
- unsigned uLeftNodeIndex;
- unsigned uRightNodeIndex;
- float dLeftLength;
- float dRightLength;
- ChooseJoin(&uLeftNodeIndex, &uRightNodeIndex, &dLeftLength, &dRightLength);
-
- const unsigned uNewNodeIndex = m_uNodeCount - m_uClusterCount + 1;
-
- JoinNodes(uLeftNodeIndex, uRightNodeIndex, dLeftLength, dRightLength,
- uNewNodeIndex);
-
-#if TRACE
- Log("Merge New=%u L=%u R=%u Ld=%7.2g Rd=%7.2g\n",
- uNewNodeIndex, uLeftNodeIndex, uRightNodeIndex, dLeftLength, dRightLength);
-#endif
-
-// Compute distances to other clusters
- --m_uClusterCount;
- for (unsigned uNodeIndex = GetFirstCluster(); uNodeIndex != uInsane;
- uNodeIndex = GetNextCluster(uNodeIndex))
- {
- if (uNodeIndex == uLeftNodeIndex || uNodeIndex == uRightNodeIndex)
- continue;
-
- if (uNewNodeIndex == uNodeIndex)
- continue;
-
- const float dDist = ComputeDist(uNewNodeIndex, uNodeIndex);
- SetDist(uNewNodeIndex, uNodeIndex, dDist);
- }
-
- for (unsigned uNodeIndex = GetFirstCluster(); uNodeIndex != uInsane;
- uNodeIndex = GetNextCluster(uNodeIndex))
- {
- if (uNodeIndex == uLeftNodeIndex || uNodeIndex == uRightNodeIndex)
- continue;
-
- if (uNewNodeIndex == uNodeIndex)
- continue;
-
-#if REDLACK
- const float dMetric = ComputeMetric(uNewNodeIndex, uNodeIndex);
- InsertMetric(uNewNodeIndex, uNodeIndex, dMetric);
-#endif
- }
- }
-
-void Clust::ChooseJoin(unsigned *ptruLeftIndex, unsigned *ptruRightIndex,
- float *ptrdLeftLength, float *ptrdRightLength)
- {
- switch (m_JoinStyle)
- {
- case JOIN_NearestNeighbor:
- ChooseJoinNearestNeighbor(ptruLeftIndex, ptruRightIndex, ptrdLeftLength,
- ptrdRightLength);
- return;
- case JOIN_NeighborJoining:
- ChooseJoinNeighborJoining(ptruLeftIndex, ptruRightIndex, ptrdLeftLength,
- ptrdRightLength);
- return;
- }
- Quit("Clust::ChooseJoin, Invalid join style %u", m_JoinStyle);
- }
-
-void Clust::ChooseJoinNearestNeighbor(unsigned *ptruLeftIndex,
- unsigned *ptruRightIndex, float *ptrdLeftLength, float *ptrdRightLength)
- {
- const unsigned uClusterCount = GetClusterCount();
-
- unsigned uMinLeftNodeIndex;
- unsigned uMinRightNodeIndex;
- GetMinMetric(&uMinLeftNodeIndex, &uMinRightNodeIndex);
-
- float dMinDist = GetDist(uMinLeftNodeIndex, uMinRightNodeIndex);
-
- const float dLeftHeight = GetHeight(uMinLeftNodeIndex);
- const float dRightHeight = GetHeight(uMinRightNodeIndex);
-
- *ptruLeftIndex = uMinLeftNodeIndex;
- *ptruRightIndex = uMinRightNodeIndex;
- *ptrdLeftLength = dMinDist/2 - dLeftHeight;
- *ptrdRightLength = dMinDist/2 - dRightHeight;
- }
-
-void Clust::ChooseJoinNeighborJoining(unsigned *ptruLeftIndex,
- unsigned *ptruRightIndex, float *ptrdLeftLength, float *ptrdRightLength)
- {
- const unsigned uClusterCount = GetClusterCount();
-
- //unsigned uMinLeftNodeIndex = uInsane;
- //unsigned uMinRightNodeIndex = uInsane;
- //float dMinD = PLUS_INFINITY;
- //for (unsigned i = GetFirstCluster(); i != uInsane; i = GetNextCluster(i))
- // {
- // const float ri = Calc_r(i);
- // for (unsigned j = GetNextCluster(i); j != uInsane; j = GetNextCluster(j))
- // {
- // const float rj = Calc_r(j);
- // const float dij = GetDist(i, j);
- // const float Dij = dij - (ri + rj);
- // if (Dij < dMinD)
- // {
- // dMinD = Dij;
- // uMinLeftNodeIndex = i;
- // uMinRightNodeIndex = j;
- // }
- // }
- // }
-
- unsigned uMinLeftNodeIndex;
- unsigned uMinRightNodeIndex;
- GetMinMetric(&uMinLeftNodeIndex, &uMinRightNodeIndex);
-
- const float dDistLR = GetDist(uMinLeftNodeIndex, uMinRightNodeIndex);
- const float rL = Calc_r(uMinLeftNodeIndex);
- const float rR = Calc_r(uMinRightNodeIndex);
-
- const float dLeftLength = (dDistLR + rL - rR)/2;
- const float dRightLength = (dDistLR - rL + rR)/2;
-
- *ptruLeftIndex = uMinLeftNodeIndex;
- *ptruRightIndex = uMinRightNodeIndex;
- *ptrdLeftLength = dLeftLength;
- *ptrdRightLength = dRightLength;
- }
-
-void Clust::JoinNodes(unsigned uLeftIndex, unsigned uRightIndex, float dLeftLength,
- float dRightLength, unsigned uNodeIndex)
- {
- ClustNode &Parent = m_Nodes[uNodeIndex];
- ClustNode &Left = m_Nodes[uLeftIndex];
- ClustNode &Right = m_Nodes[uRightIndex];
-
- Left.m_dLength = dLeftLength;
- Right.m_dLength = dRightLength;
-
- Parent.m_ptrLeft = &Left;
- Parent.m_ptrRight = &Right;
-
- Left.m_ptrParent = &Parent;
- Right.m_ptrParent = &Parent;
-
- const unsigned uLeftSize = Left.m_uSize;
- const unsigned uRightSize = Right.m_uSize;
- const unsigned uParentSize = uLeftSize + uRightSize;
- Parent.m_uSize = uParentSize;
-
- assert(0 == Parent.m_uLeafIndexes);
- Parent.m_uLeafIndexes = new unsigned[uParentSize];
-
- const unsigned uLeftBytes = uLeftSize*sizeof(unsigned);
- const unsigned uRightBytes = uRightSize*sizeof(unsigned);
- memcpy(Parent.m_uLeafIndexes, Left.m_uLeafIndexes, uLeftBytes);
- memcpy(Parent.m_uLeafIndexes + uLeftSize, Right.m_uLeafIndexes, uRightBytes);
-
- DeleteFromClusterList(uLeftIndex);
- DeleteFromClusterList(uRightIndex);
- AddToClusterList(uNodeIndex);
- }
-
-float Clust::Calc_r(unsigned uNodeIndex) const
- {
- const unsigned uClusterCount = GetClusterCount();
- if (2 == uClusterCount)
- return 0;
-
- float dSum = 0;
- for (unsigned i = GetFirstCluster(); i != uInsane; i = GetNextCluster(i))
- {
- if (i == uNodeIndex)
- continue;
- dSum += GetDist(uNodeIndex, i);
- }
- return dSum/(uClusterCount - 2);
- }
-
-float Clust::ComputeDist(unsigned uNewNodeIndex, unsigned uNodeIndex)
- {
- switch (m_CentroidStyle)
- {
- case LINKAGE_Avg:
- return ComputeDistAverageLinkage(uNewNodeIndex, uNodeIndex);
-
- case LINKAGE_Min:
- return ComputeDistMinLinkage(uNewNodeIndex, uNodeIndex);
-
- case LINKAGE_Max:
- return ComputeDistMaxLinkage(uNewNodeIndex, uNodeIndex);
-
- case LINKAGE_Biased:
- return ComputeDistMAFFT(uNewNodeIndex, uNodeIndex);
-
- case LINKAGE_NeighborJoining:
- return ComputeDistNeighborJoining(uNewNodeIndex, uNodeIndex);
- }
- Quit("Clust::ComputeDist, invalid centroid style %u", m_CentroidStyle);
- return (float) g_dNAN;
- }
-
-float Clust::ComputeDistMinLinkage(unsigned uNewNodeIndex, unsigned uNodeIndex)
- {
- const unsigned uLeftNodeIndex = GetLeftIndex(uNewNodeIndex);
- const unsigned uRightNodeIndex = GetRightIndex(uNewNodeIndex);
- const float dDistL = GetDist(uLeftNodeIndex, uNodeIndex);
- const float dDistR = GetDist(uRightNodeIndex, uNodeIndex);
- return (dDistL < dDistR ? dDistL : dDistR);
- }
-
-float Clust::ComputeDistMaxLinkage(unsigned uNewNodeIndex, unsigned uNodeIndex)
- {
- const unsigned uLeftNodeIndex = GetLeftIndex(uNewNodeIndex);
- const unsigned uRightNodeIndex = GetRightIndex(uNewNodeIndex);
- const float dDistL = GetDist(uLeftNodeIndex, uNodeIndex);
- const float dDistR = GetDist(uRightNodeIndex, uNodeIndex);
- return (dDistL > dDistR ? dDistL : dDistR);
- }
-
-float Clust::ComputeDistAverageLinkage(unsigned uNewNodeIndex, unsigned uNodeIndex)
- {
- const unsigned uLeftNodeIndex = GetLeftIndex(uNewNodeIndex);
- const unsigned uRightNodeIndex = GetRightIndex(uNewNodeIndex);
- const float dDistL = GetDist(uLeftNodeIndex, uNodeIndex);
- const float dDistR = GetDist(uRightNodeIndex, uNodeIndex);
- return (dDistL + dDistR)/2;
- }
-
-float Clust::ComputeDistNeighborJoining(unsigned uNewNodeIndex, unsigned uNodeIndex)
- {
- const unsigned uLeftNodeIndex = GetLeftIndex(uNewNodeIndex);
- const unsigned uRightNodeIndex = GetRightIndex(uNewNodeIndex);
- const float dDistLR = GetDist(uLeftNodeIndex, uRightNodeIndex);
- const float dDistL = GetDist(uLeftNodeIndex, uNodeIndex);
- const float dDistR = GetDist(uRightNodeIndex, uNodeIndex);
- const float dDist = (dDistL + dDistR - dDistLR)/2;
- return dDist;
- }
-
-// This is a mysterious variant of UPGMA reverse-engineered from MAFFT source.
-float Clust::ComputeDistMAFFT(unsigned uNewNodeIndex, unsigned uNodeIndex)
- {
- const unsigned uLeftNodeIndex = GetLeftIndex(uNewNodeIndex);
- const unsigned uRightNodeIndex = GetRightIndex(uNewNodeIndex);
-
- const float dDistLR = GetDist(uLeftNodeIndex, uRightNodeIndex);
- const float dDistL = GetDist(uLeftNodeIndex, uNodeIndex);
- const float dDistR = GetDist(uRightNodeIndex, uNodeIndex);
- const float dMinDistLR = (dDistL < dDistR ? dDistL : dDistR);
- const float dSumDistLR = dDistL + dDistR;
- const float dDist = dMinDistLR*(1 - g_dSUEFF) + dSumDistLR*g_dSUEFF/2;
- return dDist;
- }
-
-unsigned Clust::GetClusterCount() const
- {
- return m_uClusterCount;
- }
-
-void Clust::LogMe() const
- {
- Log("Clust %u leaves, %u nodes, %u clusters.\n",
- m_uLeafCount, m_uNodeCount, m_uClusterCount);
-
- Log("Distance matrix\n");
- const unsigned uNodeCount = GetNodeCount();
- Log(" ");
- for (unsigned i = 0; i < uNodeCount - 1; ++i)
- Log(" %7u", i);
- Log("\n");
-
- Log(" ");
- for (unsigned i = 0; i < uNodeCount - 1; ++i)
- Log(" ------");
- Log("\n");
-
- for (unsigned i = 0; i < uNodeCount - 1; ++i)
- {
- Log("%4u: ", i);
- for (unsigned j = 0; j < i; ++j)
- Log(" %7.2g", GetDist(i, j));
- Log("\n");
- }
-
- Log("\n");
- Log("Node Size Prnt Left Rght Length Name\n");
- Log("---- ---- ---- ---- ---- ------ ----\n");
- for (unsigned uNodeIndex = 0; uNodeIndex < m_uNodeCount; ++uNodeIndex)
- {
- const ClustNode &Node = m_Nodes[uNodeIndex];
- Log("%4u %4u", uNodeIndex, Node.m_uSize);
- if (0 != Node.m_ptrParent)
- Log(" %4u", Node.m_ptrParent->m_uIndex);
- else
- Log(" ");
-
- if (0 != Node.m_ptrLeft)
- Log(" %4u", Node.m_ptrLeft->m_uIndex);
- else
- Log(" ");
-
- if (0 != Node.m_ptrRight)
- Log(" %4u", Node.m_ptrRight->m_uIndex);
- else
- Log(" ");
-
- if (uNodeIndex != m_uNodeCount - 1)
- Log(" %7.3g", Node.m_dLength);
- if (IsLeaf(uNodeIndex))
- {
- const char *ptrName = GetNodeName(uNodeIndex);
- if (0 != ptrName)
- Log(" %s", ptrName);
- }
- if (GetRootNodeIndex() == uNodeIndex)
- Log(" [ROOT]");
- Log("\n");
- }
- }
-
-const ClustNode &Clust::GetNode(unsigned uNodeIndex) const
- {
- if (uNodeIndex >= m_uNodeCount)
- Quit("ClustNode::GetNode(%u) %u", uNodeIndex, m_uNodeCount);
- return m_Nodes[uNodeIndex];
- }
-
-bool Clust::IsLeaf(unsigned uNodeIndex) const
- {
- return uNodeIndex < m_uLeafCount;
- }
-
-unsigned Clust::GetClusterSize(unsigned uNodeIndex) const
- {
- const ClustNode &Node = GetNode(uNodeIndex);
- return Node.m_uSize;
- }
-
-unsigned Clust::GetLeftIndex(unsigned uNodeIndex) const
- {
- const ClustNode &Node = GetNode(uNodeIndex);
- if (0 == Node.m_ptrLeft)
- Quit("Clust::GetLeftIndex: leaf");
- return Node.m_ptrLeft->m_uIndex;
- }
-
-unsigned Clust::GetRightIndex(unsigned uNodeIndex) const
- {
- const ClustNode &Node = GetNode(uNodeIndex);
- if (0 == Node.m_ptrRight)
- Quit("Clust::GetRightIndex: leaf");
- return Node.m_ptrRight->m_uIndex;
- }
-
-float Clust::GetLength(unsigned uNodeIndex) const
- {
- const ClustNode &Node = GetNode(uNodeIndex);
- return Node.m_dLength;
- }
-
-void Clust::SetLeafCount(unsigned uLeafCount)
- {
- if (uLeafCount <= 1)
- Quit("Clust::SetLeafCount(%u)", uLeafCount);
-
- m_uLeafCount = uLeafCount;
- const unsigned uNodeCount = GetNodeCount();
-
-// Triangular matrix size excluding diagonal (all zeros in our case).
- m_uTriangularMatrixSize = (uNodeCount*(uNodeCount - 1))/2;
- m_dDist = new float[m_uTriangularMatrixSize];
- }
-
-unsigned Clust::GetLeafCount() const
- {
- return m_uLeafCount;
- }
-
-unsigned Clust::VectorIndex(unsigned uIndex1, unsigned uIndex2) const
- {
- const unsigned uNodeCount = GetNodeCount();
- if (uIndex1 >= uNodeCount || uIndex2 >= uNodeCount)
- Quit("DistVectorIndex(%u,%u) %u", uIndex1, uIndex2, uNodeCount);
- unsigned v;
- if (uIndex1 >= uIndex2)
- v = uIndex2 + (uIndex1*(uIndex1 - 1))/2;
- else
- v = uIndex1 + (uIndex2*(uIndex2 - 1))/2;
- assert(v < m_uTriangularMatrixSize);
- return v;
- }
-
-float Clust::GetDist(unsigned uIndex1, unsigned uIndex2) const
- {
- unsigned v = VectorIndex(uIndex1, uIndex2);
- return m_dDist[v];
- }
-
-void Clust::SetDist(unsigned uIndex1, unsigned uIndex2, float dDist)
- {
- unsigned v = VectorIndex(uIndex1, uIndex2);
- m_dDist[v] = dDist;
- }
-
-float Clust::GetHeight(unsigned uNodeIndex) const
- {
- if (IsLeaf(uNodeIndex))
- return 0;
-
- const unsigned uLeftIndex = GetLeftIndex(uNodeIndex);
- const unsigned uRightIndex = GetRightIndex(uNodeIndex);
- const float dLeftLength = GetLength(uLeftIndex);
- const float dRightLength = GetLength(uRightIndex);
- const float dLeftHeight = dLeftLength + GetHeight(uLeftIndex);
- const float dRightHeight = dRightLength + GetHeight(uRightIndex);
- return (dLeftHeight + dRightHeight)/2;
- }
-
-const char *Clust::GetNodeName(unsigned uNodeIndex) const
- {
- if (!IsLeaf(uNodeIndex))
- Quit("Clust::GetNodeName, is not leaf");
- return m_ptrSet->GetLeafName(uNodeIndex);
- }
-
-unsigned Clust::GetNodeId(unsigned uNodeIndex) const
- {
- if (uNodeIndex >= GetLeafCount())
- return 0;
- return m_ptrSet->GetLeafId(uNodeIndex);
- }
-
-unsigned Clust::GetLeaf(unsigned uNodeIndex, unsigned uLeafIndex) const
- {
- const ClustNode &Node = GetNode(uNodeIndex);
- const unsigned uLeafCount = Node.m_uSize;
- if (uLeafIndex >= uLeafCount)
- Quit("Clust::GetLeaf, invalid index");
- const unsigned uIndex = Node.m_uLeafIndexes[uLeafIndex];
- if (uIndex >= m_uNodeCount)
- Quit("Clust::GetLeaf, index out of range");
- return uIndex;
- }
-
-unsigned Clust::GetFirstCluster() const
- {
- if (0 == m_ptrClusterList)
- return uInsane;
- return m_ptrClusterList->m_uIndex;
- }
-
-unsigned Clust::GetNextCluster(unsigned uIndex) const
- {
- ClustNode *ptrNode = &m_Nodes[uIndex];
- if (0 == ptrNode->m_ptrNextCluster)
- return uInsane;
- return ptrNode->m_ptrNextCluster->m_uIndex;
- }
-
-void Clust::DeleteFromClusterList(unsigned uNodeIndex)
- {
- assert(uNodeIndex < m_uNodeCount);
- ClustNode *ptrNode = &m_Nodes[uNodeIndex];
- ClustNode *ptrPrev = ptrNode->m_ptrPrevCluster;
- ClustNode *ptrNext = ptrNode->m_ptrNextCluster;
-
- if (0 != ptrNext)
- ptrNext->m_ptrPrevCluster = ptrPrev;
- if (0 == ptrPrev)
- {
- assert(m_ptrClusterList == ptrNode);
- m_ptrClusterList = ptrNext;
- }
- else
- ptrPrev->m_ptrNextCluster = ptrNext;
-
- ptrNode->m_ptrNextCluster = 0;
- ptrNode->m_ptrPrevCluster = 0;
- }
-
-void Clust::AddToClusterList(unsigned uNodeIndex)
- {
- assert(uNodeIndex < m_uNodeCount);
- ClustNode *ptrNode = &m_Nodes[uNodeIndex];
-
- if (0 != m_ptrClusterList)
- m_ptrClusterList->m_ptrPrevCluster = ptrNode;
-
- ptrNode->m_ptrNextCluster = m_ptrClusterList;
- ptrNode->m_ptrPrevCluster = 0;
-
- m_ptrClusterList = ptrNode;
- }
-
-float Clust::ComputeMetric(unsigned uIndex1, unsigned uIndex2) const
- {
- switch (m_JoinStyle)
- {
- case JOIN_NearestNeighbor:
- return ComputeMetricNearestNeighbor(uIndex1, uIndex2);
-
- case JOIN_NeighborJoining:
- return ComputeMetricNeighborJoining(uIndex1, uIndex2);
- }
- Quit("Clust::ComputeMetric");
- return 0;
- }
-
-float Clust::ComputeMetricNeighborJoining(unsigned i, unsigned j) const
- {
- float ri = Calc_r(i);
- float rj = Calc_r(j);
- float dij = GetDist(i, j);
- float dMetric = dij - (ri + rj);
- return (float) dMetric;
- }
-
-float Clust::ComputeMetricNearestNeighbor(unsigned i, unsigned j) const
- {
- return (float) GetDist(i, j);
- }
-
-float Clust::GetMinMetricBruteForce(unsigned *ptruIndex1, unsigned *ptruIndex2) const
- {
- unsigned uMinLeftNodeIndex = uInsane;
- unsigned uMinRightNodeIndex = uInsane;
- float dMinMetric = PLUS_INFINITY;
- for (unsigned uLeftNodeIndex = GetFirstCluster(); uLeftNodeIndex != uInsane;
- uLeftNodeIndex = GetNextCluster(uLeftNodeIndex))
- {
- for (unsigned uRightNodeIndex = GetNextCluster(uLeftNodeIndex);
- uRightNodeIndex != uInsane;
- uRightNodeIndex = GetNextCluster(uRightNodeIndex))
- {
- float dMetric = ComputeMetric(uLeftNodeIndex, uRightNodeIndex);
- if (dMetric < dMinMetric)
- {
- dMinMetric = dMetric;
- uMinLeftNodeIndex = uLeftNodeIndex;
- uMinRightNodeIndex = uRightNodeIndex;
- }
- }
- }
- *ptruIndex1 = uMinLeftNodeIndex;
- *ptruIndex2 = uMinRightNodeIndex;
- return dMinMetric;
- }
-
-float Clust::GetMinMetric(unsigned *ptruIndex1, unsigned *ptruIndex2) const
- {
- return GetMinMetricBruteForce(ptruIndex1, ptruIndex2);
- }
Deleted: trunk/packages/muscle/trunk/clust.h
===================================================================
--- trunk/packages/muscle/trunk/clust.h 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/clust.h 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,148 +0,0 @@
-#ifndef Clust_h
-#define Clust_h
-
-class Clust;
-class ClustNode;
-class ClustSet;
-class Phylip;
-class SortedNode;
-
-const unsigned RB_NIL = ((unsigned) 0xfff0);
-
-class ClustNode
- {
-public:
- ClustNode()
- {
- m_uIndex = uInsane;
- m_uSize = uInsane;
- m_dLength = (float) dInsane;
- m_ptrLeft = 0;
- m_ptrRight = 0;
- m_ptrParent = 0;
- m_ptrNextCluster = 0;
- m_ptrPrevCluster = 0;
- m_uLeafIndexes = 0;
- }
- ~ClustNode()
- {
- delete[] m_uLeafIndexes;
- }
- unsigned m_uIndex;
- unsigned m_uSize;
- float m_dLength;
- ClustNode *m_ptrLeft;
- ClustNode *m_ptrRight;
- ClustNode *m_ptrParent;
- ClustNode *m_ptrNextCluster;
- ClustNode *m_ptrPrevCluster;
- unsigned *m_uLeafIndexes;
- };
-
-class Clust
- {
-public:
- Clust();
- virtual ~Clust();
-
- void Create(ClustSet &Set, CLUSTER Method);
-
- unsigned GetLeafCount() const;
-
- unsigned GetClusterCount() const;
- unsigned GetClusterSize(unsigned uNodeIndex) const;
- unsigned GetLeaf(unsigned uClusterIndex, unsigned uLeafIndex) const;
-
- unsigned GetNodeCount() const { return 2*m_uLeafCount - 1; }
- const ClustNode &GetRoot() const { return m_Nodes[GetRootNodeIndex()]; }
- unsigned GetRootNodeIndex() const { return m_uNodeCount - 1; }
-
- const ClustNode &GetNode(unsigned uNodeIndex) const;
- bool IsLeaf(unsigned uNodeIndex) const;
- unsigned GetLeftIndex(unsigned uNodeIndex) const;
- unsigned GetRightIndex(unsigned uNodeIndex) const;
- float GetLength(unsigned uNodeIndex) const;
- float GetHeight(unsigned uNodeIndex) const;
- const char *GetNodeName(unsigned uNodeIndex) const;
- unsigned GetNodeId(unsigned uNodeIndex) const;
-
- JOIN GetJoinStyle() const { return m_JoinStyle; }
- LINKAGE GetCentroidStyle() const { return m_CentroidStyle; }
-
- void SetDist(unsigned uIndex1, unsigned uIndex2, float dDist);
- float GetDist(unsigned uIndex1, unsigned uIndex2) const;
-
- void ToPhylip(Phylip &tree);
-
- void LogMe() const;
-
-//private:
- void SetLeafCount(unsigned uLeafCount);
-
- void CreateCluster();
- void JoinNodes(unsigned uLeftNodeIndex, unsigned uRightNodeIndex,
- float dLeftLength, float dRightLength, unsigned uNewNodeIndex);
-
- void ChooseJoin(unsigned *ptruLeftIndex, unsigned *ptruRightIndex,
- float *ptrdLeftLength, float *ptrdRightLength);
- void ChooseJoinNeighborJoining(unsigned *ptruLeftIndex, unsigned *ptruRightIndex,
- float *ptrdLeftLength, float *ptrdRightLength);
- void ChooseJoinNearestNeighbor(unsigned *ptruLeftIndex, unsigned *ptruRightIndex,
- float *ptrdLeftLength, float *ptrdRightLength);
-
- float ComputeDist(unsigned uNewNodeIndex, unsigned uNodeIndex);
- float ComputeDistAverageLinkage(unsigned uNewNodeIndex, unsigned uNodeIndex);
- float ComputeDistMinLinkage(unsigned uNewNodeIndex, unsigned uNodeIndex);
- float ComputeDistMaxLinkage(unsigned uNewNodeIndex, unsigned uNodeIndex);
- float ComputeDistNeighborJoining(unsigned uNewNewIndex, unsigned uNodeIndex);
- float ComputeDistMAFFT(unsigned uNewNewIndex, unsigned uNodeIndex);
-
- float Calc_r(unsigned uNodeIndex) const;
-
- unsigned VectorIndex(unsigned uIndex1, unsigned uIndex2) const;
-
- unsigned GetFirstCluster() const;
- unsigned GetNextCluster(unsigned uNodeIndex) const;
-
- float ComputeMetric(unsigned uIndex1, unsigned uIndex2) const;
- float ComputeMetricNearestNeighbor(unsigned i, unsigned j) const;
- float ComputeMetricNeighborJoining(unsigned i, unsigned j) const;
-
- void InitMetric(unsigned uMaxNodeIndex);
- void InsertMetric(unsigned uIndex1, unsigned uIndex2, float dMetric);
- float GetMinMetric(unsigned *ptruIndex1, unsigned *ptruIndex2) const;
- float GetMinMetricBruteForce(unsigned *ptruIndex1, unsigned *ptruIndex2) const;
- void DeleteMetric(unsigned uIndex);
- void DeleteMetric(unsigned uIndex1, unsigned uIndex2);
- void ListMetric() const;
-
- void DeleteFromClusterList(unsigned uNodeIndex);
- void AddToClusterList(unsigned uNodeIndex);
-
- void RBDelete(unsigned RBNode);
- unsigned RBInsert(unsigned i, unsigned j, float fMetric);
-
- unsigned RBNext(unsigned RBNode) const;
- unsigned RBPrev(unsigned RBNode) const;
- unsigned RBMin(unsigned RBNode) const;
- unsigned RBMax(unsigned RBNode) const;
-
- void ValidateRB(const char szMsg[] = 0) const;
- void ValidateRBNode(unsigned Node, const char szMsg[]) const;
-
-//private:
- JOIN m_JoinStyle;
- LINKAGE m_CentroidStyle;
- ClustNode *m_Nodes;
- unsigned *m_ClusterIndexToNodeIndex;
- unsigned *m_NodeIndexToClusterIndex;
- unsigned m_uLeafCount;
- unsigned m_uNodeCount;
- unsigned m_uClusterCount;
- unsigned m_uTriangularMatrixSize;
- float *m_dDist;
- ClustSet *m_ptrSet;
- ClustNode *m_ptrClusterList;
- };
-
-#endif // Clust_h
Deleted: trunk/packages/muscle/trunk/cluster.cpp
===================================================================
--- trunk/packages/muscle/trunk/cluster.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/cluster.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,339 +0,0 @@
-#include "muscle.h"
-#include "cluster.h"
-#include "distfunc.h"
-
-static inline float Min(float d1, float d2)
- {
- return d1 < d2 ? d1 : d2;
- }
-
-static inline float Max(float d1, float d2)
- {
- return d1 > d2 ? d1 : d2;
- }
-
-static inline float Mean(float d1, float d2)
- {
- return (float) ((d1 + d2)/2.0);
- }
-
-#if _DEBUG
-void ClusterTree::Validate(unsigned uNodeCount)
- {
- unsigned n;
- ClusterNode *pNode;
- unsigned uDisjointListCount = 0;
- for (pNode = m_ptrDisjoints; pNode; pNode = pNode->GetNextDisjoint())
- {
- ClusterNode *pPrev = pNode->GetPrevDisjoint();
- ClusterNode *pNext = pNode->GetNextDisjoint();
- if (0 != pPrev)
- {
- if (pPrev->GetNextDisjoint() != pNode)
- {
- Log("Prev->This mismatch, prev=\n");
- pPrev->LogMe();
- Log("This=\n");
- pNode->LogMe();
- Quit("ClusterTree::Validate()");
- }
- }
- else
- {
- if (pNode != m_ptrDisjoints)
- {
- Log("[%u]->prev = 0 but != m_ptrDisjoints=%d\n",
- pNode->GetIndex(),
- m_ptrDisjoints ? m_ptrDisjoints->GetIndex() : 0xffffffff);
- pNode->LogMe();
- Quit("ClusterTree::Validate()");
- }
- }
- if (0 != pNext)
- {
- if (pNext->GetPrevDisjoint() != pNode)
- {
- Log("Next->This mismatch, next=\n");
- pNext->LogMe();
- Log("This=\n");
- pNode->LogMe();
- Quit("ClusterTree::Validate()");
- }
- }
- ++uDisjointListCount;
- if (uDisjointListCount > m_uNodeCount)
- Quit("Loop in disjoint list");
- }
-
- unsigned uParentlessNodeCount = 0;
- for (n = 0; n < uNodeCount; ++n)
- if (0 == m_Nodes[n].GetParent())
- ++uParentlessNodeCount;
-
- if (uDisjointListCount != uParentlessNodeCount)
- Quit("Disjoints = %u Parentless = %u\n", uDisjointListCount,
- uParentlessNodeCount);
- }
-#else // !_DEBUG
-#define Validate(uNodeCount) // empty
-#endif
-
-void ClusterNode::LogMe() const
- {
- unsigned uClusterSize = GetClusterSize();
- Log("[%02u] w=%5.3f CW=%5.3f LBW=%5.3f RBW=%5.3f LWT=%5.3f RWT=%5.3f L=%02d R=%02d P=%02d NxDj=%02d PvDj=%02d Sz=%02d {",
- m_uIndex,
- m_dWeight,
- GetClusterWeight(),
- GetLeftBranchWeight(),
- GetRightBranchWeight(),
- GetLeftWeight(),
- GetRightWeight(),
- m_ptrLeft ? m_ptrLeft->GetIndex() : 0xffffffff,
- m_ptrRight ? m_ptrRight->GetIndex() : 0xffffffff,
- m_ptrParent ? m_ptrParent->GetIndex() : 0xffffffff,
- m_ptrNextDisjoint ? m_ptrNextDisjoint->GetIndex() : 0xffffffff,
- m_ptrPrevDisjoint ? m_ptrPrevDisjoint->GetIndex() : 0xffffffff,
- uClusterSize);
- for (unsigned i = 0; i < uClusterSize; ++i)
- Log(" %u", GetClusterLeaf(i)->GetIndex());
- Log(" }\n");
- }
-
-// How many leaves in the sub-tree under this node?
-unsigned ClusterNode::GetClusterSize() const
- {
- unsigned uLeafCount = 0;
-
- if (0 == m_ptrLeft && 0 == m_ptrRight)
- return 1;
-
- if (0 != m_ptrLeft)
- uLeafCount += m_ptrLeft->GetClusterSize();
- if (0 != m_ptrRight)
- uLeafCount += m_ptrRight->GetClusterSize();
- assert(uLeafCount > 0);
- return uLeafCount;
- }
-
-double ClusterNode::GetClusterWeight() const
- {
- double dWeight = 0.0;
- if (0 != m_ptrLeft)
- dWeight += m_ptrLeft->GetClusterWeight();
- if (0 != m_ptrRight)
- dWeight += m_ptrRight->GetClusterWeight();
- return dWeight + GetWeight();
- }
-
-double ClusterNode::GetLeftBranchWeight() const
- {
- const ClusterNode *ptrLeft = GetLeft();
- if (0 == ptrLeft)
- return 0.0;
-
- return GetWeight() - ptrLeft->GetWeight();
- }
-
-double ClusterNode::GetRightBranchWeight() const
- {
- const ClusterNode *ptrRight = GetRight();
- if (0 == ptrRight)
- return 0.0;
-
- return GetWeight() - ptrRight->GetWeight();
- }
-
-double ClusterNode::GetRightWeight() const
- {
- const ClusterNode *ptrRight = GetRight();
- if (0 == ptrRight)
- return 0.0;
- return ptrRight->GetClusterWeight() + GetWeight();
- }
-
-double ClusterNode::GetLeftWeight() const
- {
- const ClusterNode *ptrLeft = GetLeft();
- if (0 == ptrLeft)
- return 0.0;
- return ptrLeft->GetClusterWeight() + GetWeight();
- }
-
-// Return n'th leaf in the sub-tree under this node.
-const ClusterNode *ClusterNode::GetClusterLeaf(unsigned uLeafIndex) const
- {
- if (0 != m_ptrLeft)
- {
- if (0 == m_ptrRight)
- return this;
-
- unsigned uLeftLeafCount = m_ptrLeft->GetClusterSize();
-
- if (uLeafIndex < uLeftLeafCount)
- return m_ptrLeft->GetClusterLeaf(uLeafIndex);
-
- assert(uLeafIndex >= uLeftLeafCount);
- return m_ptrRight->GetClusterLeaf(uLeafIndex - uLeftLeafCount);
- }
- if (0 == m_ptrRight)
- return this;
- return m_ptrRight->GetClusterLeaf(uLeafIndex);
- }
-
-void ClusterTree::DeleteFromDisjoints(ClusterNode *ptrNode)
- {
- ClusterNode *ptrPrev = ptrNode->GetPrevDisjoint();
- ClusterNode *ptrNext = ptrNode->GetNextDisjoint();
-
- if (0 != ptrPrev)
- ptrPrev->SetNextDisjoint(ptrNext);
- else
- m_ptrDisjoints = ptrNext;
-
- if (0 != ptrNext)
- ptrNext->SetPrevDisjoint(ptrPrev);
-
-#if _DEBUG
-// not algorithmically necessary, but improves clarity
-// and supports Validate().
- ptrNode->SetPrevDisjoint(0);
- ptrNode->SetNextDisjoint(0);
-#endif
- }
-
-void ClusterTree::AddToDisjoints(ClusterNode *ptrNode)
- {
- ptrNode->SetNextDisjoint(m_ptrDisjoints);
- ptrNode->SetPrevDisjoint(0);
- if (0 != m_ptrDisjoints)
- m_ptrDisjoints->SetPrevDisjoint(ptrNode);
- m_ptrDisjoints = ptrNode;
- }
-
-ClusterTree::ClusterTree()
- {
- m_ptrDisjoints = 0;
- m_Nodes = 0;
- m_uNodeCount = 0;
- }
-
-ClusterTree::~ClusterTree()
- {
- delete[] m_Nodes;
- }
-
-void ClusterTree::LogMe() const
- {
- Log("Disjoints=%d\n", m_ptrDisjoints ? m_ptrDisjoints->GetIndex() : 0xffffffff);
- for (unsigned i = 0; i < m_uNodeCount; ++i)
- {
- m_Nodes[i].LogMe();
- }
- }
-
-ClusterNode *ClusterTree::GetRoot() const
- {
- return &m_Nodes[m_uNodeCount - 1];
- }
-
-// This is the UPGMA algorithm as described in Durbin et al. p166.
-void ClusterTree::Create(const DistFunc &Dist)
- {
- unsigned i;
- m_uLeafCount = Dist.GetCount();
- m_uNodeCount = 2*m_uLeafCount - 1;
-
- delete[] m_Nodes;
- m_Nodes = new ClusterNode[m_uNodeCount];
-
- for (i = 0; i < m_uNodeCount; ++i)
- m_Nodes[i].SetIndex(i);
-
- for (i = 0; i < m_uLeafCount - 1; ++i)
- m_Nodes[i].SetNextDisjoint(&m_Nodes[i+1]);
-
- for (i = 1; i < m_uLeafCount; ++i)
- m_Nodes[i].SetPrevDisjoint(&m_Nodes[i-1]);
-
- m_ptrDisjoints = &m_Nodes[0];
-
-// Log("Initial state\n");
-// LogMe();
-// Log("\n");
-
- DistFunc ClusterDist;
- ClusterDist.SetCount(m_uNodeCount);
- double dMaxDist = 0.0;
- for (i = 0; i < m_uLeafCount; ++i)
- for (unsigned j = 0; j < m_uLeafCount; ++j)
- {
- float dDist = Dist.GetDist(i, j);
- ClusterDist.SetDist(i, j, dDist);
- }
-
- Validate(m_uLeafCount);
-
-// Iteration. N-1 joins needed to create a binary tree from N leaves.
- for (unsigned uJoinIndex = m_uLeafCount; uJoinIndex < m_uNodeCount;
- ++uJoinIndex)
- {
- // Find closest pair of clusters
- unsigned uIndexClosest1;
- unsigned uIndexClosest2;
- bool bFound = false;
- double dDistClosest = 9e99;
- for (ClusterNode *ptrNode1 = m_ptrDisjoints; ptrNode1;
- ptrNode1 = ptrNode1->GetNextDisjoint())
- {
- for (ClusterNode *ptrNode2 = ptrNode1->GetNextDisjoint(); ptrNode2;
- ptrNode2 = ptrNode2->GetNextDisjoint())
- {
- unsigned i1 = ptrNode1->GetIndex();
- unsigned i2 = ptrNode2->GetIndex();
- double dDist = ClusterDist.GetDist(i1, i2);
- if (dDist < dDistClosest)
- {
- bFound = true;
- dDistClosest = dDist;
- uIndexClosest1 = i1;
- uIndexClosest2 = i2;
- }
- }
- }
- assert(bFound);
-
- ClusterNode &Join = m_Nodes[uJoinIndex];
- ClusterNode &Child1 = m_Nodes[uIndexClosest1];
- ClusterNode &Child2 = m_Nodes[uIndexClosest2];
-
- Join.SetLeft(&Child1);
- Join.SetRight(&Child2);
- Join.SetWeight(dDistClosest);
-
- Child1.SetParent(&Join);
- Child2.SetParent(&Join);
-
- DeleteFromDisjoints(&Child1);
- DeleteFromDisjoints(&Child2);
- AddToDisjoints(&Join);
-
-// Log("After join %d %d\n", uIndexClosest1, uIndexClosest2);
-// LogMe();
-
- // Calculate distance of every remaining disjoint cluster to the
- // new cluster created by the join
- for (ClusterNode *ptrNode = m_ptrDisjoints; ptrNode;
- ptrNode = ptrNode->GetNextDisjoint())
- {
- unsigned uNodeIndex = ptrNode->GetIndex();
- float dDist1 = ClusterDist.GetDist(uNodeIndex, uIndexClosest1);
- float dDist2 = ClusterDist.GetDist(uNodeIndex, uIndexClosest2);
- float dDist = Min(dDist1, dDist2);
- ClusterDist.SetDist(uJoinIndex, uNodeIndex, dDist);
- }
- Validate(uJoinIndex+1);
- }
- GetRoot()->GetClusterWeight();
-// LogMe();
- }
Deleted: trunk/packages/muscle/trunk/cluster.h
===================================================================
--- trunk/packages/muscle/trunk/cluster.h 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/cluster.h 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,86 +0,0 @@
-class DistFunc;
-
-class ClusterNode
- {
- friend class ClusterTree;
-public:
- ClusterNode()
- {
- m_dWeight = 0.0;
- m_dWeight2 = 0.0;
- m_ptrLeft = 0;
- m_ptrRight = 0;
- m_ptrParent = 0;
- m_uIndex = 0;
- m_ptrPrevDisjoint = 0;
- m_ptrNextDisjoint = 0;
- }
- ~ClusterNode() {}
-
-public:
- unsigned GetIndex() const { return m_uIndex; }
- ClusterNode *GetLeft() const { return m_ptrLeft; }
- ClusterNode *GetRight() const { return m_ptrRight; }
- ClusterNode *GetParent() const { return m_ptrParent; }
- double GetWeight() const { return m_dWeight; }
-
- const ClusterNode *GetClusterLeaf(unsigned uLeafIndex) const;
- unsigned GetClusterSize() const;
- double GetClusterWeight() const;
- double GetLeftBranchWeight() const;
- double GetRightBranchWeight() const;
- double GetLeftWeight() const;
- double GetRightWeight() const;
-
- void LogMe() const;
-
- double GetWeight2() const { return m_dWeight2; }
- void SetWeight2(double dWeight2) { m_dWeight2 = dWeight2; }
-
-protected:
- void SetIndex(unsigned uIndex) { m_uIndex = uIndex; }
- void SetWeight(double dWeight) { m_dWeight = dWeight; }
- void SetLeft(ClusterNode *ptrLeft) { m_ptrLeft = ptrLeft; }
- void SetRight(ClusterNode *ptrRight) { m_ptrRight = ptrRight; }
- void SetParent(ClusterNode *ptrParent) { m_ptrParent = ptrParent; }
- void SetNextDisjoint(ClusterNode *ptrNode) { m_ptrNextDisjoint = ptrNode; }
- void SetPrevDisjoint(ClusterNode *ptrNode) { m_ptrPrevDisjoint = ptrNode; }
-
- ClusterNode *GetNextDisjoint() { return m_ptrNextDisjoint; }
- ClusterNode *GetPrevDisjoint() { return m_ptrPrevDisjoint; }
-
-private:
- double m_dWeight;
- double m_dWeight2;
- unsigned m_uIndex;
- ClusterNode *m_ptrLeft;
- ClusterNode *m_ptrRight;
- ClusterNode *m_ptrParent;
- ClusterNode *m_ptrNextDisjoint;
- ClusterNode *m_ptrPrevDisjoint;
- };
-
-class ClusterTree
- {
-public:
- ClusterTree();
- virtual ~ClusterTree();
-
- void Create(const DistFunc &DF);
-
- ClusterNode *GetRoot() const;
- void LogMe() const;
-
-protected:
- void Join(ClusterNode *ptrNode1, ClusterNode *ptrNode2,
- ClusterNode *ptrJoin);
- void AddToDisjoints(ClusterNode *ptrNode);
- void DeleteFromDisjoints(ClusterNode *ptrNode);
- void Validate(unsigned uNodeCount);
-
-private:
- ClusterNode *m_ptrDisjoints;
- ClusterNode *m_Nodes;
- unsigned m_uNodeCount;
- unsigned m_uLeafCount;
- };
Deleted: trunk/packages/muscle/trunk/clustset.h
===================================================================
--- trunk/packages/muscle/trunk/clustset.h 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/clustset.h 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,21 +0,0 @@
-#ifndef ClustSet_h
-#define ClustSet_h
-
-enum JOIN;
-enum LINKAGE;
-class Clust;
-
-class ClustSet
- {
-public:
- virtual unsigned GetLeafCount() = 0;
- virtual double ComputeDist(const Clust &C, unsigned uNodeIndex1,
- unsigned uNodeIndex2) = 0;
- virtual void JoinNodes(const Clust &C, unsigned uLeftNodeIndex,
- unsigned uRightNodeIndex, unsigned uJoinedNodeIndex,
- double *ptrdLeftLength, double *ptrdRightLength) = 0;
- virtual const char *GetLeafName(unsigned uNodeIndex) = 0;
- virtual unsigned GetLeafId(unsigned uNodeIndex) = 0;
- };
-
-#endif // ClustSet_h
Deleted: trunk/packages/muscle/trunk/clustsetdf.h
===================================================================
--- trunk/packages/muscle/trunk/clustsetdf.h 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/clustsetdf.h 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,48 +0,0 @@
-#ifndef ClustSetDF_h
-#define ClustSetDF_h
-
-class MSA;
-class Clust;
-
-#include "clustset.h"
-#include "distfunc.h"
-#include "msa.h"
-
-class ClustSetDF : public ClustSet
- {
-public:
- ClustSetDF(const DistFunc &DF) :
- m_ptrDF(&DF)
- {
- }
-
-public:
- virtual unsigned GetLeafCount()
- {
- return m_ptrDF->GetCount();
- }
- virtual const char *GetLeafName(unsigned uNodeIndex)
- {
- return m_ptrDF->GetName(uNodeIndex);
- }
- virtual unsigned GetLeafId(unsigned uNodeIndex)
- {
- return m_ptrDF->GetId(uNodeIndex);
- }
- virtual void JoinNodes(const Clust &C, unsigned uLeftNodeIndex,
- unsigned uRightNodeIndex, unsigned uJoinedNodeIndex,
- double *ptrdLeftLength, double *ptrdRightLength)
- {
- Quit("ClustSetDF::JoinNodes, should never be called");
- }
- virtual double ComputeDist(const Clust &C, unsigned uNodeIndex1,
- unsigned uNodeIndex2)
- {
- return m_ptrDF->GetDist(uNodeIndex1, uNodeIndex2);
- }
-
-private:
- const DistFunc *m_ptrDF;
- };
-
-#endif // ClustSetDF_h
Deleted: trunk/packages/muscle/trunk/clustsetmsa.h
===================================================================
--- trunk/packages/muscle/trunk/clustsetmsa.h 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/clustsetmsa.h 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,55 +0,0 @@
-#ifndef ClustSetMSA_h
-#define ClustSetMSA_h
-
-class MSA;
-class Clust;
-
-#include "clustset.h"
-#include "msadist.h"
-
-// Distance matrix based set.
-// Computes distances between leaves, never between
-// joined clusters (leaves this to distance matrix method).
-class ClustSetMSA : public ClustSet
- {
-public:
- ClustSetMSA(const MSA &msa, MSADist &MD) :
- m_ptrMSA(&msa),
- m_ptrMSADist(&MD)
- {
- }
-
-public:
- virtual unsigned GetLeafCount()
- {
- return m_ptrMSA->GetSeqCount();
- }
- virtual const char *GetLeafName(unsigned uNodeIndex)
- {
- return m_ptrMSA->GetSeqName(uNodeIndex);
- }
- virtual unsigned GetLeafId(unsigned uNodeIndex)
- {
- return m_ptrMSA->GetSeqId(uNodeIndex);
- }
- virtual void JoinNodes(const Clust &C, unsigned uLeftNodeIndex,
- unsigned uRightNodeIndex, unsigned uJoinedNodeIndex,
- double *ptrdLeftLength, double *ptrdRightLength)
- {
- Quit("ClustSetMSA::JoinNodes, should never be called");
- }
- virtual double ComputeDist(const Clust &C, unsigned uNodeIndex1,
- unsigned uNodeIndex2)
- {
- return m_ptrMSADist->ComputeDist(*m_ptrMSA, uNodeIndex1, uNodeIndex2);
- }
-
-public:
- const MSA &GetMSA();
-
-private:
- const MSA *m_ptrMSA;
- MSADist *m_ptrMSADist;
- };
-
-#endif // ClustSetMSA_h
Deleted: trunk/packages/muscle/trunk/clwwt.cpp
===================================================================
--- trunk/packages/muscle/trunk/clwwt.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/clwwt.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,190 +0,0 @@
-#include "muscle.h"
-#include "tree.h"
-#include "msa.h"
-
-/***
-Compute weights by the CLUSTALW method.
-Thompson, Higgins and Gibson (1994), CABIOS (10) 19-29;
-see also CLUSTALW paper.
-
-Weights are computed from the edge lengths of a rooted tree.
-
-Define the strength of an edge to be its length divided by the number
-of leaves under that edge. The weight of a sequence is then the sum
-of edge strengths on the path from the root to the leaf.
-
-Example.
-
- 0.2
- -----A 0.1
- -x ------- B 0.7
- --------y ----------- C
- 0.3 ----------z
- 0.4 -------------- D
- 0.8
-
-Edge Length Leaves Strength
----- ----- ------ --------
-xy 0.3 3 0.1
-xA 0.2 1 0.2
-yz 0.4 2 0.2
-yB 0.1 1 0.1
-zC 0.7 1 0.7
-zD 0.8 1 0.8
-
-Leaf Path Strengths Weight
----- ---- --------- ------
-A xA 0.2 0.2
-B xy-yB 0.1 + 0.1 0.2
-C xy-yz-zC 0.1 + 0.2 + 0.7 1.0
-D xy-yz-zD 0.1 + 0.2 + 0.8 1.1
-
-***/
-
-#define TRACE 0
-
-static unsigned CountLeaves(const Tree &tree, unsigned uNodeIndex,
- unsigned LeavesUnderNode[])
- {
- if (tree.IsLeaf(uNodeIndex))
- {
- LeavesUnderNode[uNodeIndex] = 1;
- return 1;
- }
-
- const unsigned uLeft = tree.GetLeft(uNodeIndex);
- const unsigned uRight = tree.GetRight(uNodeIndex);
- const unsigned uRightCount = CountLeaves(tree, uRight, LeavesUnderNode);
- const unsigned uLeftCount = CountLeaves(tree, uLeft, LeavesUnderNode);
- const unsigned uCount = uRightCount + uLeftCount;
- LeavesUnderNode[uNodeIndex] = uCount;
- return uCount;
- }
-
-void CalcClustalWWeights(const Tree &tree, WEIGHT Weights[])
- {
-#if TRACE
- Log("CalcClustalWWeights\n");
- tree.LogMe();
-#endif
-
- const unsigned uLeafCount = tree.GetLeafCount();
- if (0 == uLeafCount)
- return;
- else if (1 == uLeafCount)
- {
- Weights[0] = (WEIGHT) 1.0;
- return;
- }
- else if (2 == uLeafCount)
- {
- Weights[0] = (WEIGHT) 0.5;
- Weights[1] = (WEIGHT) 0.5;
- return;
- }
-
- if (!tree.IsRooted())
- Quit("CalcClustalWWeights requires rooted tree");
-
- const unsigned uNodeCount = tree.GetNodeCount();
- unsigned *LeavesUnderNode = new unsigned[uNodeCount];
- memset(LeavesUnderNode, 0, uNodeCount*sizeof(unsigned));
-
- const unsigned uRootNodeIndex = tree.GetRootNodeIndex();
- unsigned uLeavesUnderRoot = CountLeaves(tree, uRootNodeIndex, LeavesUnderNode);
- if (uLeavesUnderRoot != uLeafCount)
- Quit("WeightsFromTreee: Internal error, root count %u %u",
- uLeavesUnderRoot, uLeafCount);
-
-#if TRACE
- Log("Node Leaves Length Strength\n");
- Log("---- ------ -------- --------\n");
- // 1234 123456 12345678 12345678
-#endif
-
- double *Strengths = new double[uNodeCount];
- for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
- {
- if (tree.IsRoot(uNodeIndex))
- {
- Strengths[uNodeIndex] = 0.0;
- continue;
- }
- const unsigned uParent = tree.GetParent(uNodeIndex);
- const double dLength = tree.GetEdgeLength(uNodeIndex, uParent);
- const unsigned uLeaves = LeavesUnderNode[uNodeIndex];
- const double dStrength = dLength / (double) uLeaves;
- Strengths[uNodeIndex] = dStrength;
-#if TRACE
- Log("%4u %6u %8g %8g\n", uNodeIndex, uLeaves, dLength, dStrength);
-#endif
- }
-
-#if TRACE
- Log("\n");
- Log(" Seq Path..Weight\n");
- Log("-------------------- ------------\n");
-#endif
- for (unsigned n = 0; n < uLeafCount; ++n)
- {
- const unsigned uLeafNodeIndex = tree.LeafIndexToNodeIndex(n);
-#if TRACE
- Log("%20.20s %4u ", tree.GetLeafName(uLeafNodeIndex), uLeafNodeIndex);
-#endif
- if (!tree.IsLeaf(uLeafNodeIndex))
- Quit("CalcClustalWWeights: leaf");
-
- double dWeight = 0;
- unsigned uNode = uLeafNodeIndex;
- while (!tree.IsRoot(uNode))
- {
- dWeight += Strengths[uNode];
- uNode = tree.GetParent(uNode);
-#if TRACE
- Log("->%u(%g)", uNode, Strengths[uNode]);
-#endif
- }
- if (dWeight < 0.0001)
- {
-#if TRACE
- Log("zero->one");
-#endif
- dWeight = 1.0;
- }
- Weights[n] = (WEIGHT) dWeight;
-#if TRACE
- Log(" = %g\n", dWeight);
-#endif
- }
-
- delete[] Strengths;
- delete[] LeavesUnderNode;
-
- Normalize(Weights, uLeafCount);
- }
-
-void MSA::SetClustalWWeights(const Tree &tree)
- {
- const unsigned uSeqCount = GetSeqCount();
- const unsigned uLeafCount = tree.GetLeafCount();
-
- WEIGHT *Weights = new WEIGHT[uSeqCount];
-
- CalcClustalWWeights(tree, Weights);
-
- for (unsigned n = 0; n < uLeafCount; ++n)
- {
- const WEIGHT w = Weights[n];
- const unsigned uLeafNodeIndex = tree.LeafIndexToNodeIndex(n);
- const unsigned uId = tree.GetLeafId(uLeafNodeIndex);
- const unsigned uSeqIndex = GetSeqIndex(uId);
-#if DEBUG
- if (GetSeqName(uSeqIndex) != tree.GetLeafName(uLeafNodeIndex))
- Quit("MSA::SetClustalWWeights: names don't match");
-#endif
- SetSeqWeight(uSeqIndex, w);
- }
- NormalizeWeights((WEIGHT) 1.0);
-
- delete[] Weights;
- }
Deleted: trunk/packages/muscle/trunk/color.cpp
===================================================================
--- trunk/packages/muscle/trunk/color.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/color.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,189 +0,0 @@
-#include "muscle.h"
-#include "msa.h"
-
-static int Blosum62[23][23] =
- {
-// A B C D E F G H I K L M N P Q R S T V W X Y Z
- +4, -2, +0, -2, -1, -2, +0, -2, -1, -1, -1, -1, -2, -1, -1, -1, +1, +0, +0, -3, -1, -2, -1, // A
- -2, +6, -3, +6, +2, -3, -1, -1, -3, -1, -4, -3, +1, -1, +0, -2, +0, -1, -3, -4, -1, -3, +2, // B
- +0, -3, +9, -3, -4, -2, -3, -3, -1, -3, -1, -1, -3, -3, -3, -3, -1, -1, -1, -2, -1, -2, -4, // C
- -2, +6, -3, +6, +2, -3, -1, -1, -3, -1, -4, -3, +1, -1, +0, -2, +0, -1, -3, -4, -1, -3, +2, // D
- -1, +2, -4, +2, +5, -3, -2, +0, -3, +1, -3, -2, +0, -1, +2, +0, +0, -1, -2, -3, -1, -2, +5, // E
-
- -2, -3, -2, -3, -3, +6, -3, -1, +0, -3, +0, +0, -3, -4, -3, -3, -2, -2, -1, +1, -1, +3, -3, // F
- +0, -1, -3, -1, -2, -3, +6, -2, -4, -2, -4, -3, +0, -2, -2, -2, +0, -2, -3, -2, -1, -3, -2, // G
- -2, -1, -3, -1, +0, -1, -2, +8, -3, -1, -3, -2, +1, -2, +0, +0, -1, -2, -3, -2, -1, +2, +0, // H
- -1, -3, -1, -3, -3, +0, -4, -3, +4, -3, +2, +1, -3, -3, -3, -3, -2, -1, +3, -3, -1, -1, -3, // I
- -1, -1, -3, -1, +1, -3, -2, -1, -3, +5, -2, -1, +0, -1, +1, +2, +0, -1, -2, -3, -1, -2, +1, // K
-
- -1, -4, -1, -4, -3, +0, -4, -3, +2, -2, +4, +2, -3, -3, -2, -2, -2, -1, +1, -2, -1, -1, -3, // L
- -1, -3, -1, -3, -2, +0, -3, -2, +1, -1, +2, +5, -2, -2, +0, -1, -1, -1, +1, -1, -1, -1, -2, // M
- -2, +1, -3, +1, +0, -3, +0, +1, -3, +0, -3, -2, +6, -2, +0, +0, +1, +0, -3, -4, -1, -2, +0, // N
- -1, -1, -3, -1, -1, -4, -2, -2, -3, -1, -3, -2, -2, +7, -1, -2, -1, -1, -2, -4, -1, -3, -1, // P
- -1, +0, -3, +0, +2, -3, -2, +0, -3, +1, -2, +0, +0, -1, +5, +1, +0, -1, -2, -2, -1, -1, +2, // Q
-
- -1, -2, -3, -2, +0, -3, -2, +0, -3, +2, -2, -1, +0, -2, +1, +5, -1, -1, -3, -3, -1, -2, +0, // R
- +1, +0, -1, +0, +0, -2, +0, -1, -2, +0, -2, -1, +1, -1, +0, -1, +4, +1, -2, -3, -1, -2, +0, // S
- +0, -1, -1, -1, -1, -2, -2, -2, -1, -1, -1, -1, +0, -1, -1, -1, +1, +5, +0, -2, -1, -2, -1, // T
- +0, -3, -1, -3, -2, -1, -3, -3, +3, -2, +1, +1, -3, -2, -2, -3, -2, +0, +4, -3, -1, -1, -2, // V
- -3, -4, -2, -4, -3, +1, -2, -2, -3, -3, -2, -1, -4, -4, -2, -3, -3, -2, -3,+11, -1, +2, -3, // W
-
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // X
- -2, -3, -2, -3, -2, +3, -3, +2, -1, -2, -1, -1, -2, -3, -1, -2, -2, -2, -1, +2, -1, +7, -2, // Y
- -1, +2, -4, +2, +5, -3, -2, +0, -3, +1, -3, -2, +0, -1, +2, +0, +0, -1, -2, -3, -1, -2, +5, // Z
- };
-
-static int toi_tab[26] =
- {
- 0, // A
- 1, // B
- 2, // C
- 3, // D
- 4, // E
- 5, // F
- 6, // G
- 7, // H
- 8, // I
- -1, // J
- 9, // K
- 10, // L
- 11, // M
- 12, // N
- -1, // O
- 13, // P
- 14, // Q
- 15, // R
- 16, // S
- 17, // T
- -1, // U
- 18, // V
- 19, // W
- 20, // X
- 21, // Y
- 22, // Z
- };
-
-static int toi(char c)
- {
- c = toupper(c);
- return toi_tab[c - 'A'];
- }
-
-static int BlosumScore(char c1, char c2)
- {
- int i1 = toi(c1);
- int i2 = toi(c2);
- return Blosum62[i1][i2];
- }
-
-/***
-Consider a column with 5 As and 3 Bs.
-There are:
- 5x4 pairs of As.
- 3x2 pairs of Bs.
- 5x3x2 AB pairs
- 8x7 = 5x4 + 3x2 + 5x3x2 pairs of letters
-***/
-static double BlosumScoreCol(const MSA &a, unsigned uColIndex)
- {
- int iCounts[23];
- memset(iCounts, 0, sizeof(iCounts));
- const unsigned uSeqCount = a.GetSeqCount();
- unsigned uCharCount = 0;
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
- {
- char c = a.GetChar(uSeqIndex, uColIndex);
- if (IsGapChar(c))
- continue;
- int iChar = toi(c);
- ++iCounts[iChar];
- ++uCharCount;
- }
- if (uCharCount < 2)
- return -9;
- int iTotalScore = 0;
- for (int i1 = 0; i1 < 23; ++i1)
- {
- int iCounts1 = iCounts[i1];
- iTotalScore += iCounts1*(iCounts1 - 1)*Blosum62[i1][i1];
- for (int i2 = i1 + 1; i2 < 23; ++i2)
- iTotalScore += iCounts[i2]*iCounts1*2*Blosum62[i1][i2];
- }
- int iPairCount = uCharCount*(uCharCount - 1);
- return (double) iTotalScore / (double) iPairCount;
- }
-
-/***
-Consider a column with 5 As and 3 Bs.
-A residue of type Q scores:
- 5xAQ + 3xBQ
-***/
-static void AssignColorsCol(const MSA &a, unsigned uColIndex, int **Colors)
- {
- int iCounts[23];
- memset(iCounts, 0, sizeof(iCounts));
- const unsigned uSeqCount = a.GetSeqCount();
- unsigned uCharCount = 0;
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
- {
- char c = a.GetChar(uSeqIndex, uColIndex);
- if (IsGapChar(c))
- continue;
- int iChar = toi(c);
- ++iCounts[iChar];
- ++uCharCount;
- }
- int iMostConservedType = -1;
- int iMostConservedCount = -1;
- for (unsigned i = 0; i < 23; ++i)
- {
- if (iCounts[i] > iMostConservedCount)
- {
- iMostConservedType = i;
- iMostConservedCount = iCounts[i];
- }
- }
-
- double dColScore = BlosumScoreCol(a, uColIndex);
- int c;
- if (dColScore >= 3.0)
- c = 3;
- //else if (dColScore >= 1.0)
- // c = 2;
- else if (dColScore >= 0.2)
- c = 1;
- else
- c = 0;
-
- int Color[23];
- for (unsigned uLetter = 0; uLetter < 23; ++uLetter)
- {
- double dScore = Blosum62[uLetter][iMostConservedType];
- if (dScore >= dColScore)
- Color[uLetter] = c;
- else
- Color[uLetter] = 0;
- }
-
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
- {
- char c = a.GetChar(uSeqIndex, uColIndex);
- if (IsGapChar(c))
- {
- Colors[uSeqIndex][uColIndex] = 0;
- continue;
- }
- int iLetter = toi(c);
- if (iLetter >= 0 && iLetter < 23)
- Colors[uSeqIndex][uColIndex] = Color[iLetter];
- else
- Colors[uSeqIndex][uColIndex] = 0;
- }
- }
-
-void AssignColors(const MSA &a, int **Colors)
- {
- const unsigned uColCount = a.GetColCount();
- for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
- AssignColorsCol(a, uColIndex, Colors);
- }
Deleted: trunk/packages/muscle/trunk/cons.cpp
===================================================================
--- trunk/packages/muscle/trunk/cons.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/cons.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,118 +0,0 @@
-/***
-Conservation value for a column in an MSA is defined as the number
-of times the most common letter appears divided by the number of
-sequences.
-***/
-
-#include "muscle.h"
-#include "msa.h"
-#include <math.h>
-
-double MSA::GetAvgCons() const
- {
- assert(GetSeqCount() > 0);
- double dSum = 0;
- unsigned uNonGapColCount = 0;
- for (unsigned uColIndex = 0; uColIndex < GetColCount(); ++uColIndex)
- {
- if (!IsGapColumn(uColIndex))
- {
- dSum += GetCons(uColIndex);
- ++uNonGapColCount;
- }
- }
- assert(uNonGapColCount > 0);
- double dAvg = dSum / uNonGapColCount;
- assert(dAvg > 0 && dAvg <= 1);
- return dAvg;
- }
-
-double MSA::GetCons(unsigned uColIndex) const
- {
- unsigned Counts[MAX_ALPHA];
- for (unsigned uLetter = 0; uLetter < g_AlphaSize; ++uLetter)
- Counts[uLetter] = 0;
-
- unsigned uMaxCount = 0;
- for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
- {
- if (IsGap(uSeqIndex, uColIndex))
- continue;
- char c = GetChar(uSeqIndex, uColIndex);
- c = toupper(c);
- if ('X' == c || 'B' == c || 'Z' == c)
- continue;
- unsigned uLetter = GetLetter(uSeqIndex, uColIndex);
- unsigned uCount = Counts[uLetter] + 1;
- if (uCount > uMaxCount)
- uMaxCount = uCount;
- Counts[uLetter] = uCount;
- }
-
-// Cons is undefined for all-gap column
- if (0 == uMaxCount)
- {
-// assert(false);
- return 1;
- }
-
- double dCons = (double) uMaxCount / (double) GetSeqCount();
- assert(dCons > 0 && dCons <= 1);
- return dCons;
- }
-
-// Percent identity of a pair of sequences.
-// Positions with one or both gapped are ignored.
-double MSA::GetPctIdentityPair(unsigned uSeqIndex1, unsigned uSeqIndex2) const
- {
- const unsigned uColCount = GetColCount();
- unsigned uPosCount = 0;
- unsigned uSameCount = 0;
- for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
- {
- const char c1 = GetChar(uSeqIndex1, uColIndex);
- const char c2 = GetChar(uSeqIndex2, uColIndex);
- if (IsGapChar(c1) || IsGapChar(c2))
- continue;
- if (c1 == c2)
- ++uSameCount;
- ++uPosCount;
- }
- if (0 == uPosCount)
- return 0;
- return (double) uSameCount / (double) uPosCount;
- }
-
-// Percent group identity of a pair of sequences.
-// Positions with one or both gapped are ignored.
-double MSA::GetPctGroupIdentityPair(unsigned uSeqIndex1,
- unsigned uSeqIndex2) const
- {
- extern unsigned ResidueGroup[];
-
- const unsigned uColCount = GetColCount();
- unsigned uPosCount = 0;
- unsigned uSameCount = 0;
- for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
- {
- if (IsGap(uSeqIndex1, uColIndex))
- continue;
- if (IsGap(uSeqIndex2, uColIndex))
- continue;
- if (IsWildcard(uSeqIndex1, uColIndex))
- continue;
- if (IsWildcard(uSeqIndex2, uColIndex))
- continue;
-
- const unsigned uLetter1 = GetLetter(uSeqIndex1, uColIndex);
- const unsigned uLetter2 = GetLetter(uSeqIndex2, uColIndex);
- const unsigned uGroup1 = ResidueGroup[uLetter1];
- const unsigned uGroup2 = ResidueGroup[uLetter2];
- if (uGroup1 == uGroup2)
- ++uSameCount;
- ++uPosCount;
- }
- if (0 == uPosCount)
- return 0;
- return (double) uSameCount / (double) uPosCount;
- }
Property changes on: trunk/packages/muscle/trunk/debian
___________________________________________________________________
Name: mergeWithUpstream
+ 1
Deleted: trunk/packages/muscle/trunk/diaglist.cpp
===================================================================
--- trunk/packages/muscle/trunk/diaglist.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/diaglist.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,378 +0,0 @@
-#include "muscle.h"
-#include "diaglist.h"
-#include "pwpath.h"
-
-#define MAX(x, y) ((x) > (y) ? (x) : (y))
-#define MIN(x, y) ((x) < (y) ? (x) : (y))
-
-// Appends a copy of d to the end of the list. Calls Quit() if the list
-// already holds MAX_DIAGS entries.
-void DiagList::Add(const Diag &d)
-    {
-    if (MAX_DIAGS == m_uCount)
-        Quit("DiagList::Add, overflow %u", m_uCount);
-    m_Diags[m_uCount++] = d;
-    }
-
-// Convenience overload: builds a Diag from its three fields and appends it.
-void DiagList::Add(unsigned uStartPosA, unsigned uStartPosB, unsigned uLength)
-    {
-    // Field order follows the Diag declaration: start A, start B, length.
-    const Diag NewDiag = { uStartPosA, uStartPosB, uLength };
-    Add(NewDiag);
-    }
-
-// Returns the entry at uIndex. Calls Quit() when uIndex is not below the
-// current count.
-const Diag &DiagList::Get(unsigned uIndex) const
-    {
-    const bool bInRange = uIndex < m_uCount;
-    if (!bInRange)
-        Quit("DiagList::Get(%u), count=%u", uIndex, m_uCount);
-    return m_Diags[uIndex];
-    }
-
-// Writes the list to the log as a table, one row per diagonal.
-void DiagList::LogMe() const
-    {
-    Log("DiagList::LogMe, count=%u\n", m_uCount);
-    Log(" n StartA StartB Length\n");
-    Log("--- ------ ------ ------\n");
-    unsigned uRow = 0;
-    while (uRow < m_uCount)
-        {
-        Log("%3u %6u %6u %6u\n",
-          uRow,
-          m_Diags[uRow].m_uStartPosA,
-          m_Diags[uRow].m_uStartPosB,
-          m_Diags[uRow].m_uLength);
-        ++uRow;
-        }
-    }
-
-// Rebuilds this list from an alignment path. Each maximal run of
-// consecutive match ('M') edges is one diagonal; a run is added only if
-// it is at least g_uMinDiagLength edges long. Any previous content of
-// the list is discarded.
-void DiagList::FromPath(const PWPath &Path)
-    {
-    Clear();
-
-    const unsigned uEdgeCount = Path.GetEdgeCount();
-    unsigned uLength = 0;
-
-    // Start of the run currently being scanned. Zero-initialized so that
-    // Add() can never receive an indeterminate value: when
-    // g_uMinDiagLength is 0 the length test below passes even before any
-    // 'M' edge has been seen.
-    unsigned uStartPosA = 0;
-    unsigned uStartPosB = 0;
-    for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
-        {
-        const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
-
-        // Typical cases
-        if (Edge.cType == 'M')
-            {
-            if (0 == uLength)
-                {
-                // First edge of a new run; remember where it starts.
-                // NOTE(review): presumably prefix lengths are 1-based
-                // counts, hence the - 1 -- confirm against PWEdge.
-                uStartPosA = Edge.uPrefixLengthA - 1;
-                uStartPosB = Edge.uPrefixLengthB - 1;
-                }
-            ++uLength;
-            }
-        else
-            {
-            // A non-match edge ends the current run.
-            if (uLength >= g_uMinDiagLength)
-                Add(uStartPosA, uStartPosB, uLength);
-            uLength = 0;
-            }
-        }
-
-    // Special case for last edge: a run that reaches the end of the path
-    // has not been closed by a non-match edge.
-    if (uLength >= g_uMinDiagLength)
-        Add(uStartPosA, uStartPosB, uLength);
-    }
-
-// True if d shares at least one cell with some diagonal in the list.
-bool DiagList::NonZeroIntersection(const Diag &d) const
-    {
-    unsigned uIndex = 0;
-    while (uIndex < m_uCount)
-        {
-        if (0 != DiagOverlap(d, m_Diags[uIndex]))
-            return true;
-        ++uIndex;
-        }
-    return false;
-    }
-
-// DiagOverlap returns the number of matrix cells shared by two
-// diagonals, i.e. the length of their intersection measured along
-// the diagonal. The result is 0 when they share no cell.
-unsigned DiagOverlap(const Diag &d1, const Diag &d2)
-    {
-    // Every cell on a diagonal satisfies B = A + c for a fixed offset
-    // c = B - A. Diagonals with different offsets lie on different
-    // parallel lines and therefore cannot share a cell.
-    const int iOffset1 = (int) d1.m_uStartPosB - (int) d1.m_uStartPosA;
-    const int iOffset2 = (int) d2.m_uStartPosB - (int) d2.m_uStartPosA;
-    if (iOffset1 == iOffset2)
-        {
-        // On a common line the projections onto A and onto B overlap by
-        // the same amount, so either one gives the answer.
-        assert(DiagOverlapA(d1, d2) == DiagOverlapB(d1, d2));
-        return DiagOverlapA(d1, d2);
-        }
-    return 0;
-    }
-
-// DiagOverlapA returns the length of the overlap between the
-// projections of the two diagonals onto the A axis (0 if the
-// projections do not overlap).
-unsigned DiagOverlapA(const Diag &d1, const Diag &d2)
-    {
-    // Last A position covered by each diagonal.
-    const unsigned uEnd1 = d1.m_uStartPosA + d1.m_uLength - 1;
-    const unsigned uEnd2 = d2.m_uStartPosA + d2.m_uLength - 1;
-
-    const unsigned uFirstShared =
-      d1.m_uStartPosA > d2.m_uStartPosA ? d1.m_uStartPosA : d2.m_uStartPosA;
-    const unsigned uLastShared = uEnd1 < uEnd2 ? uEnd1 : uEnd2;
-
-    // Signed arithmetic: the span is negative when the ranges are disjoint.
-    const int iSpan = (int) uLastShared - (int) uFirstShared + 1;
-    return iSpan < 0 ? 0 : (unsigned) iSpan;
-    }
-
-// DiagOverlapB returns the length of the overlap between the
-// projections of the two diagonals onto the B axis (0 if the
-// projections do not overlap).
-unsigned DiagOverlapB(const Diag &d1, const Diag &d2)
-    {
-    // Last B position covered by each diagonal.
-    const unsigned uEnd1 = d1.m_uStartPosB + d1.m_uLength - 1;
-    const unsigned uEnd2 = d2.m_uStartPosB + d2.m_uLength - 1;
-
-    const unsigned uFirstShared =
-      d1.m_uStartPosB > d2.m_uStartPosB ? d1.m_uStartPosB : d2.m_uStartPosB;
-    const unsigned uLastShared = uEnd1 < uEnd2 ? uEnd1 : uEnd2;
-
-    // Signed arithmetic: the span is negative when the ranges are disjoint.
-    const int iSpan = (int) uLastShared - (int) uFirstShared + 1;
-    return iSpan < 0 ? 0 : (unsigned) iSpan;
-    }
-
-// Returns true if the two diagonals can both lie on one path
-// through the DP matrix, false if they "contradict" each other.
-// They are compatible when they share cells, or when their
-// projections overlap on neither the A axis nor the B axis.
-bool DiagCompatible(const Diag &d1, const Diag &d2)
-    {
-    if (0 != DiagOverlap(d1, d2))
-        return true;
-    if (0 != DiagOverlapA(d1, d2))
-        return false;
-    return 0 == DiagOverlapB(d1, d2);
-    }
-
-// Returns the length of the "break" between two diagonals: the number
-// of positions separating the end of the earlier one from the start of
-// the later one. Returns 0 when the diagonals have different offsets
-// (B - A), and also when they touch or overlap.
-unsigned DiagBreak(const Diag &d1, const Diag &d2)
-    {
-    // Offset c = B - A identifies the line a diagonal lies on.
-    int c1 = (int) d1.m_uStartPosB - (int) d1.m_uStartPosA;
-    int c2 = (int) d2.m_uStartPosB - (int) d2.m_uStartPosA;
-    if (c1 != c2)
-        return 0;
-
-    int iMaxStart = MAX(d1.m_uStartPosA, d2.m_uStartPosA);
-    // Each end position uses that diagonal's own length. The previous
-    // code used d1.m_uLength for d2 as well, which gave a wrong break
-    // whenever the two lengths differed.
-    int iMinEnd = MIN(d1.m_uStartPosA + d1.m_uLength - 1,
-      d2.m_uStartPosA + d2.m_uLength - 1);
-    int iBreak = iMaxStart - iMinEnd - 1;
-    if (iBreak < 0)
-        return 0;
-    return (unsigned) iBreak;
-    }
-
-// Intended to merge diagonals that are continuations of each other,
-// separated by short breaks of up to length g_uMaxDiagBreak.
-// In a sorted list of diagonals, we only have to check
-// consecutive entries. Currently disabled: see the first statement.
-void MergeDiags(DiagList &DL)
- {
- return; // NOTE(review): unconditional return; everything below is unreachable, so DL is left unchanged.
-#if DEBUG
- if (!DL.IsSorted())
- Quit("MergeDiags: !IsSorted");
-#endif
-
-// TODO: Fix this!
-// Breaks must be with no offset (no gaps)
- const unsigned uCount = DL.GetCount();
- if (uCount <= 1)
- return;
-
- DiagList NewList;
-
- Diag MergedDiag; // accumulates the run of entries merged so far
- const Diag *ptrPrev = &DL.Get(0); // last unmerged entry, or &MergedDiag while a merge is in progress
- for (unsigned i = 1; i < uCount; ++i)
- {
- const Diag *ptrDiag = &DL.Get(i);
- unsigned uBreakLength = DiagBreak(*ptrPrev, *ptrDiag);
- if (uBreakLength <= g_uMaxDiagBreak) // NOTE(review): DiagBreak also yields 0 for diagonals with different offsets, so those would be merged too; presumably what the TODO refers to.
- {
- MergedDiag.m_uStartPosA = ptrPrev->m_uStartPosA;
- MergedDiag.m_uStartPosB = ptrPrev->m_uStartPosB;
- MergedDiag.m_uLength = ptrPrev->m_uLength + ptrDiag->m_uLength
- + uBreakLength;
- ptrPrev = &MergedDiag;
- }
- else
- {
- NewList.Add(*ptrPrev); // break too long: emit what has been accumulated and start over
- ptrPrev = ptrDiag;
- }
- }
- NewList.Add(*ptrPrev); // emit the final (possibly merged) entry
- DL.Copy(NewList);
- }
-
-// Removes diagonals that conflict with one another; the survivors keep
-// their relative order. The list is expected to be ordered by
-// m_uStartPosA (asserted below).
-//
-// Pass 1 examines every pair. If the two are incompatible and one is
-// more than four times as long as the other, only the shorter one is
-// flagged; otherwise both are flagged.
-// Pass 2 examines pairs that are still unflagged. Both are flagged if
-// their start positions in B are not strictly increasing, or if they
-// are incompatible.
-void DiagList::DeleteIncompatible()
-    {
-    assert(IsSorted());
-
-    if (m_uCount < 2)
-        return;
-
-    bool *bFlagForDeletion = new bool[m_uCount];
-    for (unsigned i = 0; i < m_uCount; ++i)
-        bFlagForDeletion[i] = false;
-
-    for (unsigned i = 0; i < m_uCount; ++i)
-        {
-        const Diag &di = m_Diags[i];
-        for (unsigned j = i + 1; j < m_uCount; ++j)
-            {
-            const Diag &dj = m_Diags[j];
-
-            // Verify sorted correctly
-            assert(di.m_uStartPosA <= dj.m_uStartPosA);
-
-            // If two diagonals are incompatible and
-            // one is much longer than the other,
-            // keep the longer one; otherwise drop both.
-            if (!DiagCompatible(di, dj))
-                {
-                if (di.m_uLength > dj.m_uLength*4)
-                    bFlagForDeletion[j] = true;
-                else if (dj.m_uLength > di.m_uLength*4)
-                    bFlagForDeletion[i] = true;
-                else
-                    {
-                    bFlagForDeletion[i] = true;
-                    bFlagForDeletion[j] = true;
-                    }
-                }
-            }
-        }
-
-    for (unsigned i = 0; i < m_uCount; ++i)
-        {
-        const Diag &di = m_Diags[i];
-        if (bFlagForDeletion[i])
-            continue;
-
-        for (unsigned j = i + 1; j < m_uCount; ++j)
-            {
-            const Diag &dj = m_Diags[j];
-            if (bFlagForDeletion[j])
-                continue;
-
-            // Verify sorted correctly
-            assert(di.m_uStartPosA <= dj.m_uStartPosA);
-
-            // If sort order in B different from sorted order in A,
-            // either diags are incompatible or we detected a repeat
-            // or permutation.
-            if (di.m_uStartPosB >= dj.m_uStartPosB || !DiagCompatible(di, dj))
-                {
-                bFlagForDeletion[i] = true;
-                bFlagForDeletion[j] = true;
-                }
-            }
-        }
-
-    // Compact the survivors in place. The write index never passes the
-    // read index, so no temporary copy of the array is needed.
-    unsigned uNewCount = 0;
-    for (unsigned i = 0; i < m_uCount; ++i)
-        {
-        if (bFlagForDeletion[i])
-            continue;
-
-        if (uNewCount != i)
-            m_Diags[uNewCount] = m_Diags[i];
-        ++uNewCount;
-        }
-    m_uCount = uNewCount;
-
-    // The flag array was previously never released (memory leak).
-    delete[] bFlagForDeletion;
-    }
-
-// Replaces the content of this list with the entries of DL, in order.
-void DiagList::Copy(const DiagList &DL)
-    {
-    Clear();
-    const unsigned uSourceCount = DL.GetCount();
-    unsigned uIndex = 0;
-    while (uIndex != uSourceCount)
-        {
-        Add(DL.Get(uIndex));
-        ++uIndex;
-        }
-    }
-
-// Intended to check that entries are in non-decreasing order of m_uStartPosA.
-bool DiagList::IsSorted() const
- {
- return true; // NOTE(review): unconditional; the check below is unreachable, so every list is reported as sorted.
- unsigned uCount = GetCount();
- for (unsigned i = 1; i < uCount; ++i)
- if (m_Diags[i-1].m_uStartPosA > m_Diags[i].m_uStartPosA) // adjacent pair out of order
- return false;
- return true;
- }
-
-// Sorts the list into non-decreasing order of m_uStartPosA.
-// Plain bubble sort: repeated passes swap adjacent out-of-order
-// entries until a pass makes no swap. Speed is not a concern
-// because the lists are short.
-void DiagList::Sort()
-    {
-    if (m_uCount < 2)
-        return;
-
-    bool bSwapped;
-    do
-        {
-        bSwapped = false;
-        for (unsigned uPos = 1; uPos < m_uCount; ++uPos)
-            {
-            Diag &Lower = m_Diags[uPos - 1];
-            Diag &Upper = m_Diags[uPos];
-            if (Lower.m_uStartPosA <= Upper.m_uStartPosA)
-                continue;
-
-            const Diag Saved = Lower;
-            Lower = Upper;
-            Upper = Saved;
-            bSwapped = true;
-            }
-        }
-    while (bSwapped);
-    }
-
-//void TestDiag()
-// {
-// Diag d1;
-// Diag d2;
-// Diag d3;
-//
-// d1.m_uStartPosA = 0;
-// d1.m_uStartPosB = 1;
-// d1.m_uLength = 32;
-//
-// d2.m_uStartPosA = 55;
-// d2.m_uStartPosB = 70;
-// d2.m_uLength = 36;
-//
-// d3.m_uStartPosA = 102;
-// d3.m_uStartPosB = 122;
-// d3.m_uLength = 50;
-//
-// DiagList DL;
-// DL.Add(d1);
-// DL.Add(d2);
-// DL.Add(d3);
-//
-// Log("Before DeleteIncompatible:\n");
-// DL.LogMe();
-// DL.DeleteIncompatible();
-//
-// Log("After DeleteIncompatible:\n");
-// DL.LogMe();
-//
-// MergeDiags(DL);
-// Log("After Merge:\n");
-// DL.LogMe();
-//
-// DPRegionList RL;
-// DiagListToDPRegionList(DL, RL, 200, 200);
-// RL.LogMe();
-// }
Deleted: trunk/packages/muscle/trunk/diaglist.h
===================================================================
--- trunk/packages/muscle/trunk/diaglist.h 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/diaglist.h 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,89 +0,0 @@
-#ifndef diaglist_h
-#define diaglist_h
-
-const unsigned EMPTY = (unsigned) ~0;
-const unsigned MAX_DIAGS = 1024;
-
-struct Diag // a run of cells (A, B), (A+1, B+1), ... along one diagonal of the DP matrix
- {
- unsigned m_uStartPosA; // A coordinate of the first cell
- unsigned m_uStartPosB; // B coordinate of the first cell
- unsigned m_uLength; // number of cells; the last A coordinate is m_uStartPosA + m_uLength - 1
- };
-
-struct Rect // NOTE(review): presumably a rectangular region of the DP matrix; not used in the visible code, confirm against callers
- {
- unsigned m_uStartPosA; // presumably the first A coordinate of the region
- unsigned m_uStartPosB; // presumably the first B coordinate of the region
- unsigned m_uLengthA; // presumably the extent along A
- unsigned m_uLengthB; // presumably the extent along B
- };
-
-class DiagList // fixed-capacity list of up to MAX_DIAGS diagonals, stored inline
- {
-public:
- DiagList()
- {
- m_uCount = 0; // starts empty; m_Diags itself is left uninitialized
- }
- ~DiagList()
- {
- Free();
- }
-
-public:
-// Creation
- void Clear() // empties the list
- {
- Free();
- }
- void FromPath(const PWPath &Path); // rebuilds the list from the runs of 'M' edges in Path
- void Add(const Diag &d); // appends d; calls Quit() when the list is full
- void Add(unsigned uStartPosA, unsigned uStartPosB, unsigned uLength); // appends a diagonal given by its fields
- void DeleteIncompatible(); // removes diagonals that conflict with one another
-
-// Accessors
- unsigned GetCount() const // number of diagonals currently stored
- {
- return m_uCount;
- }
- const Diag &Get(unsigned uIndex) const; // entry at uIndex; calls Quit() when out of range
-
-// Operations
- void Sort(); // orders entries by m_uStartPosA
- void Copy(const DiagList &DL); // replaces the content with the entries of DL
-
-// Query
- // returns true iff given diagonal is included in the list
- // in whole or in part.
- bool NonZeroIntersection(const Diag &d) const;
- bool IsSorted() const; // intended to report whether entries are ordered by m_uStartPosA
-
-// Diagnostics
- void LogMe() const; // writes the list to the log as a table
-
-private:
- void Free() // only resets the count; the array is a member, so there is nothing to release
- {
- m_uCount = 0;
- }
-
-private:
- unsigned m_uCount; // number of valid entries at the front of m_Diags
- Diag m_Diags[MAX_DIAGS]; // storage; entries at index >= m_uCount are not meaningful
- };
-
-unsigned DiagOverlap(const Diag &d1, const Diag &d2);
-unsigned DiagOverlapA(const Diag &d1, const Diag &d2);
-unsigned DiagOverlapB(const Diag &d1, const Diag &d2);
-unsigned DiagBreak(const Diag &d1, const Diag &d2);
-bool DiagCompatible(const Diag &d1, const Diag &d2);
-void CheckDiags(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
- unsigned uLengthB, const MSA &msaA, const MSA &msaB, const PWPath &Path);
-void FindDiags(const ProfPos *PX, unsigned uLengthX, const ProfPos *PY,
- unsigned uLengthY, DiagList &DL);
-void FindDiagsNuc(const ProfPos *PX, unsigned uLengthX, const ProfPos *PY,
- unsigned uLengthY, DiagList &DL);
-void MergeDiags(DiagList &DL);
-
-#endif // diaglist_h
Deleted: trunk/packages/muscle/trunk/diffobjscore.cpp
===================================================================
--- trunk/packages/muscle/trunk/diffobjscore.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/diffobjscore.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,162 +0,0 @@
-#include "muscle.h"
-#include "msa.h"
-#include "objscore.h"
-#include "profile.h"
-
-#define TRACE 0
-#define COMPARE_3_52 0
-#define BRUTE_LETTERS 0
-
-static SCORE ScoreColLetters(const MSA &msa, unsigned uColIndex)
- { /* Weighted sum-of-pairs letter score of one alignment column: the sum
-      over unordered pairs of sequences of w1*w2*Mx[letter1][letter2],
-      skipping sequences whose letter in this column is >= g_AlphaSize.
-      It is computed from per-letter weight totals rather than by
-      enumerating pairs; the BRUTE_LETTERS build checks the result
-      against direct pair enumeration. */
- SCOREMATRIX &Mx = *g_ptrScoreMatrix;
- const unsigned uSeqCount = msa.GetSeqCount();
-
-#if BRUTE_LETTERS
- SCORE BruteScore = 0;
- for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)
- {
- unsigned uLetter1 = msa.GetLetterEx(uSeqIndex1, uColIndex);
- if (uLetter1 >= g_AlphaSize)
- continue;
- WEIGHT w1 = msa.GetSeqWeight(uSeqIndex1);
- for (unsigned uSeqIndex2 = uSeqIndex1 + 1; uSeqIndex2 < uSeqCount; ++uSeqIndex2)
- {
- unsigned uLetter2 = msa.GetLetterEx(uSeqIndex2, uColIndex);
- if (uLetter2 >= g_AlphaSize)
- continue;
- WEIGHT w2 = msa.GetSeqWeight(uSeqIndex2);
- BruteScore += w1*w2*Mx[uLetter1][uLetter2];
- }
- }
-#endif
-
- double N = 0; // total weight of all sequences; only used for the guard below
- for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)
- {
- WEIGHT w = msa.GetSeqWeight(uSeqIndex1);
- N += w;
- }
- if (N <= 0)
- return 0;
-
- FCOUNT Freqs[20]; // summed weight per letter; NOTE(review): assumes g_AlphaSize <= 20, confirm
- memset(Freqs, 0, sizeof(Freqs));
- SCORE Score = 0;
- for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)
- {
- unsigned uLetter = msa.GetLetterEx(uSeqIndex1, uColIndex);
- if (uLetter >= g_AlphaSize)
- continue;
- WEIGHT w = msa.GetSeqWeight(uSeqIndex1);
- Freqs[uLetter] += w;
- Score -= w*w*Mx[uLetter][uLetter]; // cancels the pairing of a sequence with itself contained in f1*f1 below
- }
-
- for (unsigned uLetter1 = 0; uLetter1 < g_AlphaSize; ++uLetter1)
- {
- const FCOUNT f1 = Freqs[uLetter1];
- Score += f1*f1*Mx[uLetter1][uLetter1];
- for (unsigned uLetter2 = uLetter1 + 1; uLetter2 < g_AlphaSize; ++uLetter2)
- {
- const FCOUNT f2 = Freqs[uLetter2];
- Score += 2*f1*f2*Mx[uLetter1][uLetter2]; // factor 2: covers both (letter1, letter2) and (letter2, letter1)
- }
- }
- Score /= 2; // the totals above count every pair in both orders
-#if BRUTE_LETTERS
- assert(BTEq(BruteScore, Score));
-#endif
- return Score;
- }
-
-// Sums the letter score (ScoreColLetters) of the columns whose indexes
-// are listed in Edges[0 .. uEdgeCount-1].
-static SCORE ScoreLetters(const MSA &msa, const unsigned Edges[],
-  unsigned uEdgeCount)
-    {
-    const unsigned uSeqs = msa.GetSeqCount();
-    const unsigned uCols = msa.GetColCount();
-
-    SCORE Total = 0;
-    const unsigned *ptrEnd = Edges + uEdgeCount;
-    for (const unsigned *ptrCol = Edges; ptrCol != ptrEnd; ++ptrCol)
-        {
-        // Each listed index must name a column of msa.
-        assert(*ptrCol < uCols);
-        Total += ScoreColLetters(msa, *ptrCol);
-        }
-    return Total;
-    }
-
-// Fills Scores[i] with the letter score of column i, for every column
-// of msa. Scores must have room for msa.GetColCount() entries.
-void GetLetterScores(const MSA &msa, SCORE Scores[])
-    {
-    const unsigned uCols = msa.GetColCount();
-    unsigned uCol = 0;
-    while (uCol < uCols)
-        {
-        Scores[uCol] = ScoreColLetters(msa, uCol);
-        ++uCol;
-        }
-    }
-
-SCORE DiffObjScore(
- const MSA &msa1, const PWPath &Path1, const unsigned Edges1[], unsigned uEdgeCount1,
- const MSA &msa2, const PWPath &Path2, const unsigned Edges2[], unsigned uEdgeCount2)
- { /* Returns (score of msa2) - (score of msa1), where each score is
-      ScoreLetters + ScoreGaps evaluated only on the columns listed in
-      the corresponding Edges array. Path1 and Path2 are not used by
-      this body. */
-#if TRACE
- {
- Log("============DiffObjScore===========\n");
- Log("msa1:\n");
- msa1.LogMe();
- Log("\n");
- Log("Cols1: ");
- for (unsigned i = 0; i < uEdgeCount1; ++i)
- Log(" %u", Edges1[i]);
- Log("\n\n");
- Log("msa2:\n");
- msa2.LogMe();
- Log("Cols2: ");
- for (unsigned i = 0; i < uEdgeCount2; ++i)
- Log(" %u", Edges2[i]);
- Log("\n\n");
- }
-#endif
-
-#if COMPARE_3_52
- extern SCORE g_SPScoreLetters;
- extern SCORE g_SPScoreGaps;
- SCORE SP1 = ObjScoreSP(msa1);
- SCORE SPLetters1 = g_SPScoreLetters; // read right after ObjScoreSP(msa1), before the next call is made
- SCORE SPGaps1 = g_SPScoreGaps;
-
- SCORE SP2 = ObjScoreSP(msa2);
- SCORE SPLetters2 = g_SPScoreLetters;
- SCORE SPGaps2 = g_SPScoreGaps;
- SCORE SPDiffLetters = SPLetters2 - SPLetters1;
- SCORE SPDiffGaps = SPGaps2 - SPGaps1;
- SCORE SPDiff = SPDiffLetters + SPDiffGaps;
-#endif
-
- SCORE Letters1 = ScoreLetters(msa1, Edges1, uEdgeCount1);
- SCORE Letters2 = ScoreLetters(msa2, Edges2, uEdgeCount2);
-
- SCORE Gaps1 = ScoreGaps(msa1, Edges1, uEdgeCount1);
- SCORE Gaps2 = ScoreGaps(msa2, Edges2, uEdgeCount2);
-
- SCORE DiffLetters = Letters2 - Letters1;
- SCORE DiffGaps = Gaps2 - Gaps1;
- SCORE Diff = DiffLetters + DiffGaps; // positive when msa2 scores higher than msa1 on the listed columns
-
-#if COMPARE_3_52
- Log("ObjScoreSP Letters1=%.4g Letters2=%.4g DiffLetters=%.4g\n",
- SPLetters1, SPLetters2, SPDiffLetters);
-
- Log("DiffObjScore Letters1=%.4g Letters2=%.4g DiffLetters=%.4g\n",
- Letters1, Letters2, DiffLetters);
-
- Log("ObjScoreSP Gaps1=%.4g Gaps2=%.4g DiffGaps=%.4g\n",
- SPGaps1, SPGaps2, SPDiffGaps);
-
- Log("DiffObjScore Gaps1=%.4g Gaps2=%.4g DiffGaps=%.4g\n",
- Gaps1, Gaps2, DiffGaps);
-
- Log("SP diff=%.4g DiffObjScore Diff=%.4g\n", SPDiff, Diff);
-#endif
-
- return Diff;
- }
Deleted: trunk/packages/muscle/trunk/diffpaths.cpp
===================================================================
--- trunk/packages/muscle/trunk/diffpaths.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/diffpaths.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,114 +0,0 @@
-#include "muscle.h"
-#include "pwpath.h"
-
-#define TRACE 0
-
-void DiffPaths(const PWPath &p1, const PWPath &p2, unsigned Edges1[],
- unsigned *ptruDiffCount1, unsigned Edges2[], unsigned *ptruDiffCount2)
- { /* Walks both paths in parallel and records the indexes of the edges
-      in which they differ: indexes into p1 go to Edges1, indexes into p2
-      go to Edges2, and the number of entries written to each array is
-      returned through ptruDiffCount1 / ptruDiffCount2.
-      NOTE(review): no bound is checked on Edges1/Edges2; presumably the
-      caller sizes them for the full edge counts -- confirm. */
-#if TRACE
- Log("DiffPaths\n");
- Log("p1=");
- p1.LogMe();
- Log("p2=");
- p2.LogMe();
-#endif
- const unsigned uEdgeCount1 = p1.GetEdgeCount();
- const unsigned uEdgeCount2 = p2.GetEdgeCount();
-
- unsigned uDiffCount1 = 0;
- unsigned uDiffCount2 = 0;
- unsigned uEdgeIndex1 = 0;
- unsigned uEdgeIndex2 = 0;
- const PWEdge *Edge1 = &p1.GetEdge(uEdgeIndex1); // both pointers are fetched again at the top of every iteration
- const PWEdge *Edge2 = &p2.GetEdge(uEdgeIndex2);
- for (;;)
- {
- unsigned uEdgeIndexTop1 = uEdgeIndex1; // positions at loop entry, used by the "stuck" check below
- unsigned uEdgeIndexTop2 = uEdgeIndex2;
- Edge1 = &p1.GetEdge(uEdgeIndex1);
- Edge2 = &p2.GetEdge(uEdgeIndex2);
-#if TRACE
- Log("e1[%u] PLA%u PLB%u %c, e2[%u] PLA%u PLB %u %c DC1=%u DC2=%u\n",
- uEdgeIndex1, Edge1->uPrefixLengthA, Edge1->uPrefixLengthB, Edge1->cType,
- uEdgeIndex2, Edge2->uPrefixLengthA, Edge2->uPrefixLengthB, Edge2->cType,
- uDiffCount1, uDiffCount2);
-#endif
- if (Edge1->uPrefixLengthA == Edge2->uPrefixLengthA &&
- Edge1->uPrefixLengthB == Edge2->uPrefixLengthB) // both edges have the same prefix lengths
- {
- if (!Edge1->Equal(*Edge2)) // same prefix lengths but not Equal: record both as differences
- {
- Edges1[uDiffCount1++] = uEdgeIndex1;
- Edges2[uDiffCount2++] = uEdgeIndex2;
- }
- ++uEdgeIndex1;
- ++uEdgeIndex2;
- }
-
- else if (Edge2->uPrefixLengthA < Edge1->uPrefixLengthA ||
- Edge2->uPrefixLengthB < Edge1->uPrefixLengthB) // p2 is behind in A or B: record its edge and advance p2 only
- Edges2[uDiffCount2++] = uEdgeIndex2++;
-
- else if (Edge1->uPrefixLengthA < Edge2->uPrefixLengthA ||
- Edge1->uPrefixLengthB < Edge2->uPrefixLengthB) // p1 is behind in A or B: record its edge and advance p1 only
- Edges1[uDiffCount1++] = uEdgeIndex1++;
-
- if (uEdgeCount1 == uEdgeIndex1) // p1 exhausted: every remaining p2 edge is a difference
- {
- while (uEdgeIndex2 < uEdgeCount2)
- Edges2[uDiffCount2++] = uEdgeIndex2++;
- goto Done;
- }
- if (uEdgeCount2 == uEdgeIndex2) // p2 exhausted: every remaining p1 edge is a difference
- {
- while (uEdgeIndex1 < uEdgeCount1)
- Edges1[uDiffCount1++] = uEdgeIndex1++;
- goto Done;
- }
- if (uEdgeIndex1 == uEdgeIndexTop1 && uEdgeIndex2 == uEdgeIndexTop2) // neither index moved: guard against looping forever
- Quit("DiffPaths stuck");
- }
-Done:;
-#if TRACE
- Log("DiffCount1=%u (%u %u)\n", uDiffCount1, uEdgeCount1, uEdgeCount2);
- Log("Diffs1=");
- for (unsigned i = 0; i < uDiffCount1; ++i)
- {
- const PWEdge e = p1.GetEdge(Edges1[i]);
- Log(" %u=%c%u.%u", Edges1[i], e.cType, e.uPrefixLengthA, e.uPrefixLengthB);
- }
- Log("\n");
- Log("DiffCount2=%u\n", uDiffCount2);
- Log("Diffs2=");
- for (unsigned i = 0; i < uDiffCount2; ++i)
- {
- const PWEdge e = p2.GetEdge(Edges2[i]);
- Log(" %u=%c%u.%u", Edges2[i], e.cType, e.uPrefixLengthA, e.uPrefixLengthB);
- }
- Log("\n");
-#endif
- *ptruDiffCount1 = uDiffCount1;
- *ptruDiffCount2 = uDiffCount2;
- }
-
-// Manual smoke test for DiffPaths: compares a path of three matches
-// with a path in which the middle match is replaced by a 'D' edge
-// followed by an 'I' edge. The results are computed but not checked.
-void TestDiffPaths()
-    {
-    PWPath PathAllMatches;
-    PWPath PathWithIndel;
-
-    PathAllMatches.AppendEdge('M', 1, 1);
-    PathAllMatches.AppendEdge('M', 2, 2);
-    PathAllMatches.AppendEdge('M', 3, 3);
-
-    PathWithIndel.AppendEdge('M', 1, 1);
-    PathWithIndel.AppendEdge('D', 2, 1);
-    PathWithIndel.AppendEdge('I', 2, 2);
-    PathWithIndel.AppendEdge('M', 3, 3);
-
-    unsigned DiffEdges1[64];
-    unsigned DiffEdges2[64];
-    unsigned uCount1;
-    unsigned uCount2;
-    DiffPaths(PathAllMatches, PathWithIndel, DiffEdges1, &uCount1,
-      DiffEdges2, &uCount2);
-    }
Deleted: trunk/packages/muscle/trunk/difftrees.cpp
===================================================================
--- trunk/packages/muscle/trunk/difftrees.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/difftrees.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,381 +0,0 @@
-#include "muscle.h"
-#include "tree.h"
-
-#define TRACE 0
-
-/***
-Algorithm to compare two trees, X and Y.
-
-A node x in X and node y in Y are defined to be
-similar iff the set of leaves in the subtree under
-x is identical to the set of leaves under y.
-
-A node is defined to be dissimilar iff it is not
-similar to any node in the other tree.
-
-Nodes x and y are defined to be married iff every
-node in the subtree under x is similar to a node
-in the subtree under y. Married nodes are considered
-to be equal. The subtrees under two married nodes can
-at most differ by exchanges of left and right branches,
-which we do not consider to be significant here.
-
-A node is defined to be a bachelor iff it is not
-married. If a node is a bachelor, then it has a
-dissimilar node in its subtree, and it follows
-immediately from the definition of marriage that its
-parent is also a bachelor. Hence all nodes on the path
-from a bachelor node to the root are bachelors.
-
-We assume the trees have the same set of leaves, so
-every leaf is trivially both similar and married to
-the same leaf in the opposite tree. Bachelor nodes
-are therefore always internal (i.e., non-leaf) nodes.
-
-A node is defined to be a diff iff (a) it is married
-and (b) its parent is a bachelor. The subtree under
-a diff is maximally similar to the other tree. (In
-other words, you cannot extend the subtree without
-adding a bachelor).
-
-The set of diffs is the subset of the two trees that
-we consider to be identical.
-
-Example:
-
- -----A
- -----k
- ----j -----B
---i -----C
- ------D
-
-
- -----A
- -----p
- ----n -----B
---m -----D
- ------C
-
-
-The following pairs of internal nodes are similar.
-
- Nodes Set of leaves
- ----- -------------
- k,p A,B
- i,m A,B,C,D
-
-Bachelors in the first tree are i and j, bachelors
-in the second tree are m and n.
-
-Nodes k and p are married, but i and m are not (because j
-and n are bachelors). The diffs are C, D and k.
-
-The set of bachelor nodes can be viewed as the internal
-nodes of a tree, the leaves of which are diffs. (To see
-that there can't be disjoint subtrees, note that the path
-from a diff to a root is all bachelor nodes, so there is
-always a path between two diffs that goes through the root).
-We call this tree the "diffs tree".
-
-There is a simple O(N) algorithm to build the diffs tree.
-To achieve O(N) we avoid traversing a given subtree multiple
-times and also avoid comparing lists of leaves.
-
-We visit nodes in depth-first order (i.e., a node is visited
-before its parent).
-
-If either child of a node is a bachelor, we flag it as
-a bachelor.
-
-If both children of the node we are visiting are married,
-we check whether the spouses of those children have the
-same parent in the other tree. If the parents are different,
-the current node is a bachelor. If they have the same parent,
-then the node we are visiting is the spouse of that parent.
-We assign this newly identified married couple a unique integer
-id. The id of a node is in one-to-one correspondence with the
-set of leaves in its subtree. Two nodes have the same set of
-leaves iff they have the same id. Bachelor nodes do not get
-an id.
-***/
-
-// Recursively copies the part of tree that lies above the diff nodes
-// into Diffs. uTreeNodeIndex is the current node in tree and
-// uDiffsNodeIndex the node in Diffs that corresponds to it.
-// When the current node is a diff, the recursion stops and every leaf
-// under it is mapped (by leaf id) to uDiffsNodeIndex in
-// IdToDiffsLeafNodeIndex. Otherwise a branch is appended to Diffs and
-// both children are processed.
-static void BuildDiffs(const Tree &tree, unsigned uTreeNodeIndex,
-  const bool bIsDiff[], Tree &Diffs, unsigned uDiffsNodeIndex,
-  unsigned IdToDiffsLeafNodeIndex[])
-    {
-#if TRACE
-    Log("BuildDiffs(TreeNode=%u IsDiff=%d IsLeaf=%d)\n",
-      uTreeNodeIndex, bIsDiff[uTreeNodeIndex], tree.IsLeaf(uTreeNodeIndex));
-#endif
-    if (!bIsDiff[uTreeNodeIndex])
-        {
-        // Leaves are always expected to be covered by some diff node.
-        if (tree.IsLeaf(uTreeNodeIndex))
-            Quit("BuildDiffs: should never reach leaf");
-
-        const unsigned uChildLeft = tree.GetLeft(uTreeNodeIndex);
-        const unsigned uChildRight = tree.GetRight(uTreeNodeIndex);
-
-        // AppendBranch returns the index of the new left child; the new
-        // right child is taken to be the node right after it.
-        const unsigned uNewLeft = Diffs.AppendBranch(uDiffsNodeIndex);
-        const unsigned uNewRight = uNewLeft + 1;
-
-        BuildDiffs(tree, uChildLeft, bIsDiff, Diffs, uNewLeft, IdToDiffsLeafNodeIndex);
-        BuildDiffs(tree, uChildRight, bIsDiff, Diffs, uNewRight, IdToDiffsLeafNodeIndex);
-        return;
-        }
-
-    // Diff node: collect the leaves of its subtree. The buffer is sized
-    // for every leaf of the tree; GetLeaves reports how many it wrote.
-    const unsigned uTreeLeafCount = tree.GetLeafCount();
-    unsigned uSubtreeLeafCount = uTreeLeafCount;
-    unsigned *SubtreeLeaves = new unsigned[uTreeLeafCount];
-    GetLeaves(tree, uTreeNodeIndex, SubtreeLeaves, &uSubtreeLeafCount);
-    for (unsigned uLeaf = 0; uLeaf < uSubtreeLeafCount; ++uLeaf)
-        {
-        const unsigned uLeafId = tree.GetLeafId(SubtreeLeaves[uLeaf]);
-        if (uLeafId >= uTreeLeafCount)
-            Quit("BuildDiffs, id out of range");
-        IdToDiffsLeafNodeIndex[uLeafId] = uDiffsNodeIndex;
-#if TRACE
-        Log(" Leaf id=%u DiffsNode=%u\n", uLeafId, uDiffsNodeIndex);
-#endif
-        }
-    delete[] SubtreeLeaves;
-    }
-
-void DiffTrees(const Tree &Tree1, const Tree &Tree2, Tree &Diffs,
- unsigned IdToDiffsLeafNodeIndex[])
- { /* Compares two rooted trees that have the same number of nodes
-      (Quit otherwise). Nodes of Tree1 are classified as bachelor or diff
-      as defined in the comment at the top of this file; Diffs is then
-      built from Tree1 by BuildDiffs, and IdToDiffsLeafNodeIndex[id] is
-      set, for every leaf id in 0 .. leaf count - 1, to the Diffs node
-      that covers that leaf. */
-#if TRACE
- Log("Tree1:\n");
- Tree1.LogMe();
- Log("\n");
- Log("Tree2:\n");
- Tree2.LogMe();
-#endif
-
- if (!Tree1.IsRooted() || !Tree2.IsRooted())
- Quit("DiffTrees: requires rooted trees");
-
- const unsigned uNodeCount = Tree1.GetNodeCount();
- const unsigned uNodeCount2 = Tree2.GetNodeCount();
-
- const unsigned uLeafCount = Tree1.GetLeafCount();
- const unsigned uLeafCount2 = Tree2.GetLeafCount();
- assert(uLeafCount == uLeafCount2);
-
- if (uNodeCount != uNodeCount2)
- Quit("DiffTrees: different node counts");
-
-// Allocate tables so we can convert a Tree1 node index to its id,
-// and an id to a Tree2 node index, with a O(1) lookup.
- unsigned *NodeIndexToId1 = new unsigned[uNodeCount];
- unsigned *IdToNodeIndex2 = new unsigned[uNodeCount];
-
- bool *bIsBachelor1 = new bool[uNodeCount];
- bool *bIsDiff1 = new bool[uNodeCount];
-
- for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
- {
- NodeIndexToId1[uNodeIndex] = uNodeCount; // uNodeCount doubles as "no id assigned"
- bIsBachelor1[uNodeIndex] = false;
- bIsDiff1[uNodeIndex] = false;
-
- // Use uNodeCount as value meaning "not set".
- IdToNodeIndex2[uNodeIndex] = uNodeCount;
- }
-
-// Initialize node index <-> id lookup tables
- for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
- {
- if (Tree1.IsLeaf(uNodeIndex))
- {
- const unsigned uId = Tree1.GetLeafId(uNodeIndex);
- if (uId >= uNodeCount) // NOTE(review): bound is the node count although the message speaks of 0 .. (N-1); confirm which N is meant
- Quit("Diff trees requires existing leaf ids in range 0 .. (N-1)");
- NodeIndexToId1[uNodeIndex] = uId;
- }
-
- if (Tree2.IsLeaf(uNodeIndex))
- {
- const unsigned uId = Tree2.GetLeafId(uNodeIndex);
- if (uId >= uNodeCount)
- Quit("Diff trees requires existing leaf ids in range 0 .. (N-1)");
- IdToNodeIndex2[uId] = uNodeIndex;
- }
- }
-
-// Validity check. This verifies that every id in
-// 0 .. uLeafCount-1 was assigned to some leaf of Tree2
-// (note that the range check above does not rule
-// out two leaves having duplicate ids).
- for (unsigned uId = 0; uId < uLeafCount; ++uId)
- {
- unsigned uNodeIndex2 = IdToNodeIndex2[uId];
- if (uNodeCount == uNodeIndex2)
- Quit("DiffTrees, check 2");
- }
-
-// Ids assigned to internal nodes are N, N+1 ...
-// An internal node id uniquely identifies a set
-// of two or more leaves.
- unsigned uInternalNodeId = uLeafCount;
-
-// Depth-first traversal of tree.
-// The order guarantees that a node is visited before
-// its parent is visited.
- for (unsigned uNodeIndex1 = Tree1.FirstDepthFirstNode();
- NULL_NEIGHBOR != uNodeIndex1;
- uNodeIndex1 = Tree1.NextDepthFirstNode(uNodeIndex1))
- {
-#if TRACE
- Log("Main loop: Node1=%u IsLeaf=%d IsBachelor=%d\n",
- uNodeIndex1,
- Tree1.IsLeaf(uNodeIndex1),
- bIsBachelor1[uNodeIndex1]);
-#endif
-
- // Leaves are trivial and nodes already flagged as bachelors are final; nothing to do.
- if (Tree1.IsLeaf(uNodeIndex1) || bIsBachelor1[uNodeIndex1])
- continue;
-
- // If either child is a bachelor, flag
- // this node as a bachelor and continue.
- unsigned uLeft1 = Tree1.GetLeft(uNodeIndex1);
- if (bIsBachelor1[uLeft1])
- {
- bIsBachelor1[uNodeIndex1] = true;
- continue;
- }
-
- unsigned uRight1 = Tree1.GetRight(uNodeIndex1);
- if (bIsBachelor1[uRight1])
- {
- bIsBachelor1[uNodeIndex1] = true;
- continue;
- }
-
- // Both children are married.
- // Married nodes are guaranteed to have an id.
- unsigned uIdLeft = NodeIndexToId1[uLeft1];
- unsigned uIdRight = NodeIndexToId1[uRight1];
-
- if (uIdLeft == uNodeCount || uIdRight == uNodeCount)
- Quit("DiffTrees, check 5");
-
- // uLeft2 is the spouse of uLeft1, and similarly for uRight2.
- unsigned uLeft2 = IdToNodeIndex2[uIdLeft];
- unsigned uRight2 = IdToNodeIndex2[uIdRight];
-
- if (uLeft2 == uNodeCount || uRight2 == uNodeCount)
- Quit("DiffTrees, check 6");
-
- // If the spouses of uLeft1 and uRight1 have the same
- // parent, then this parent is the spouse of uNodeIndex1.
- // Otherwise, uNodeIndex1 is a bachelor.
- unsigned uParentLeft2 = Tree2.GetParent(uLeft2);
- unsigned uParentRight2 = Tree2.GetParent(uRight2);
-
-#if TRACE
- Log("L1=%u R1=%u L2=%u R2=%u PL2=%u PR2=%u\n",
- uLeft1,
- uRight1,
- uLeft2,
- uRight2,
- uParentLeft2,
- uParentRight2);
-#endif
-
- if (uParentLeft2 == uParentRight2)
- {
- NodeIndexToId1[uNodeIndex1] = uInternalNodeId; // newly married couple gets the next unused id
- IdToNodeIndex2[uInternalNodeId] = uParentLeft2;
- ++uInternalNodeId;
- }
- else
- bIsBachelor1[uNodeIndex1] = true;
- }
-
- unsigned uDiffCount = 0; // counted below but not read anywhere in this function
- for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
- {
- if (bIsBachelor1[uNodeIndex])
- continue;
- if (Tree1.IsRoot(uNodeIndex))
- {
- // Special case: if no bachelors, consider the
- // root a diff.
- if (!bIsBachelor1[uNodeIndex]) // always true here: bachelors were skipped at the top of the loop
- bIsDiff1[uNodeIndex] = true;
- continue;
- }
- const unsigned uParent = Tree1.GetParent(uNodeIndex);
- if (bIsBachelor1[uParent]) // married node with a bachelor parent: a diff
- {
- bIsDiff1[uNodeIndex] = true;
- ++uDiffCount;
- }
- }
-
-#if TRACE
- Log("Tree1:\n");
- Log("Node Id Bach Diff Name\n");
- Log("---- ---- ---- ---- ----\n");
- for (unsigned n = 0; n < uNodeCount; ++n)
- {
- Log("%4u %4u %d %d",
- n,
- NodeIndexToId1[n],
- bIsBachelor1[n],
- bIsDiff1[n]);
- if (Tree1.IsLeaf(n))
- Log(" %s", Tree1.GetLeafName(n));
- Log("\n");
- }
- Log("\n");
- Log("Tree2:\n");
- Log("Node Id Name\n");
- Log("---- ---- ----\n");
- for (unsigned n = 0; n < uNodeCount; ++n)
- {
- Log("%4u ", n);
- if (Tree2.IsLeaf(n))
- Log(" %s", Tree2.GetLeafName(n));
- Log("\n");
- }
-#endif
-
- Diffs.CreateRooted();
- const unsigned uDiffsRootIndex = Diffs.GetRootNodeIndex();
- const unsigned uRootIndex1 = Tree1.GetRootNodeIndex();
-
- for (unsigned n = 0; n < uLeafCount; ++n)
- IdToDiffsLeafNodeIndex[n] = uNodeCount; // "not set" marker, verified after BuildDiffs
-
- BuildDiffs(Tree1, uRootIndex1, bIsDiff1, Diffs, uDiffsRootIndex,
- IdToDiffsLeafNodeIndex);
-
-#if TRACE
- Log("\n");
- Log("Diffs:\n");
- Diffs.LogMe();
- Log("\n");
- Log("IdToDiffsLeafNodeIndex:");
- for (unsigned n = 0; n < uLeafCount; ++n)
- {
- if (n%16 == 0)
- Log("\n");
- else
- Log(" ");
- Log("%u=%u", n, IdToDiffsLeafNodeIndex[n]);
- }
- Log("\n");
-#endif
-
- for (unsigned n = 0; n < uLeafCount; ++n)
- if (IdToDiffsLeafNodeIndex[n] == uNodeCount) // a leaf id that BuildDiffs never mapped
- Quit("TreeDiffs check 7");
-
- delete[] NodeIndexToId1;
- delete[] IdToNodeIndex2;
-
- delete[] bIsBachelor1;
- delete[] bIsDiff1;
- }
Deleted: trunk/packages/muscle/trunk/difftreese.cpp
===================================================================
--- trunk/packages/muscle/trunk/difftreese.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/difftreese.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,235 +0,0 @@
-#include "muscle.h"
-#include "tree.h"
-
-#define TRACE 0
-
-/***
-Algorithm to compare two trees, X and Y.
-
-A node x in X and node y in Y are defined to be
-similar iff the set of leaves in the subtree under
-x is identical to the set of leaves under y.
-
-A node is defined to be changed iff it is not
-similar to any node in the other tree.
-
-Nodes x and y are defined to be married iff every
-node in the subtree under x is similar to a node
-in the subtree under y. Married nodes are considered
-to be equal. The subtrees under two married nodes can
-at most differ by exchanges of left and right branches,
-which we do not consider to be significant here.
-
-A node is changed iff it is not married. If a node is
-changed, then it has a dissimilar node in its subtree,
-and it follows immediately from the definition of marriage
-that its parent is also a bachelor. Hence all nodes on the
-path from a changed node to the root are changed.
-
-We assume the trees have the same set of leaves, so
-every leaf is trivially both similar and married to
-the same leaf in the opposite tree. Changed nodes
-are therefore always internal (i.e., non-leaf) nodes.
-
-Example:
-
- -----A
- -----k
- ----j -----B
---i -----C
- ------D
-
-
- -----A
- -----p
- ----n -----B
---m -----D
- ------C
-
-
-The following pairs of internal nodes are similar.
-
- Nodes Set of leaves
- ----- -------------
- k,p A,B
- i,m A,B,C,D
-
-Changed nodes in the first tree are i and j, changed nodes
-in the second tree are m and n.
-
-Node k and p are married, but i and m are not (because j
-and n are changed). The diffs are C, D and k.
-
-To achieve O(N) we avoid traversing a given subtree multiple
-times and also avoid comparing lists of leaves.
-
-We visit nodes in depth-first order (i.e., a node is visited
-before its parent).
-
-If either child of a node is changed, we flag it as changed.
-
-If both children of the node we are visiting are married,
-we check whether the spouses of those children have the
-same parent in the other tree. If the parents are different,
-the current node is a bachelor. If they have the same parent,
-then the node we are visiting is the spouse of that parent.
-We assign this newly identified married couple a unique integer
-id. The id of a node is in one-to-one correspondence with the
-set of leaves in its subtree. Two nodes have the same set of
-leaves iff they have the same id. Changed nodes do not get
-an id.
-***/
-
-void DiffTreesE(const Tree &NewTree, const Tree &OldTree,
- unsigned NewNodeIndexToOldNodeIndex[])
- {
-#if TRACE
- Log("DiffTreesE NewTree:\n");
- NewTree.LogMe();
- Log("\n");
- Log("OldTree:\n");
- OldTree.LogMe();
-#endif
-
- if (!NewTree.IsRooted() || !OldTree.IsRooted())
- Quit("DiffTrees: requires rooted trees");
-
- const unsigned uNodeCount = NewTree.GetNodeCount();
- const unsigned uOldNodeCount = OldTree.GetNodeCount();
- const unsigned uLeafCount = NewTree.GetLeafCount();
- const unsigned uOldLeafCount = OldTree.GetLeafCount();
- if (uNodeCount != uOldNodeCount || uLeafCount != uOldLeafCount)
- Quit("DiffTreesE: different node counts");
-
- {
- unsigned *IdToOldNodeIndex = new unsigned[uNodeCount];
- for (unsigned uOldNodeIndex = 0; uOldNodeIndex < uNodeCount; ++uOldNodeIndex)
- {
- if (OldTree.IsLeaf(uOldNodeIndex))
- {
- unsigned Id = OldTree.GetLeafId(uOldNodeIndex);
- IdToOldNodeIndex[Id] = uOldNodeIndex;
- }
- }
-
-// Initialize NewNodeIndexToOldNodeIndex[]
-// All internal nodes are marked as changed, but may be updated later.
- for (unsigned uNewNodeIndex = 0; uNewNodeIndex < uNodeCount; ++uNewNodeIndex)
- {
- if (NewTree.IsLeaf(uNewNodeIndex))
- {
- unsigned uId = NewTree.GetLeafId(uNewNodeIndex);
- assert(uId < uLeafCount);
-
- unsigned uOldNodeIndex = IdToOldNodeIndex[uId];
- assert(uOldNodeIndex < uNodeCount);
-
- NewNodeIndexToOldNodeIndex[uNewNodeIndex] = uOldNodeIndex;
- }
- else
- NewNodeIndexToOldNodeIndex[uNewNodeIndex] = NODE_CHANGED;
- }
- delete[] IdToOldNodeIndex;
- }
-
-// Depth-first traversal of tree.
-// The order guarantees that a node is visited before
-// its parent is visited.
- for (unsigned uNewNodeIndex = NewTree.FirstDepthFirstNode();
- NULL_NEIGHBOR != uNewNodeIndex;
- uNewNodeIndex = NewTree.NextDepthFirstNode(uNewNodeIndex))
- {
- if (NewTree.IsLeaf(uNewNodeIndex))
- continue;
-
- // If either child is changed, flag this node as changed and continue.
- unsigned uNewLeft = NewTree.GetLeft(uNewNodeIndex);
- unsigned uOldLeft = NewNodeIndexToOldNodeIndex[uNewLeft];
- if (NODE_CHANGED == uOldLeft)
- {
- NewNodeIndexToOldNodeIndex[uNewLeft] = NODE_CHANGED;
- continue;
- }
-
- unsigned uNewRight = NewTree.GetRight(uNewNodeIndex);
- unsigned uOldRight = NewNodeIndexToOldNodeIndex[uNewRight];
- if (NODE_CHANGED == NewNodeIndexToOldNodeIndex[uNewRight])
- {
- NewNodeIndexToOldNodeIndex[uNewRight] = NODE_CHANGED;
- continue;
- }
-
- unsigned uOldParentLeft = OldTree.GetParent(uOldLeft);
- unsigned uOldParentRight = OldTree.GetParent(uOldRight);
- if (uOldParentLeft == uOldParentRight)
- NewNodeIndexToOldNodeIndex[uNewNodeIndex] = uOldParentLeft;
- else
- NewNodeIndexToOldNodeIndex[uNewNodeIndex] = NODE_CHANGED;
- }
-
-#if TRACE
- {
- Log("NewToOld ");
- for (unsigned uNewNodeIndex = 0; uNewNodeIndex < uNodeCount; ++uNewNodeIndex)
- {
- Log(" [%3u]=", uNewNodeIndex);
- if (NODE_CHANGED == NewNodeIndexToOldNodeIndex[uNewNodeIndex])
- Log(" X");
- else
- Log("%3u", NewNodeIndexToOldNodeIndex[uNewNodeIndex]);
- if ((uNewNodeIndex+1)%8 == 0)
- Log("\n ");
- }
- Log("\n");
- }
-#endif
-
-#if DEBUG
- {
- for (unsigned uNewNodeIndex = 0; uNewNodeIndex < uNodeCount; ++uNewNodeIndex)
- {
- unsigned uOld = NewNodeIndexToOldNodeIndex[uNewNodeIndex];
- if (NewTree.IsLeaf(uNewNodeIndex))
- {
- if (uOld >= uNodeCount)
- {
- Log("NewNode=%u uOld=%u > uNodeCount=%u\n",
- uNewNodeIndex, uOld, uNodeCount);
- Quit("Diff check failed");
- }
- unsigned uIdNew = NewTree.GetLeafId(uNewNodeIndex);
- unsigned uIdOld = OldTree.GetLeafId(uOld);
- if (uIdNew != uIdOld)
- {
- Log("NewNode=%u uOld=%u IdNew=%u IdOld=%u\n",
- uNewNodeIndex, uOld, uIdNew, uIdOld);
- Quit("Diff check failed");
- }
- continue;
- }
-
- if (NODE_CHANGED == uOld)
- continue;
-
- unsigned uNewLeft = NewTree.GetLeft(uNewNodeIndex);
- unsigned uNewRight = NewTree.GetRight(uNewNodeIndex);
-
- unsigned uOldLeft = OldTree.GetLeft(uOld);
- unsigned uOldRight = OldTree.GetRight(uOld);
-
- unsigned uNewLeftPartner = NewNodeIndexToOldNodeIndex[uNewLeft];
- unsigned uNewRightPartner = NewNodeIndexToOldNodeIndex[uNewRight];
-
- bool bSameNotRotated = (uNewLeftPartner == uOldLeft && uNewRightPartner == uOldRight);
- bool bSameRotated = (uNewLeftPartner == uOldRight && uNewRightPartner == uOldLeft);
- if (!bSameNotRotated && !bSameRotated)
- {
- Log("NewNode=%u NewL=%u NewR=%u\n", uNewNodeIndex, uNewLeft, uNewRight);
- Log("OldNode=%u OldL=%u OldR=%u\n", uOld, uOldLeft, uOldRight);
- Log("NewLPartner=%u NewRPartner=%u\n", uNewLeftPartner, uNewRightPartner);
- Quit("Diff check failed");
- }
- }
- }
-#endif
- }
Deleted: trunk/packages/muscle/trunk/distcalc.cpp
===================================================================
--- trunk/packages/muscle/trunk/distcalc.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/distcalc.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,72 +0,0 @@
-#include "muscle.h"
-#include "distfunc.h"
-#include "distcalc.h"
-#include "msa.h"
-
-void DistCalcDF::Init(const DistFunc &DF)
- {
- m_ptrDF = &DF;
- }
-
-void DistCalcDF::CalcDistRange(unsigned i, dist_t Dist[]) const
- {
- for (unsigned j = 0; j < i; ++j)
- Dist[j] = m_ptrDF->GetDist(i, j);
- }
-
-unsigned DistCalcDF::GetCount() const
- {
- return m_ptrDF->GetCount();
- }
-
-unsigned DistCalcDF::GetId(unsigned i) const
- {
- return m_ptrDF->GetId(i);
- }
-
-const char *DistCalcDF::GetName(unsigned i) const
- {
- return m_ptrDF->GetName(i);
- }
-
-void DistCalcMSA::Init(const MSA &msa, DISTANCE Distance)
- {
- m_ptrMSA = &msa;
- m_Distance = Distance;
- }
-
-void DistCalcMSA::CalcDistRange(unsigned i, dist_t Dist[]) const
- {
-// const unsigned uSeqIndex1 = m_ptrMSA->GetSeqIndex(i);
- for (unsigned j = 0; j < i; ++j)
- {
-// const unsigned uSeqIndex2 = m_ptrMSA->GetSeqIndex(j);
- const float PctId = (float) m_ptrMSA->GetPctIdentityPair(i, j);
- switch (m_Distance)
- {
- case DISTANCE_PctIdKimura:
- Dist[j] = (float) KimuraDist(PctId);
- break;
- case DISTANCE_PctIdLog:
- Dist[j] = (float) PctIdToMAFFTDist(PctId);
- break;
- default:
- Quit("DistCalcMSA: Invalid DISTANCE_%u", m_Distance);
- }
- }
- }
-
-unsigned DistCalcMSA::GetCount() const
- {
- return m_ptrMSA->GetSeqCount();
- }
-
-unsigned DistCalcMSA::GetId(unsigned i) const
- {
- return m_ptrMSA->GetSeqId(i);
- }
-
-const char *DistCalcMSA::GetName(unsigned i) const
- {
- return m_ptrMSA->GetSeqName(i);
- }
Deleted: trunk/packages/muscle/trunk/distcalc.h
===================================================================
--- trunk/packages/muscle/trunk/distcalc.h 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/distcalc.h 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,45 +0,0 @@
-#ifndef DistCalc_h
-#define DistCalc_h
-
-typedef float dist_t;
-const dist_t BIG_DIST = (dist_t) 1e29;
-
-class DistFunc;
-
-class DistCalc
- {
-public:
- virtual void CalcDistRange(unsigned i, dist_t Dist[]) const = 0;
- virtual unsigned GetCount() const = 0;
- virtual unsigned GetId(unsigned i) const = 0;
- virtual const char *GetName(unsigned i) const = 0;
- };
-
-class DistCalcDF : public DistCalc
- {
-public:
- void Init(const DistFunc &DF);
- virtual void CalcDistRange(unsigned i, dist_t Dist[]) const;
- virtual unsigned GetCount() const;
- virtual unsigned GetId(unsigned i) const;
- virtual const char *GetName(unsigned i) const;
-
-private:
- const DistFunc *m_ptrDF;
- };
-
-class DistCalcMSA : public DistCalc
- {
-public:
- void Init(const MSA &msa, DISTANCE Distance);
- virtual void CalcDistRange(unsigned i, dist_t Dist[]) const;
- virtual unsigned GetCount() const;
- virtual unsigned GetId(unsigned i) const;
- virtual const char *GetName(unsigned i) const;
-
-private:
- const MSA *m_ptrMSA;
- DISTANCE m_Distance;
- };
-
-#endif // DistCalc_h
Deleted: trunk/packages/muscle/trunk/distfunc.cpp
===================================================================
--- trunk/packages/muscle/trunk/distfunc.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/distfunc.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,113 +0,0 @@
-#include "muscle.h"
-#include "distfunc.h"
-#include <assert.h>
-
-DistFunc::DistFunc()
- {
- m_Dists = 0;
- m_uCount = 0;
- m_uCacheCount = 0;
- m_Names = 0;
- m_Ids = 0;
- }
-
-DistFunc::~DistFunc()
- {
- if (0 != m_Names)
- {
- for (unsigned i = 0; i < m_uCount; ++i)
- free(m_Names[i]);
- }
- delete[] m_Dists;
- delete[] m_Names;
- delete[] m_Ids;
- }
-
-float DistFunc::GetDist(unsigned uIndex1, unsigned uIndex2) const
- {
- return m_Dists[VectorIndex(uIndex1, uIndex2)];
- }
-
-unsigned DistFunc::GetCount() const
- {
- return m_uCount;
- }
-
-void DistFunc::SetCount(unsigned uCount)
- {
- m_uCount = uCount;
- if (uCount <= m_uCacheCount)
- return;
- delete[] m_Dists;
- m_Dists = new float[VectorLength()];
- m_Names = new char *[m_uCount];
- m_Ids = new unsigned[m_uCount];
- m_uCacheCount = uCount;
-
- memset(m_Names, 0, m_uCount*sizeof(char *));
- memset(m_Ids, 0xff, m_uCount*sizeof(unsigned));
- memset(m_Dists, 0, VectorLength()*sizeof(float));
- }
-
-void DistFunc::SetDist(unsigned uIndex1, unsigned uIndex2, float dDist)
- {
- m_Dists[VectorIndex(uIndex1, uIndex2)] = dDist;
- m_Dists[VectorIndex(uIndex2, uIndex1)] = dDist;
- }
-
-unsigned DistFunc::VectorIndex(unsigned uIndex1, unsigned uIndex2) const
- {
- assert(uIndex1 < m_uCount && uIndex2 < m_uCount);
- return uIndex1*m_uCount + uIndex2;
- }
-
-unsigned DistFunc::VectorLength() const
- {
- return m_uCount*m_uCount;
- }
-
-void DistFunc::SetName(unsigned uIndex, const char szName[])
- {
- assert(uIndex < m_uCount);
- m_Names[uIndex] = strsave(szName);
- }
-
-void DistFunc::SetId(unsigned uIndex, unsigned uId)
- {
- assert(uIndex < m_uCount);
- m_Ids[uIndex] = uId;
- }
-
-const char *DistFunc::GetName(unsigned uIndex) const
- {
- assert(uIndex < m_uCount);
- return m_Names[uIndex];
- }
-
-unsigned DistFunc::GetId(unsigned uIndex) const
- {
- assert(uIndex < m_uCount);
- return m_Ids[uIndex];
- }
-
-void DistFunc::LogMe() const
- {
- Log("DistFunc::LogMe count=%u\n", m_uCount);
- Log(" ");
- for (unsigned i = 0; i < m_uCount; ++i)
- Log(" %7u", i);
- Log("\n");
-
- Log(" ");
- for (unsigned i = 0; i < m_uCount; ++i)
- Log(" %7.7s", m_Names[i] ? m_Names[i] : "");
- Log("\n");
-
- for (unsigned i = 0; i < m_uCount; ++i)
- {
- Log("%4u %10.10s : ", i, m_Names[i] ? m_Names[i] : "");
- for (unsigned j = 0; j <= i; ++j)
- Log(" %7.4g", GetDist(i, j));
- Log("\n");
- }
- }
Deleted: trunk/packages/muscle/trunk/distfunc.h
===================================================================
--- trunk/packages/muscle/trunk/distfunc.h 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/distfunc.h 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,36 +0,0 @@
-#ifndef DistFunc_h
-#define DistFunc_h
-
-class DistFunc
- {
-public:
- DistFunc();
- virtual ~DistFunc();
-
-public:
- virtual void SetCount(unsigned uCount);
- virtual void SetDist(unsigned uIndex1, unsigned uIndex2, float dDist);
-
- void SetName(unsigned uIndex, const char szName[]);
- void SetId(unsigned uIndex, unsigned uId);
- const char *GetName(unsigned uIndex) const;
- unsigned GetId(unsigned uIndex) const;
-
- virtual float GetDist(unsigned uIndex1, unsigned uIndex2) const;
- virtual unsigned GetCount() const;
-
- void LogMe() const;
-
-protected:
- unsigned VectorIndex(unsigned uIndex, unsigned uIndex2) const;
- unsigned VectorLength() const;
-
-private:
- unsigned m_uCount;
- unsigned m_uCacheCount;
- float *m_Dists;
- char **m_Names;
- unsigned *m_Ids;
- };
-
-#endif // DistFunc_h
Deleted: trunk/packages/muscle/trunk/distpwkimura.cpp
===================================================================
--- trunk/packages/muscle/trunk/distpwkimura.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/distpwkimura.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,45 +0,0 @@
-#include "muscle.h"
-#include "distfunc.h"
-#include "msa.h"
-#include "seqvect.h"
-#include "pwpath.h"
-
-void DistPWKimura(const SeqVect &v, DistFunc &DF)
- {
- SEQWEIGHT SeqWeightSave = GetSeqWeightMethod();
- SetSeqWeightMethod(SEQWEIGHT_Henikoff);
-
- const unsigned uSeqCount = v.Length();
- DF.SetCount(uSeqCount);
-
- const unsigned uPairCount = (uSeqCount*(uSeqCount + 1))/2;
- unsigned uCount = 0;
- SetProgressDesc("PWKimura distance");
- for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)
- {
- const Seq &s1 = v.GetSeq(uSeqIndex1);
- MSA msa1;
- msa1.FromSeq(s1);
- for (unsigned uSeqIndex2 = 0; uSeqIndex2 < uSeqIndex1; ++uSeqIndex2)
- {
- if (0 == uCount%20)
- Progress(uCount, uPairCount);
- ++uCount;
- const Seq &s2 = v.GetSeq(uSeqIndex2);
- MSA msa2;
- msa2.FromSeq(s2);
-
- PWPath Path;
- MSA msaOut;
- AlignTwoMSAs(msa1, msa2, msaOut, Path, false, false);
-
- double dPctId = msaOut.GetPctIdentityPair(0, 1);
- float f = (float) KimuraDist(dPctId);
-
- DF.SetDist(uSeqIndex1, uSeqIndex2, f);
- }
- }
- ProgressStepsDone();
-
- SetSeqWeightMethod(SeqWeightSave);
- }
Deleted: trunk/packages/muscle/trunk/domuscle.cpp
===================================================================
--- trunk/packages/muscle/trunk/domuscle.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/domuscle.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,299 +0,0 @@
-#include "muscle.h"
-#include "textfile.h"
-#include "seqvect.h"
-#include "distfunc.h"
-#include "msa.h"
-#include "tree.h"
-#include "profile.h"
-#include "timing.h"
-
-static char g_strUseTreeWarning[] =
-"\n******** WARNING ****************\n"
-"\nYou specified the -usetree option.\n"
-"Note that a good evolutionary tree may NOT be a good\n"
-"guide tree for multiple alignment. For more details,\n"
-"please refer to the user guide. To disable this\n"
-"warning, use -usetree_nowarn <treefilename>.\n\n";
-
-void DoMuscle()
- {
- SetOutputFileName(g_pstrOutFileName);
- SetInputFileName(g_pstrInFileName);
-
- SetMaxIters(g_uMaxIters);
- SetSeqWeightMethod(g_SeqWeight1);
-
- TextFile fileIn(g_pstrInFileName);
- SeqVect v;
- v.FromFASTAFile(fileIn);
- const unsigned uSeqCount = v.Length();
-
- if (0 == uSeqCount)
- Quit("No sequences in input file");
-
- ALPHA Alpha = ALPHA_Undefined;
- switch (g_SeqType)
- {
- case SEQTYPE_Auto:
- Alpha = v.GuessAlpha();
- break;
-
- case SEQTYPE_Protein:
- Alpha = ALPHA_Amino;
- break;
-
- case SEQTYPE_DNA:
- Alpha = ALPHA_DNA;
- break;
-
- case SEQTYPE_RNA:
- Alpha = ALPHA_RNA;
- break;
-
- default:
- Quit("Invalid seq type");
- }
- SetAlpha(Alpha);
- v.FixAlpha();
-
- PTR_SCOREMATRIX UserMatrix = 0;
- if (0 != g_pstrMatrixFileName)
- {
- const char *FileName = g_pstrMatrixFileName;
- const char *Path = getenv("MUSCLE_MXPATH");
- if (Path != 0)
- {
- size_t n = strlen(Path) + 1 + strlen(FileName) + 1;
- char *NewFileName = new char[n];
- sprintf(NewFileName, "%s/%s", Path, FileName);
- FileName = NewFileName;
- }
- TextFile File(FileName);
- UserMatrix = ReadMx(File);
- g_Alpha = ALPHA_Amino;
- g_PPScore = PPSCORE_SP;
- }
-
- SetPPScore();
-
- if (0 != UserMatrix)
- g_ptrScoreMatrix = UserMatrix;
-
- unsigned uMaxL = 0;
- unsigned uTotL = 0;
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
- {
- unsigned L = v.GetSeq(uSeqIndex).Length();
- uTotL += L;
- if (L > uMaxL)
- uMaxL = L;
- }
-
- SetIter(1);
- g_bDiags = g_bDiags1;
- SetSeqStats(uSeqCount, uMaxL, uTotL/uSeqCount);
-
- SetMuscleSeqVect(v);
-
- MSA::SetIdCount(uSeqCount);
-
-// Initialize sequence ids.
-// From this point on, ids must somehow propogate from here.
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
- v.SetSeqId(uSeqIndex, uSeqIndex);
-
- if (0 == uSeqCount)
- Quit("Input file '%s' has no sequences", g_pstrInFileName);
- if (1 == uSeqCount)
- {
- TextFile fileOut(g_pstrOutFileName, true);
- v.ToFile(fileOut);
- return;
- }
-
- if (uSeqCount > 1)
- MHackStart(v);
-
-// First iteration
- Tree GuideTree;
- if (0 != g_pstrUseTreeFileName)
- {
- // Discourage users...
- if (!g_bUseTreeNoWarn)
- fprintf(stderr, g_strUseTreeWarning);
-
- // Read tree from file
- TextFile TreeFile(g_pstrUseTreeFileName);
- GuideTree.FromFile(TreeFile);
-
- // Make sure tree is rooted
- if (!GuideTree.IsRooted())
- Quit("User tree must be rooted");
-
- if (GuideTree.GetLeafCount() != uSeqCount)
- Quit("User tree does not match input sequences");
-
- const unsigned uNodeCount = GuideTree.GetNodeCount();
- for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
- {
- if (!GuideTree.IsLeaf(uNodeIndex))
- continue;
- const char *LeafName = GuideTree.GetLeafName(uNodeIndex);
- unsigned uSeqIndex;
- bool SeqFound = v.FindName(LeafName, &uSeqIndex);
- if (!SeqFound)
- Quit("Label %s in tree does not match sequences", LeafName);
- }
-
- // Set ids
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
- {
- const char *SeqName = v.GetSeqName(uSeqIndex);
- unsigned uLeafIndex = GuideTree.GetLeafNodeIndex(SeqName);
- GuideTree.SetLeafId(uLeafIndex, uSeqIndex);
- }
- }
- else
- TreeFromSeqVect(v, GuideTree, g_Cluster1, g_Distance1, g_Root1);
-
- const char *Tree1 = ValueOpt("Tree1");
- if (0 != Tree1)
- {
- TextFile f(Tree1, true);
- GuideTree.ToFile(f);
- if (g_bCluster)
- return;
- }
-
- SetMuscleTree(GuideTree);
- ValidateMuscleIds(GuideTree);
-
- MSA msa;
- ProgNode *ProgNodes = 0;
- if (g_bLow)
- ProgNodes = ProgressiveAlignE(v, GuideTree, msa);
- else
- ProgressiveAlign(v, GuideTree, msa);
- SetCurrentAlignment(msa);
-
- if (0 != g_pstrComputeWeightsFileName)
- {
- extern void OutWeights(const char *FileName, const MSA &msa);
- SetMSAWeightsMuscle(msa);
- OutWeights(g_pstrComputeWeightsFileName, msa);
- return;
- }
-
- ValidateMuscleIds(msa);
-
- if (1 == g_uMaxIters || 2 == uSeqCount)
- {
- //TextFile fileOut(g_pstrOutFileName, true);
- //MHackEnd(msa);
- //msa.ToFile(fileOut);
- MuscleOutput(msa);
- return;
- }
-
- if (0 == g_pstrUseTreeFileName)
- {
- g_bDiags = g_bDiags2;
- SetIter(2);
-
- if (g_bLow)
- {
- if (0 != g_uMaxTreeRefineIters)
- RefineTreeE(msa, v, GuideTree, ProgNodes);
- }
- else
- RefineTree(msa, GuideTree);
-
- const char *Tree2 = ValueOpt("Tree2");
- if (0 != Tree2)
- {
- TextFile f(Tree2, true);
- GuideTree.ToFile(f);
- }
- }
-
- SetSeqWeightMethod(g_SeqWeight2);
- SetMuscleTree(GuideTree);
-
- if (g_bAnchors)
- RefineVert(msa, GuideTree, g_uMaxIters - 2);
- else
- RefineHoriz(msa, GuideTree, g_uMaxIters - 2, false, false);
-
-#if 0
-// Refining by subfamilies is disabled as it didn't give better
-// results. I tried doing this before and after RefineHoriz.
-// Should get back to this as it seems like this should work.
- RefineSubfams(msa, GuideTree, g_uMaxIters - 2);
-#endif
-
- ValidateMuscleIds(msa);
- ValidateMuscleIds(GuideTree);
-
- //TextFile fileOut(g_pstrOutFileName, true);
- //MHackEnd(msa);
- //msa.ToFile(fileOut);
- MuscleOutput(msa);
- }
-
-void Run()
- {
- SetStartTime();
- Log("Started %s\n", GetTimeAsStr());
- for (int i = 0; i < g_argc; ++i)
- Log("%s ", g_argv[i]);
- Log("\n");
-
-#if TIMING
- TICKS t1 = GetClockTicks();
-#endif
- if (g_bRefine)
- Refine();
- else if (g_bRefineW)
- {
- extern void DoRefineW();
- DoRefineW();
- }
- else if (g_bProfDB)
- ProfDB();
- else if (g_bSW)
- Local();
- else if (0 != g_pstrSPFileName)
- DoSP();
- else if (g_bProfile)
- Profile();
- else if (g_bPPScore)
- PPScore();
- else if (g_bPAS)
- ProgAlignSubFams();
- else
- DoMuscle();
-
-#if TIMING
- extern TICKS g_ticksDP;
- extern TICKS g_ticksObjScore;
- TICKS t2 = GetClockTicks();
- TICKS TotalTicks = t2 - t1;
- TICKS ticksOther = TotalTicks - g_ticksDP - g_ticksObjScore;
- double dSecs = TicksToSecs(TotalTicks);
- double PctDP = (double) g_ticksDP*100.0/(double) TotalTicks;
- double PctOS = (double) g_ticksObjScore*100.0/(double) TotalTicks;
- double PctOther = (double) ticksOther*100.0/(double) TotalTicks;
- Log(" Ticks Secs Pct\n");
- Log(" ============ ======= =====\n");
- Log("DP %12ld %7.2f %5.1f%%\n",
- (long) g_ticksDP, TicksToSecs(g_ticksDP), PctDP);
- Log("OS %12ld %7.2f %5.1f%%\n",
- (long) g_ticksObjScore, TicksToSecs(g_ticksObjScore), PctOS);
- Log("Other %12ld %7.2f %5.1f%%\n",
- (long) ticksOther, TicksToSecs(ticksOther), PctOther);
- Log("Total %12ld %7.2f 100.0%%\n", (long) TotalTicks, dSecs);
-#endif
-
- ListDiagSavings();
- Log("Finished %s\n", GetTimeAsStr());
- }
Deleted: trunk/packages/muscle/trunk/dosp.cpp
===================================================================
--- trunk/packages/muscle/trunk/dosp.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/dosp.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,60 +0,0 @@
-#include "muscle.h"
-#include "textfile.h"
-#include "msa.h"
-#include "objscore.h"
-#include "tree.h"
-#include "profile.h"
-
-void DoSP()
- {
- TextFile f(g_pstrSPFileName);
-
- MSA a;
- a.FromFile(f);
-
- ALPHA Alpha = ALPHA_Undefined;
- switch (g_SeqType)
- {
- case SEQTYPE_Auto:
- Alpha = a.GuessAlpha();
- break;
-
- case SEQTYPE_Protein:
- Alpha = ALPHA_Amino;
- break;
-
- case SEQTYPE_DNA:
- Alpha = ALPHA_DNA;
- break;
-
- case SEQTYPE_RNA:
- Alpha = ALPHA_RNA;
- break;
-
- default:
- Quit("Invalid SeqType");
- }
- SetAlpha(Alpha);
- a.FixAlpha();
-
- SetPPScore();
-
- const unsigned uSeqCount = a.GetSeqCount();
- if (0 == uSeqCount)
- Quit("No sequences in input file %s", g_pstrSPFileName);
-
- MSA::SetIdCount(uSeqCount);
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
- a.SetSeqId(uSeqIndex, uSeqIndex);
-
- SetSeqWeightMethod(g_SeqWeight1);
- Tree tree;
- TreeFromMSA(a, tree, g_Cluster2, g_Distance2, g_Root2);
- SetMuscleTree(tree);
- SetMSAWeightsMuscle((MSA &) a);
-
- SCORE SP = ObjScoreSP(a);
-
- Log("File=%s;SP=%.4g\n", g_pstrSPFileName, SP);
- fprintf(stderr, "File=%s;SP=%.4g\n", g_pstrSPFileName, SP);
- }
Deleted: trunk/packages/muscle/trunk/dpregionlist.h
===================================================================
--- trunk/packages/muscle/trunk/dpregionlist.h 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/dpregionlist.h 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,73 +0,0 @@
-#ifndef DPRegionList_h
-#define DPRegionList_h
-
-#include "diaglist.h"
-
-enum DPREGIONTYPE
- {
- DPREGIONTYPE_Unknown,
- DPREGIONTYPE_Diag,
- DPREGIONTYPE_Rect
- };
-
-struct DPRegion
- {
- DPREGIONTYPE m_Type;
- union
- {
- Diag m_Diag;
- Rect m_Rect;
- };
- };
-
-const unsigned MAX_DPREGIONS = 1024;
-
-class DPRegionList
- {
-public:
- DPRegionList()
- {
- m_uCount = 0;
- }
- ~DPRegionList()
- {
- Free();
- }
-
-public:
-// Creation
- void Clear()
- {
- Free();
- }
- void Add(const DPRegion &r);
-
-// Accessors
- unsigned GetCount() const
- {
- return m_uCount;
- }
- const DPRegion &Get(unsigned uIndex) const
- {
- assert(uIndex < m_uCount);
- return m_DPRegions[uIndex];
- }
-
-// Diagnostics
- void LogMe() const;
-
-private:
- void Free()
- {
- m_uCount = 0;
- }
-
-private:
- unsigned m_uCount;
- DPRegion m_DPRegions[MAX_DPREGIONS];
- };
-
-void DiagListToDPRegionList(const DiagList &DL, DPRegionList &RL,
- unsigned uLengthA, unsigned uLengthB);
-
-#endif // DPRegionList_h
Deleted: trunk/packages/muscle/trunk/dpreglist.cpp
===================================================================
--- trunk/packages/muscle/trunk/dpreglist.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/dpreglist.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,108 +0,0 @@
-#include "muscle.h"
-#include "dpreglist.h"
-
-unsigned DPRegionList::GetDPArea() const
- {
- unsigned uArea = 0;
- for (unsigned i = 0; i < m_uCount; ++i)
- {
- const DPRegion &r = m_DPRegions[i];
- if (DPREGIONTYPE_Rect == r.m_Type)
- uArea += r.m_Rect.m_uLengthA*r.m_Rect.m_uLengthB;
- }
- return uArea;
- }
-
-void DPRegionList::Add(const DPRegion &r)
- {
- if (m_uCount == MAX_DPREGIONS)
- Quit("DPRegionList::Add, overflow %d", m_uCount);
- m_DPRegions[m_uCount] = r;
- ++m_uCount;
- }
-
-void DPRegionList::LogMe() const
- {
- Log("DPRegionList::LogMe, count=%u\n", m_uCount);
- Log("Region Type StartA StartB EndA EndB\n");
- Log("------ ---- ------ ------ ---- ----\n");
- for (unsigned i = 0; i < m_uCount; ++i)
- {
- const DPRegion &r = m_DPRegions[i];
- Log("%6u ", i);
- if (DPREGIONTYPE_Diag == r.m_Type)
- Log("Diag %6u %6u %6u %6u\n",
- r.m_Diag.m_uStartPosA,
- r.m_Diag.m_uStartPosB,
- r.m_Diag.m_uStartPosA + r.m_Diag.m_uLength - 1,
- r.m_Diag.m_uStartPosB + r.m_Diag.m_uLength - 1);
- else if (DPREGIONTYPE_Rect == r.m_Type)
- Log("Rect %6u %6u %6u %6u\n",
- r.m_Rect.m_uStartPosA,
- r.m_Rect.m_uStartPosB,
- r.m_Rect.m_uStartPosA + r.m_Rect.m_uLengthA - 1,
- r.m_Rect.m_uStartPosB + r.m_Rect.m_uLengthB - 1);
- else
- Log(" *** ERROR *** Type=%u\n", r.m_Type);
- }
- }
-
-void DiagListToDPRegionList(const DiagList &DL, DPRegionList &RL,
- unsigned uLengthA, unsigned uLengthB)
- {
- if (g_uDiagMargin > g_uMinDiagLength/2)
- Quit("Invalid parameters, diagmargin=%d must be <= 2*diaglength=%d",
- g_uDiagMargin, g_uMinDiagLength);
-
- unsigned uStartPosA = 0;
- unsigned uStartPosB = 0;
- const unsigned uDiagCount = DL.GetCount();
- DPRegion r;
- for (unsigned uDiagIndex = 0; uDiagIndex < uDiagCount; ++uDiagIndex)
- {
- const Diag &d = DL.Get(uDiagIndex);
- assert(d.m_uLength >= g_uMinDiagLength);
- const unsigned uStartVertexA = d.m_uStartPosA + g_uDiagMargin - 1;
- const unsigned uStartVertexB = d.m_uStartPosB + g_uDiagMargin - 1;
- const unsigned uEndVertexA = d.m_uStartPosA + d.m_uLength - g_uDiagMargin;
- const unsigned uEndVertexB = d.m_uStartPosB + d.m_uLength - g_uDiagMargin;
-
- r.m_Type = DPREGIONTYPE_Rect;
- r.m_Rect.m_uStartPosA = uStartPosA;
- r.m_Rect.m_uStartPosB = uStartPosB;
-
- assert(uStartVertexA + 1 >= uStartPosA);
- assert(uStartVertexB + 1 >= uStartPosB);
- r.m_Rect.m_uLengthA = uStartVertexA + 1 - uStartPosA;
- r.m_Rect.m_uLengthB = uStartVertexB + 1 - uStartPosB;
- RL.Add(r);
-
- if (uEndVertexA > uStartVertexA + 1)
- {
- const unsigned uDiagLengthMinusCaps = uEndVertexA - uStartVertexA - 1;
-
- r.m_Type = DPREGIONTYPE_Diag;
- r.m_Diag.m_uStartPosA = uStartVertexA + 1;
- r.m_Diag.m_uStartPosB = uStartVertexB + 1;
- assert(uEndVertexA - uStartVertexA == uEndVertexB - uStartVertexB);
- r.m_Diag.m_uLength = uEndVertexA - uStartVertexA - 1;
- RL.Add(r);
- }
-
- uStartPosA = uEndVertexA;
- uStartPosB = uEndVertexB;
- }
-
- assert((int) uLengthA - (int) uStartPosA >= (int) g_uDiagMargin);
- assert((int) uLengthB - (int) uStartPosB >= (int) g_uDiagMargin);
-
- r.m_Type = DPREGIONTYPE_Rect;
- r.m_Rect.m_uStartPosA = uStartPosA;
- r.m_Rect.m_uStartPosB = uStartPosB;
-
- assert(uLengthA >= uStartPosA);
- assert(uLengthB >= uStartPosB);
- r.m_Rect.m_uLengthA = uLengthA - uStartPosA;
- r.m_Rect.m_uLengthB = uLengthB - uStartPosB;
- RL.Add(r);
- }
Deleted: trunk/packages/muscle/trunk/dpreglist.h
===================================================================
--- trunk/packages/muscle/trunk/dpreglist.h 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/dpreglist.h 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,76 +0,0 @@
-#ifndef dpreglist_h
-#define dpreglist_h
-
-#include "diaglist.h"
-
-enum DPREGIONTYPE
- {
- DPREGIONTYPE_Unknown,
- DPREGIONTYPE_Diag,
- DPREGIONTYPE_Rect
- };
-
-struct DPRegion
- {
- DPREGIONTYPE m_Type;
- union
- {
- Diag m_Diag;
- Rect m_Rect;
- };
- };
-
-const unsigned MAX_DPREGIONS = 1024;
-
-class DPRegionList
- {
-public:
- DPRegionList()
- {
- m_uCount = 0;
- }
- ~DPRegionList()
- {
- Free();
- }
-
-public:
-// Creation
- void Clear()
- {
- Free();
- }
- void Add(const DPRegion &r);
-
-// Accessors
- unsigned GetCount() const
- {
- return m_uCount;
- }
-
- const DPRegion &Get(unsigned uIndex) const
- {
- assert(uIndex < m_uCount);
- return m_DPRegions[uIndex];
- }
-
- unsigned GetDPArea() const;
-
-// Diagnostics
- void LogMe() const;
-
-private:
- void Free()
- {
- m_uCount = 0;
- }
-
-private:
- unsigned m_uCount;
- DPRegion m_DPRegions[MAX_DPREGIONS];
- };
-
-void DiagListToDPRegionList(const DiagList &DL, DPRegionList &RL,
- unsigned uLengthA, unsigned uLengthB);
-
-#endif // dpreglist_h
Deleted: trunk/packages/muscle/trunk/drawtree.cpp
===================================================================
--- trunk/packages/muscle/trunk/drawtree.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/drawtree.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,41 +0,0 @@
-#include "muscle.h"
-#include "tree.h"
-
-/***
-Simple tree drawing algorithm.
-
-y coordinate of node is index in depth-first traversal.
-x coordinate is distance from root.
-***/
-
-static unsigned DistFromRoot(const Tree &tree, unsigned uNodeIndex)
- {
- const unsigned uRoot = tree.GetRootNodeIndex();
- unsigned uDist = 0;
- while (uNodeIndex != uRoot)
- {
- ++uDist;
- uNodeIndex = tree.GetParent(uNodeIndex);
- }
- return uDist;
- }
-
-static void DrawNode(const Tree &tree, unsigned uNodeIndex)
- {
- if (!tree.IsLeaf(uNodeIndex))
- DrawNode(tree, tree.GetLeft(uNodeIndex));
-
- unsigned uDist = DistFromRoot(tree, uNodeIndex);
- for (unsigned i = 0; i < 5*uDist; ++i)
- Log(" ");
- Log("%d\n", uNodeIndex);
-
- if (!tree.IsLeaf(uNodeIndex))
- DrawNode(tree, tree.GetRight(uNodeIndex));
- }
-
-void DrawTree(const Tree &tree)
- {
- unsigned uRoot = tree.GetRootNodeIndex();
- DrawNode(tree, uRoot);
- }
Deleted: trunk/packages/muscle/trunk/edgelist.cpp
===================================================================
--- trunk/packages/muscle/trunk/edgelist.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/edgelist.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,88 +0,0 @@
-#include "muscle.h"
-#include "edgelist.h"
-
-EdgeList::EdgeList()
- {
- m_uNode1 = 0;
- m_uNode2 = 0;
- m_uCount = 0;
- m_uCacheSize = 0;
- }
-
-EdgeList::~EdgeList()
- {
- Clear();
- }
-
-void EdgeList::Clear()
- {
- delete[] m_uNode1;
- delete[] m_uNode2;
- m_uNode1 = 0;
- m_uNode2 = 0;
- m_uCount = 0;
- m_uCacheSize = 0;
- }
-
-void EdgeList::Add(unsigned uNode1, unsigned uNode2)
- {
- if (m_uCount <= m_uCacheSize)
- Expand();
- m_uNode1[m_uCount] = uNode1;
- m_uNode2[m_uCount] = uNode2;
- ++m_uCount;
- }
-
-unsigned EdgeList::GetCount() const
- {
- return m_uCount;
- }
-
-void EdgeList::GetEdge(unsigned uIndex, unsigned *ptruNode1, unsigned *ptruNode2) const
- {
- if (uIndex > m_uCount)
- Quit("EdgeList::GetEdge(%u) count=%u", uIndex, m_uCount);
- *ptruNode1 = m_uNode1[uIndex];
- *ptruNode2 = m_uNode2[uIndex];
- }
-
-void EdgeList::Copy(const EdgeList &rhs)
- {
- Clear();
- const unsigned uCount = rhs.GetCount();
- for (unsigned n = 0; n < uCount; ++n)
- {
- unsigned uNode1;
- unsigned uNode2;
- rhs.GetEdge(n, &uNode1, &uNode2);
- Add(uNode1, uNode2);
- }
- }
-
-void EdgeList::Expand()
- {
- unsigned uNewCacheSize = m_uCacheSize + 512;
- unsigned *NewNode1 = new unsigned[uNewCacheSize];
- unsigned *NewNode2 = new unsigned[uNewCacheSize];
- if (m_uCount > 0)
- {
- memcpy(NewNode1, m_uNode1, m_uCount*sizeof(unsigned));
- memcpy(NewNode2, m_uNode2, m_uCount*sizeof(unsigned));
- }
- delete[] m_uNode1;
- delete[] m_uNode2;
- m_uNode1 = NewNode1;
- m_uNode2 = NewNode2;
- m_uCacheSize = uNewCacheSize;
- }
-
-void EdgeList::LogMe() const
- {
- for (unsigned n = 0; n < m_uCount; ++n)
- {
- if (n > 0)
- Log(" ");
- Log("%u->%u", m_uNode1[n], m_uNode2[n]);
- }
- Log("\n");
- }
Deleted: trunk/packages/muscle/trunk/edgelist.h
===================================================================
--- trunk/packages/muscle/trunk/edgelist.h 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/edgelist.h 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,28 +0,0 @@
-#ifndef EdgeList_h
-#define EdgeList_h
-
-class EdgeList
- {
-public:
- EdgeList();
- virtual ~EdgeList();
-
-public:
- void Clear();
- void Add(unsigned uNode1, unsigned uNode2);
- unsigned GetCount() const;
- void GetEdge(unsigned uIndex, unsigned *ptruNode1, unsigned *ptruNode2) const;
- void Copy(const EdgeList &rhs);
- void LogMe() const;
-
-private:
- void Expand();
-
-private:
- unsigned m_uCount;
- unsigned m_uCacheSize;
- unsigned *m_uNode1;
- unsigned *m_uNode2;
- };
-
-#endif // EdgeList_h
Deleted: trunk/packages/muscle/trunk/enumopts.cpp
===================================================================
--- trunk/packages/muscle/trunk/enumopts.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/enumopts.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,8 +0,0 @@
-#include "muscle.h"
-#include "enumopts.h"
-
-#define s(t) EnumOpt t##_Opts[] = {
-#define c(t, x) #x, t##_##x,
-#define e(t) 0, 0 };
-
-#include "enums.h"
Deleted: trunk/packages/muscle/trunk/enumopts.h
===================================================================
--- trunk/packages/muscle/trunk/enumopts.h 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/enumopts.h 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,16 +0,0 @@
-#ifndef enumopts_h
-#define enumopts_h
-
-struct EnumOpt
- {
- const char *pstrOpt;
- int iValue;
- };
-
-#define s(t) extern EnumOpt t##_Opts[];
-#define c(t, x) /* empty */
-#define e(t) /* empty */
-#include "enums.h"
-
-
-#endif // enumopts_h
Deleted: trunk/packages/muscle/trunk/enums.h
===================================================================
--- trunk/packages/muscle/trunk/enums.h 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/enums.h 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,95 +0,0 @@
-// enums.h
-// Define enum types.
-// Exploit macro hacks to avoid lots of repetetive typing.
-// Generally I am opposed to macro hacks because of the
-// highly obscure code that results, but in this case it
-// makes maintenance much easier and less error-prone.
-// The idea is that this file can be included in different
-// places with different definitions of s (Start), c (Case)
-// and e (End). See types.h.
-
-s(ALPHA)
-c(ALPHA, Amino)
-c(ALPHA, DNA)
-c(ALPHA, RNA)
-e(ALPHA)
-
-s(SEQTYPE)
-c(SEQTYPE, Protein)
-c(SEQTYPE, DNA)
-c(SEQTYPE, RNA)
-c(SEQTYPE, Auto)
-e(SEQTYPE)
-
-s(ROOT)
-c(ROOT, Pseudo)
-c(ROOT, MidLongestSpan)
-c(ROOT, MinAvgLeafDist)
-e(ROOT)
-
-s(CLUSTER)
-c(CLUSTER, UPGMA)
-c(CLUSTER, UPGMAMax)
-c(CLUSTER, UPGMAMin)
-c(CLUSTER, UPGMB)
-c(CLUSTER, NeighborJoining)
-e(CLUSTER)
-
-s(JOIN)
-c(JOIN, NearestNeighbor)
-c(JOIN, NeighborJoining)
-e(JOIN)
-
-s(LINKAGE)
-c(LINKAGE, Min)
-c(LINKAGE, Avg)
-c(LINKAGE, Max)
-c(LINKAGE, NeighborJoining)
-c(LINKAGE, Biased)
-e(LINKAGE)
-
-s(DISTANCE)
-c(DISTANCE, Kmer6_6)
-c(DISTANCE, Kmer20_3)
-c(DISTANCE, Kmer20_4)
-c(DISTANCE, Kbit20_3)
-c(DISTANCE, Kmer4_6)
-c(DISTANCE, PctIdKimura)
-c(DISTANCE, PctIdLog)
-c(DISTANCE, PWKimura)
-e(DISTANCE)
-
-s(PPSCORE)
-c(PPSCORE, LE)
-c(PPSCORE, SP)
-c(PPSCORE, SV)
-c(PPSCORE, SPN)
-e(PPSCORE)
-
-s(SEQWEIGHT)
-c(SEQWEIGHT, None)
-c(SEQWEIGHT, Henikoff)
-c(SEQWEIGHT, HenikoffPB)
-c(SEQWEIGHT, GSC)
-c(SEQWEIGHT, ClustalW)
-c(SEQWEIGHT, ThreeWay)
-e(SEQWEIGHT)
-
-s(OBJSCORE)
-c(OBJSCORE, SP) // Sum of Pairs of sequences
-c(OBJSCORE, DP) // Dynamic Programming score
-c(OBJSCORE, XP) // Cross Pairs = sum of pairs between two MSAs
-c(OBJSCORE, PS) // sum of Prof-Seq score for all seqs in MSA
-c(OBJSCORE, SPF) // sum of pairs, fast approximation
-c(OBJSCORE, SPM) // sp if <= 100 seqs, spf otherwise
-e(OBJSCORE)
-
-s(TERMGAPS)
-c(TERMGAPS, Full)
-c(TERMGAPS, Half)
-c(TERMGAPS, Ext)
-e(TERMGAPS)
-
-#undef s
-#undef c
-#undef e
Deleted: trunk/packages/muscle/trunk/enumtostr.cpp
===================================================================
--- trunk/packages/muscle/trunk/enumtostr.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/enumtostr.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,16 +0,0 @@
-#include "muscle.h"
-#include <stdio.h>
-
-static char szMsg[64];
-
-// Define XXXToStr(XXX x) functions for each enum type XXX.
-#define s(t) const char *t##ToStr(t x) { switch (x) { case t##_Undefined: return "Undefined";
-#define c(t, x) case t##_##x: return #x;
-#define e(t) } sprintf(szMsg, #t "_%d", x); return szMsg; }
-#include "enums.h"
-
-// Define StrToXXX(const char *Str) functions for each enum type XXX.
-#define s(t) t StrTo##t(const char *Str) { if (0) ;
-#define c(t, x) else if (0 == stricmp(#x, Str)) return t##_##x;
-#define e(t) Quit("Invalid value %s for type %s", Str, #t); return t##_Undefined; }
-#include "enums.h"
Deleted: trunk/packages/muscle/trunk/estring.cpp
===================================================================
--- trunk/packages/muscle/trunk/estring.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/estring.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,689 +0,0 @@
-#include "muscle.h"
-#include "pwpath.h"
-#include "estring.h"
-#include "seq.h"
-#include "msa.h"
-
-/***
-An "estring" is an edit string that operates on a sequence.
-An estring is represented as a vector of integers.
-It is interpreted in order of increasing suffix.
-A positive value n means copy n letters.
-A negative value -n means insert n indels.
-Zero marks the end of the vector.
-Consecutive entries must have opposite sign, i.e. the
-shortest possible representation must be used.
-
-A "tpair" is a traceback path for a pairwise alignment
-represented as two estrings, one for each sequence.
-***/
-
-#define c2(c,d) (((unsigned char) c) << 8 | (unsigned char) d)
-
-unsigned LengthEstring(const short es[])
- {
- unsigned i = 0;
- while (*es++ != 0)
- ++i;
- return i;
- }
-
-short *EstringNewCopy(const short es[])
- {
- unsigned n = LengthEstring(es) + 1;
- short *esNew = new short[n];
- memcpy(esNew, es, n*sizeof(short));
- return esNew;
- }
-
-void LogEstring(const short es[])
- {
- Log("<");
- for (unsigned i = 0; es[i] != 0; ++i)
- {
- if (i > 0)
- Log(" ");
- Log("%d", es[i]);
- }
- Log(">");
- }
-
-static bool EstringsEq(const short es1[], const short es2[])
- {
- for (;;)
- {
- if (*es1 != *es2)
- return false;
- if (0 == *es1)
- break;
- ++es1;
- ++es2;
- }
- return true;
- }
-
-static void EstringCounts(const short es[], unsigned *ptruSymbols,
- unsigned *ptruIndels)
- {
- unsigned uSymbols = 0;
- unsigned uIndels = 0;
- for (unsigned i = 0; es[i] != 0; ++i)
- {
- short n = es[i];
- if (n > 0)
- uSymbols += n;
- else if (n < 0)
- uIndels += -n;
- }
- *ptruSymbols = uSymbols;
- *ptruIndels = uIndels;
- }
-
-static char *EstringOp(const short es[], const char s[])
- {
- unsigned uSymbols;
- unsigned uIndels;
- EstringCounts(es, &uSymbols, &uIndels);
- assert((unsigned) strlen(s) == uSymbols);
- char *sout = new char[uSymbols + uIndels + 1];
- char *psout = sout;
- for (;;)
- {
- int n = *es++;
- if (0 == n)
- break;
- if (n > 0)
- for (int i = 0; i < n; ++i)
- *psout++ = *s++;
- else
- for (int i = 0; i < -n; ++i)
- *psout++ = '-';
- }
- assert(0 == *s);
- *psout = 0;
- return sout;
- }
-
-void EstringOp(const short es[], const Seq &sIn, Seq &sOut)
- {
-#if DEBUG
- unsigned uSymbols;
- unsigned uIndels;
- EstringCounts(es, &uSymbols, &uIndels);
- assert(sIn.Length() == uSymbols);
-#endif
- sOut.Clear();
- sOut.SetName(sIn.GetName());
- int p = 0;
- for (;;)
- {
- int n = *es++;
- if (0 == n)
- break;
- if (n > 0)
- for (int i = 0; i < n; ++i)
- {
- char c = sIn[p++];
- sOut.push_back(c);
- }
- else
- for (int i = 0; i < -n; ++i)
- sOut.push_back('-');
- }
- }
-
-unsigned EstringOp(const short es[], const Seq &sIn, MSA &a)
- {
- unsigned uSymbols;
- unsigned uIndels;
- EstringCounts(es, &uSymbols, &uIndels);
- assert(sIn.Length() == uSymbols);
-
- unsigned uColCount = uSymbols + uIndels;
-
- a.Clear();
- a.SetSize(1, uColCount);
-
- a.SetSeqName(0, sIn.GetName());
- a.SetSeqId(0, sIn.GetId());
-
- unsigned p = 0;
- unsigned uColIndex = 0;
- for (;;)
- {
- int n = *es++;
- if (0 == n)
- break;
- if (n > 0)
- for (int i = 0; i < n; ++i)
- {
- char c = sIn[p++];
- a.SetChar(0, uColIndex++, c);
- }
- else
- for (int i = 0; i < -n; ++i)
- a.SetChar(0, uColIndex++, '-');
- }
- assert(uColIndex == uColCount);
- return uColCount;
- }
-
-void PathToEstrings(const PWPath &Path, short **ptresA, short **ptresB)
- {
-// First pass to determine size of estrings esA and esB
- const unsigned uEdgeCount = Path.GetEdgeCount();
- if (0 == uEdgeCount)
- {
- short *esA = new short[1];
- short *esB = new short[1];
- esA[0] = 0;
- esB[0] = 0;
- *ptresA = esA;
- *ptresB = esB;
- return;
- }
-
- unsigned iLengthA = 1;
- unsigned iLengthB = 1;
- const char cFirstEdgeType = Path.GetEdge(0).cType;
- char cPrevEdgeType = cFirstEdgeType;
- for (unsigned uEdgeIndex = 1; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
- {
- const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
- char cEdgeType = Edge.cType;
-
- switch (c2(cPrevEdgeType, cEdgeType))
- {
- case c2('M', 'M'):
- case c2('D', 'D'):
- case c2('I', 'I'):
- break;
-
- case c2('D', 'M'):
- case c2('M', 'D'):
- ++iLengthB;
- break;
-
- case c2('I', 'M'):
- case c2('M', 'I'):
- ++iLengthA;
- break;
-
- case c2('I', 'D'):
- case c2('D', 'I'):
- ++iLengthB;
- ++iLengthA;
- break;
-
- default:
- assert(false);
- }
- cPrevEdgeType = cEdgeType;
- }
-
-// Pass2 for seq A
- {
- short *esA = new short[iLengthA+1];
- unsigned iA = 0;
- switch (Path.GetEdge(0).cType)
- {
- case 'M':
- case 'D':
- esA[0] = 1;
- break;
-
- case 'I':
- esA[0] = -1;
- break;
-
- default:
- assert(false);
- }
-
- char cPrevEdgeType = cFirstEdgeType;
- for (unsigned uEdgeIndex = 1; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
- {
- const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
- char cEdgeType = Edge.cType;
-
- switch (c2(cPrevEdgeType, cEdgeType))
- {
- case c2('M', 'M'):
- case c2('D', 'D'):
- case c2('D', 'M'):
- case c2('M', 'D'):
- ++(esA[iA]);
- break;
-
- case c2('I', 'D'):
- case c2('I', 'M'):
- ++iA;
- esA[iA] = 1;
- break;
-
- case c2('M', 'I'):
- case c2('D', 'I'):
- ++iA;
- esA[iA] = -1;
- break;
-
- case c2('I', 'I'):
- --(esA[iA]);
- break;
-
- default:
- assert(false);
- }
-
- cPrevEdgeType = cEdgeType;
- }
- assert(iA == iLengthA - 1);
- esA[iLengthA] = 0;
- *ptresA = esA;
- }
-
- {
-// Pass2 for seq B
- short *esB = new short[iLengthB+1];
- unsigned iB = 0;
- switch (Path.GetEdge(0).cType)
- {
- case 'M':
- case 'I':
- esB[0] = 1;
- break;
-
- case 'D':
- esB[0] = -1;
- break;
-
- default:
- assert(false);
- }
-
- char cPrevEdgeType = cFirstEdgeType;
- for (unsigned uEdgeIndex = 1; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
- {
- const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
- char cEdgeType = Edge.cType;
-
- switch (c2(cPrevEdgeType, cEdgeType))
- {
- case c2('M', 'M'):
- case c2('I', 'I'):
- case c2('I', 'M'):
- case c2('M', 'I'):
- ++(esB[iB]);
- break;
-
- case c2('D', 'I'):
- case c2('D', 'M'):
- ++iB;
- esB[iB] = 1;
- break;
-
- case c2('M', 'D'):
- case c2('I', 'D'):
- ++iB;
- esB[iB] = -1;
- break;
-
- case c2('D', 'D'):
- --(esB[iB]);
- break;
-
- default:
- assert(false);
- }
-
- cPrevEdgeType = cEdgeType;
- }
- assert(iB == iLengthB - 1);
- esB[iLengthB] = 0;
- *ptresB = esB;
- }
-
-#if DEBUG
- {
- const PWEdge &LastEdge = Path.GetEdge(uEdgeCount - 1);
- unsigned uSymbols;
- unsigned uIndels;
- EstringCounts(*ptresA, &uSymbols, &uIndels);
- assert(uSymbols == LastEdge.uPrefixLengthA);
- assert(uSymbols + uIndels == uEdgeCount);
-
- EstringCounts(*ptresB, &uSymbols, &uIndels);
- assert(uSymbols == LastEdge.uPrefixLengthB);
- assert(uSymbols + uIndels == uEdgeCount);
-
- PWPath TmpPath;
- EstringsToPath(*ptresA, *ptresB, TmpPath);
- TmpPath.AssertEqual(Path);
- }
-#endif
- }
-
-void EstringsToPath(const short esA[], const short esB[], PWPath &Path)
- {
- Path.Clear();
- unsigned iA = 0;
- unsigned iB = 0;
- int nA = esA[iA++];
- int nB = esB[iB++];
- unsigned uPrefixLengthA = 0;
- unsigned uPrefixLengthB = 0;
- for (;;)
- {
- char cType;
- if (nA > 0)
- {
- if (nB > 0)
- {
- cType = 'M';
- --nA;
- --nB;
- }
- else if (nB < 0)
- {
- cType = 'D';
- --nA;
- ++nB;
- }
- else
- assert(false);
- }
- else if (nA < 0)
- {
- if (nB > 0)
- {
- cType = 'I';
- ++nA;
- --nB;
- }
- else
- assert(false);
- }
- else
- assert(false);
-
- switch (cType)
- {
- case 'M':
- ++uPrefixLengthA;
- ++uPrefixLengthB;
- break;
- case 'D':
- ++uPrefixLengthA;
- break;
- case 'I':
- ++uPrefixLengthB;
- break;
- }
-
- PWEdge Edge;
- Edge.cType = cType;
- Edge.uPrefixLengthA = uPrefixLengthA;
- Edge.uPrefixLengthB = uPrefixLengthB;
- Path.AppendEdge(Edge);
-
- if (nA == 0)
- {
- if (0 == esA[iA])
- {
- assert(0 == esB[iB]);
- break;
- }
- nA = esA[iA++];
- }
- if (nB == 0)
- nB = esB[iB++];
- }
- }
-
-/***
-Multiply two estrings to make a third estring.
-The product of two estrings e1*e2 is defined to be
-the estring that produces the same result as applying
-e1 then e2. Multiplication is not commutative. In fact,
-the reversed order is undefined unless both estrings
-consist of a single, identical, positive entry.
-A primary motivation for using estrings is that
-multiplication is very fast, reducing the time
-needed to construct the root alignment.
-
-Example
-
- <-1,3>(XXX) = -XXX
- <2,-1,2>(-XXX) = -X-XX
-
-Therefore,
-
- <-1,3>*<2,-1,2> = <-1,1,-1,2>
-***/
-
-static bool CanMultiplyEstrings(const short es1[], const short es2[])
- {
- unsigned uSymbols1;
- unsigned uSymbols2;
- unsigned uIndels1;
- unsigned uIndels2;
- EstringCounts(es1, &uSymbols1, &uIndels1);
- EstringCounts(es2, &uSymbols2, &uIndels2);
- return uSymbols1 + uIndels1 == uSymbols2;
- }
-
-static inline void AppendGaps(short esp[], int &ip, int n)
- {
- if (-1 == ip)
- esp[++ip] = n;
- else if (esp[ip] < 0)
- esp[ip] += n;
- else
- esp[++ip] = n;
- }
-
-static inline void AppendSymbols(short esp[], int &ip, int n)
- {
- if (-1 == ip)
- esp[++ip] = n;
- else if (esp[ip] > 0)
- esp[ip] += n;
- else
- esp[++ip] = n;
- }
-
-void MulEstrings(const short es1[], const short es2[], short esp[])
- {
- assert(CanMultiplyEstrings(es1, es2));
-
- unsigned i1 = 0;
- int ip = -1;
- int n1 = es1[i1++];
- for (unsigned i2 = 0; ; ++i2)
- {
- int n2 = es2[i2];
- if (0 == n2)
- break;
- if (n2 > 0)
- {
- for (;;)
- {
- if (n1 < 0)
- {
- if (n2 > -n1)
- {
- AppendGaps(esp, ip, n1);
- n2 += n1;
- n1 = es1[i1++];
- }
- else if (n2 == -n1)
- {
- AppendGaps(esp, ip, n1);
- n1 = es1[i1++];
- break;
- }
- else
- {
- assert(n2 < -n1);
- AppendGaps(esp, ip, -n2);
- n1 += n2;
- break;
- }
- }
- else
- {
- assert(n1 > 0);
- if (n2 > n1)
- {
- AppendSymbols(esp, ip, n1);
- n2 -= n1;
- n1 = es1[i1++];
- }
- else if (n2 == n1)
- {
- AppendSymbols(esp, ip, n1);
- n1 = es1[i1++];
- break;
- }
- else
- {
- assert(n2 < n1);
- AppendSymbols(esp, ip, n2);
- n1 -= n2;
- break;
- }
- }
- }
- }
- else
- {
- assert(n2 < 0);
- AppendGaps(esp, ip, n2);
- }
- }
- esp[++ip] = 0;
-
-#if DEBUG
- {
- int MaxLen = (int) (LengthEstring(es1) + LengthEstring(es2) + 1);
- assert(ip < MaxLen);
- if (ip >= 2)
- for (int i = 0; i < ip - 2; ++i)
- {
- if (!(esp[i] > 0 && esp[i+1] < 0 || esp[i] < 0 && esp[i+1] > 0))
- {
- Log("Bad result of MulEstring: ");
- LogEstring(esp);
- Quit("Assert failed (alternating signs)");
- }
- }
- unsigned uSymbols1;
- unsigned uSymbols2;
- unsigned uSymbolsp;
- unsigned uIndels1;
- unsigned uIndels2;
- unsigned uIndelsp;
- EstringCounts(es1, &uSymbols1, &uIndels1);
- EstringCounts(es2, &uSymbols2, &uIndels2);
- EstringCounts(esp, &uSymbolsp, &uIndelsp);
- if (uSymbols1 + uIndels1 != uSymbols2)
- {
- Log("Bad result of MulEstring: ");
- LogEstring(esp);
- Quit("Assert failed (counts1 %u %u %u)",
- uSymbols1, uIndels1, uSymbols2);
- }
- }
-#endif
- }
-
-static void test(const short es1[], const short es2[], const short esa[])
- {
- unsigned uSymbols1;
- unsigned uSymbols2;
- unsigned uIndels1;
- unsigned uIndels2;
- EstringCounts(es1, &uSymbols1, &uIndels1);
- EstringCounts(es2, &uSymbols2, &uIndels2);
-
- char s[4096];
- memset(s, 'X', sizeof(s));
- s[uSymbols1] = 0;
-
- char *s1 = EstringOp(es1, s);
- char *s12 = EstringOp(es2, s1);
-
- memset(s, 'X', sizeof(s));
- s[uSymbols2] = 0;
- char *s2 = EstringOp(es2, s);
-
- Log("%s * %s = %s\n", s1, s2, s12);
-
- LogEstring(es1);
- Log(" * ");
- LogEstring(es2);
- Log(" = ");
- LogEstring(esa);
- Log("\n");
-
- short esp[4096];
- MulEstrings(es1, es2, esp);
- LogEstring(esp);
- if (!EstringsEq(esp, esa))
- Log(" *ERROR* ");
- Log("\n");
-
- memset(s, 'X', sizeof(s));
- s[uSymbols1] = 0;
- char *sp = EstringOp(esp, s);
- Log("%s\n", sp);
- Log("\n==========\n\n");
- }
-
-void TestEstrings()
- {
- SetListFileName("c:\\tmp\\muscle.log", false);
- //{
- //short es1[] = { -1, 1, -1, 0 };
- //short es2[] = { 1, -1, 2, 0 };
- //short esa[] = { -2, 1, -1, 0 };
- //test(es1, es2, esa);
- //}
- //{
- //short es1[] = { 2, -1, 2, 0 };
- //short es2[] = { 1, -1, 3, -1, 1, 0 };
- //short esa[] = { 1, -1, 1, -1, 1, -1, 1, 0 };
- //test(es1, es2, esa);
- //}
- //{
- //short es1[] = { -1, 3, 0 };
- //short es2[] = { 2, -1, 2, 0 };
- //short esa[] = { -1, 1, -1, 2, 0 };
- //test(es1, es2, esa);
- //}
- //{
- //short es1[] = { -1, 1, -1, 1, 0};
- //short es2[] = { 4, 0 };
- //short esa[] = { -1, 1, -1, 1, 0};
- //test(es1, es2, esa);
- //}
- //{
- //short es1[] = { 1, -1, 1, -1, 0};
- //short es2[] = { 4, 0 };
- //short esa[] = { 1, -1, 1, -1, 0};
- //test(es1, es2, esa);
- //}
- //{
- //short es1[] = { 1, -1, 1, -1, 0};
- //short es2[] = { -1, 4, -1, 0 };
- //short esa[] = { -1, 1, -1, 1, -2, 0};
- //test(es1, es2, esa);
- //}
- {
- short es1[] = { 106, -77, 56, -2, 155, -3, 123, -2, 0};
- short es2[] = { 50, -36, 34, -3, 12, -6, 1, -6, 18, -17, 60, -5, 349, -56, 0 };
- short esa[] = { 0 };
- test(es1, es2, esa);
- }
- exit(0);
- }
Deleted: trunk/packages/muscle/trunk/estring.h
===================================================================
--- trunk/packages/muscle/trunk/estring.h 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/estring.h 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,13 +0,0 @@
-#ifndef pathsum_h
-#define pathsum_h
-
-void PathToEstrings(const PWPath &Path, short **ptresA, short **ptresB);
-void EstringsToPath(const short esA[], const short esB[], PWPath &Path);
-void MulEstrings(const short es1[], const short es2[], short esp[]);
-void EstringOp(const short es[], const Seq &sIn, Seq &sOut);
-unsigned EstringOp(const short es[], const Seq &sIn, MSA &a);
-void LogEstring(const short es[]);
-unsigned LengthEstring(const short es[]);
-short *EstringNewCopy(const short es[]);
-
-#endif // pathsum_h
Deleted: trunk/packages/muscle/trunk/fasta.cpp
===================================================================
--- trunk/packages/muscle/trunk/fasta.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/fasta.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,56 +0,0 @@
-#include "muscle.h"
-#include <stdio.h>
-#include <ctype.h>
-#include "msa.h"
-#include "textfile.h"
-
-const unsigned FASTA_BLOCK = 60;
-
-void MSA::FromFASTAFile(TextFile &File)
- {
- Clear();
-
- FILE *f = File.GetStdioFile();
-
- unsigned uSeqCount = 0;
- unsigned uColCount = uInsane;
- for (;;)
- {
- char *Label;
- unsigned uSeqLength;
- char *SeqData = GetFastaSeq(f, &uSeqLength, &Label, false);
- if (0 == SeqData)
- break;
- AppendSeq(SeqData, uSeqLength, Label);
- }
- }
-
-void MSA::ToFASTAFile(TextFile &File) const
- {
- const unsigned uColCount = GetColCount();
- assert(uColCount > 0);
- const unsigned uLinesPerSeq = (GetColCount() - 1)/FASTA_BLOCK + 1;
- const unsigned uSeqCount = GetSeqCount();
-
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
- {
- File.PutString(">");
- File.PutString(GetSeqName(uSeqIndex));
- File.PutString("\n");
-
- unsigned n = 0;
- for (unsigned uLine = 0; uLine < uLinesPerSeq; ++uLine)
- {
- unsigned uLetters = uColCount - uLine*FASTA_BLOCK;
- if (uLetters > FASTA_BLOCK)
- uLetters = FASTA_BLOCK;
- for (unsigned i = 0; i < uLetters; ++i)
- {
- char c = GetChar(uSeqIndex, n);
- File.PutChar(c);
- ++n;
- }
- File.PutChar('\n');
- }
- }
- }
Deleted: trunk/packages/muscle/trunk/fasta2.cpp
===================================================================
--- trunk/packages/muscle/trunk/fasta2.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/fasta2.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,117 +0,0 @@
-#include "muscle.h"
-#include <stdio.h>
-#include <errno.h>
-
-const int BUFFER_BYTES = 16*1024;
-const int CR = '\r';
-const int NL = '\n';
-
-#define ADD(c) \
- { \
- if (Pos >= BufferLength) \
- { \
- const int NewBufferLength = BufferLength + BUFFER_BYTES; \
- char *NewBuffer = new char[NewBufferLength]; \
- memcpy(NewBuffer, Buffer, BufferLength); \
- delete[] Buffer; \
- Buffer = NewBuffer; \
- BufferLength = NewBufferLength; \
- } \
- Buffer[Pos++] = c; \
- }
-
-// Get next sequence from file.
-char *GetFastaSeq(FILE *f, unsigned *ptrSeqLength, char **ptrLabel, bool DeleteGaps)
- {
- unsigned BufferLength = 0;
- unsigned Pos = 0;
- char *Buffer = 0;
-
- int c = fgetc(f);
- if (EOF == c)
- return 0;
- if ('>' != c)
- Quit("Invalid file format, expected '>' to start FASTA label");
-
- for (;;)
- {
- int c = fgetc(f);
- if (EOF == c)
- Quit("End-of-file or input error in FASTA label");
-
- // Ignore CR (discard, do not include in label)
- if (CR == c)
- continue;
-
- // NL terminates label
- if (NL == c)
- break;
-
- // All other characters added to label
- ADD(c)
- }
-
-// Nul-terminate label
- ADD(0)
- *ptrLabel = Buffer;
-
- BufferLength = 0;
- Pos = 0;
- Buffer = 0;
- int PreviousChar = NL;
- for (;;)
- {
- int c = fgetc(f);
- if (EOF == c)
- {
- if (feof(f))
- break;
- else if (ferror(f))
- Quit("Error reading FASTA file, ferror=TRUE feof=FALSE errno=%d %s",
- errno, strerror(errno));
- else
- Quit("Error reading FASTA file, fgetc=EOF feof=FALSE ferror=FALSE errno=%d %s",
- errno, strerror(errno));
- }
-
- if ('>' == c)
- {
- if (NL == PreviousChar)
- {
- ungetc(c, f);
- break;
- }
- else
- Quit("Unexpected '>' in FASTA sequence data");
- }
- else if (isspace(c))
- ;
- else if (IsGapChar(c))
- {
- if (!DeleteGaps)
- ADD(c)
- }
- else if (isalpha(c))
- {
- c = toupper(c);
- ADD(c)
- }
- else if (isprint(c))
- {
- Warning("Invalid character '%c' in FASTA sequence data, ignored", c);
- continue;
- }
- else
- {
- Warning("Invalid byte hex %02x in FASTA sequence data, ignored", (unsigned char) c);
- continue;
- }
- PreviousChar = c;
- }
-
- if (0 == Pos)
- return GetFastaSeq(f, ptrSeqLength, ptrLabel, DeleteGaps);
-
- *ptrSeqLength = Pos;
- return Buffer;
- }
Deleted: trunk/packages/muscle/trunk/fastclust.cpp
===================================================================
--- trunk/packages/muscle/trunk/fastclust.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/fastclust.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,57 +0,0 @@
-#include "muscle.h"
-#include "seqvect.h"
-#include "distfunc.h"
-#include "clust.h"
-#include "clustsetdf.h"
-#include "tree.h"
-#include "clust.h"
-#include "distcalc.h"
-#include <math.h>
-
-static void TreeFromSeqVect_NJ(const DistFunc &DF, CLUSTER Cluster, Tree &tree)
- {
- ClustSetDF CSD(DF);
-
- Clust C;
- C.Create(CSD, Cluster);
-
- tree.FromClust(C);
- }
-
-static void TreeFromSeqVect_UPGMA(const DistFunc &DF, CLUSTER Cluster, Tree &tree)
- {
- LINKAGE Linkage = LINKAGE_Undefined;
- switch (Cluster)
- {
- case CLUSTER_UPGMA:
- Linkage = LINKAGE_Avg;
- break;
- case CLUSTER_UPGMAMin:
- Linkage = LINKAGE_Min;
- break;
- case CLUSTER_UPGMAMax:
- Linkage = LINKAGE_Max;
- break;
- case CLUSTER_UPGMB:
- Linkage = LINKAGE_Biased;
- break;
- default:
- Quit("TreeFromSeqVect_UPGMA, CLUSTER_%u not supported", Cluster);
- }
-
- DistCalcDF DC;
- DC.Init(DF);
- UPGMA2(DC, tree, Linkage);
- }
-
-void TreeFromSeqVect(const SeqVect &v, Tree &tree, CLUSTER Cluster,
- DISTANCE Distance, ROOT Root)
- {
- DistFunc DF;
- DistUnaligned(v, Distance, DF);
- if (CLUSTER_NeighborJoining == Cluster)
- TreeFromSeqVect_NJ(DF, Cluster, tree);
- else
- TreeFromSeqVect_UPGMA(DF, Cluster, tree);
- FixRoot(tree, Root);
- }
Deleted: trunk/packages/muscle/trunk/fastdist.cpp
===================================================================
--- trunk/packages/muscle/trunk/fastdist.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/fastdist.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,50 +0,0 @@
-#include "muscle.h"
-#include "distfunc.h"
-#include "seqvect.h"
-
-void DistUnaligned(const SeqVect &v, DISTANCE DistMethod, DistFunc &DF)
- {
- const unsigned uSeqCount = v.Length();
-
- switch (DistMethod)
- {
- case DISTANCE_Kmer6_6:
- DistKmer6_6(v, DF);
- break;
-
- case DISTANCE_Kmer20_3:
- DistKmer20_3(v, DF);
- break;
-
- case DISTANCE_Kmer20_4:
- FastDistKmer(v, DF);
- break;
-
- case DISTANCE_Kbit20_3:
- DistKbit20_3(v, DF);
- break;
-
- case DISTANCE_Kmer4_6:
- DistKmer4_6(v, DF);
- break;
-
- case DISTANCE_PWKimura:
- DistPWKimura(v, DF);
- break;
-
- default:
- Quit("DistUnaligned, unsupported distance method %d", DistMethod);
- }
-
-// const char **SeqNames = (const char **) malloc(uSeqCount*sizeof(char *));
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
- {
- const Seq &s = *(v[uSeqIndex]);
-
- const char *ptrName = s.GetName();
- unsigned uId = s.GetId();
-
- DF.SetName(uSeqIndex, ptrName);
- DF.SetId(uSeqIndex, uId);
- }
- }
Deleted: trunk/packages/muscle/trunk/fastdistjones.cpp
===================================================================
--- trunk/packages/muscle/trunk/fastdistjones.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/fastdistjones.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,206 +0,0 @@
-#include "muscle.h"
-#include "distfunc.h"
-#include "seqvect.h"
-#include <math.h>
-
-const unsigned TRIPLE_COUNT = 20*20*20;
-
-struct TripleCount
- {
- unsigned m_uSeqCount; // How many sequences have this triple?
- unsigned short *m_Counts; // m_Counts[s] = nr of times triple found in seq s
- };
-static TripleCount *TripleCounts;
-
-// WARNING: Sequences MUST be stripped of gaps and upper case!
-void DistKmer20_3(const SeqVect &v, DistFunc &DF)
- {
- const unsigned uSeqCount = v.Length();
-
- DF.SetCount(uSeqCount);
- if (0 == uSeqCount)
- return;
- for (unsigned uSeq1 = 0; uSeq1 < uSeqCount; ++uSeq1)
- {
- DF.SetDist(uSeq1, uSeq1, 0);
- for (unsigned uSeq2 = 0; uSeq2 < uSeq1; ++uSeq2)
- DF.SetDist(uSeq1, uSeq2, 0);
- }
-
- const unsigned uTripleArrayBytes = TRIPLE_COUNT*sizeof(TripleCount);
- TripleCounts = (TripleCount *) malloc(uTripleArrayBytes);
- if (0 == TripleCounts)
- Quit("Not enough memory (TripleCounts)");
- memset(TripleCounts, 0, uTripleArrayBytes);
-
- for (unsigned uWord = 0; uWord < TRIPLE_COUNT; ++uWord)
- {
- TripleCount &tc = *(TripleCounts + uWord);
- const unsigned uBytes = uSeqCount*sizeof(short);
- tc.m_Counts = (unsigned short *) malloc(uBytes);
- memset(tc.m_Counts, 0, uBytes);
- }
-
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
- {
- Seq &s = *(v[uSeqIndex]);
- const unsigned uSeqLength = s.Length();
- for (unsigned uPos = 0; uPos < uSeqLength - 2; ++uPos)
- {
- const unsigned uLetter1 = CharToLetterEx(s[uPos]);
- if (uLetter1 >= 20)
- continue;
- const unsigned uLetter2 = CharToLetterEx(s[uPos+1]);
- if (uLetter2 >= 20)
- continue;
- const unsigned uLetter3 = CharToLetterEx(s[uPos+2]);
- if (uLetter3 >= 20)
- continue;
-
- const unsigned uWord = uLetter1 + uLetter2*20 + uLetter3*20*20;
- assert(uWord < TRIPLE_COUNT);
-
- TripleCount &tc = *(TripleCounts + uWord);
- const unsigned uOldCount = tc.m_Counts[uSeqIndex];
- if (0 == uOldCount)
- ++(tc.m_uSeqCount);
-
- ++(tc.m_Counts[uSeqIndex]);
- }
- }
-
-#if TRACE
- {
- Log("TripleCounts\n");
- unsigned uGrandTotal = 0;
- for (unsigned uWord = 0; uWord < TRIPLE_COUNT; ++uWord)
- {
- const TripleCount &tc = *(TripleCounts + uWord);
- if (0 == tc.m_uSeqCount)
- continue;
-
- const unsigned uLetter3 = uWord/(20*20);
- const unsigned uLetter2 = (uWord - uLetter3*20*20)/20;
- const unsigned uLetter1 = uWord%20;
- Log("Word %6u %c%c%c %6u",
- uWord,
- LetterToCharAmino(uLetter1),
- LetterToCharAmino(uLetter2),
- LetterToCharAmino(uLetter3),
- tc.m_uSeqCount);
-
- unsigned uSeqCountWithThisWord = 0;
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
- {
- const unsigned uCount = tc.m_Counts[uSeqIndex];
- if (uCount > 0)
- {
- ++uSeqCountWithThisWord;
- Log(" %u=%u", uSeqIndex, uCount);
- uGrandTotal += uCount;
- }
- }
- if (uSeqCountWithThisWord != tc.m_uSeqCount)
- Log(" *** SQ ERROR *** %u %u", tc.m_uSeqCount, uSeqCountWithThisWord);
- Log("\n");
- }
-
- unsigned uTotalBySeqLength = 0;
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
- {
- Seq &s = *(v[uSeqIndex]);
- const unsigned uSeqLength = s.Length();
- uTotalBySeqLength += uSeqLength - 2;
- }
- if (uGrandTotal != uTotalBySeqLength)
- Log("*** TOTALS DISAGREE *** %u %u\n", uGrandTotal, uTotalBySeqLength);
- }
-#endif
-
- const unsigned uSeqListBytes = uSeqCount*sizeof(unsigned);
- unsigned short *SeqList = (unsigned short *) malloc(uSeqListBytes);
-
- for (unsigned uWord = 0; uWord < TRIPLE_COUNT; ++uWord)
- {
- const TripleCount &tc = *(TripleCounts + uWord);
- if (0 == tc.m_uSeqCount)
- continue;
-
- unsigned uSeqCountFound = 0;
- memset(SeqList, 0, uSeqListBytes);
-
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
- {
- if (tc.m_Counts[uSeqIndex] > 0)
- {
- SeqList[uSeqCountFound] = uSeqIndex;
- ++uSeqCountFound;
- if (uSeqCountFound == tc.m_uSeqCount)
- break;
- }
- }
- assert(uSeqCountFound == tc.m_uSeqCount);
-
- for (unsigned uSeq1 = 0; uSeq1 < uSeqCountFound; ++uSeq1)
- {
- const unsigned uSeqIndex1 = SeqList[uSeq1];
- const unsigned uCount1 = tc.m_Counts[uSeqIndex1];
- for (unsigned uSeq2 = 0; uSeq2 < uSeq1; ++uSeq2)
- {
- const unsigned uSeqIndex2 = SeqList[uSeq2];
- const unsigned uCount2 = tc.m_Counts[uSeqIndex2];
- const unsigned uMinCount = uCount1 < uCount2 ? uCount1 : uCount2;
- const double d = DF.GetDist(uSeqIndex1, uSeqIndex2);
- DF.SetDist(uSeqIndex1, uSeqIndex2, (float) (d + uMinCount));
- }
- }
- }
- delete[] SeqList;
- free(TripleCounts);
-
- unsigned uDone = 0;
- const unsigned uTotal = (uSeqCount*(uSeqCount - 1))/2;
- for (unsigned uSeq1 = 0; uSeq1 < uSeqCount; ++uSeq1)
- {
- DF.SetDist(uSeq1, uSeq1, 0.0);
-
- const Seq &s1 = *(v[uSeq1]);
- const unsigned uLength1 = s1.Length();
-
- for (unsigned uSeq2 = 0; uSeq2 < uSeq1; ++uSeq2)
- {
- const Seq &s2 = *(v[uSeq2]);
- const unsigned uLength2 = s2.Length();
- unsigned uMinLength = uLength1 < uLength2 ? uLength1 : uLength2;
- if (uMinLength < 3)
- {
- DF.SetDist(uSeq1, uSeq2, 1.0);
- continue;
- }
-
- const double dTripleCount = DF.GetDist(uSeq1, uSeq2);
- if (dTripleCount == 0)
- {
- DF.SetDist(uSeq1, uSeq2, 1.0);
- continue;
- }
- double dNormalizedTripletScore = dTripleCount/(uMinLength - 2);
- //double dEstimatedPairwiseIdentity = exp(0.3912*log(dNormalizedTripletScore));
- //if (dEstimatedPairwiseIdentity > 1)
- // dEstimatedPairwiseIdentity = 1;
-// DF.SetDist(uSeq1, uSeq2, (float) (1.0 - dEstimatedPairwiseIdentity));
- DF.SetDist(uSeq1, uSeq2, (float) dNormalizedTripletScore);
-
-#if TRACE
- {
- Log("%s - %s Triplet count = %g Lengths %u, %u Estimated pwid = %g\n",
- s1.GetName(), s2.GetName(), dTripleCount, uLength1, uLength2,
- dEstimatedPairwiseIdentity);
- }
-#endif
- if (uDone%1000 == 0)
- Progress(uDone, uTotal);
- }
- }
- ProgressStepsDone();
- }
Deleted: trunk/packages/muscle/trunk/fastdistkbit.cpp
===================================================================
--- trunk/packages/muscle/trunk/fastdistkbit.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/fastdistkbit.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,109 +0,0 @@
-#include "muscle.h"
-#include "distfunc.h"
-#include "seqvect.h"
-#include <math.h>
-
-#define MIN(x, y) ((x) < (y) ? (x) : (y))
-
-// Set one bit in Bits[] for every 3-mer of s whose letters all have a
-// letter code <= AX_Y (the 20-letter alphabet assumed by the base-20
-// k-mer code below).
-//
-//   s     sequence to scan.
-//   Bits  bit vector indexed by k-mer code (code/8 = byte, code%8 = bit).
-//         The caller must supply at least 8000 bits (1000 bytes), already
-//         cleared; bits are only ever set here, never reset.
-//
-// A window that contains a letter outside the 20-letter range is skipped.
-static void SetKmerBitVector(const Seq &s, byte Bits[])
-{
-    const unsigned uLength = s.Length();
-    const unsigned k = 3;   // kmer length
-
-    // A sequence shorter than k has no k-mer at all; returning here also
-    // keeps the priming loop from reading past the end of the sequence.
-    if (uLength < k)
-        return;
-
-    unsigned c = 0;     // rolling base-20 code of the most recent letters
-    unsigned h = 0;     // first index i at which the window ending at i
-                        // no longer overlaps a rejected letter
-    unsigned i = 0;
-
-    // Prime the rolling code with the first k-1 letters.
-    for ( ; i < k - 1; ++i)
-    {
-        const unsigned x = CharToLetterEx(s[i]);
-        if (x <= AX_Y)
-            c = c*20 + x;
-        else
-        {
-            c = 0;
-            // The window ending at index i+k-1 still contains position i,
-            // so the first clean window ends at i+k.
-            h = i + k;
-        }
-    }
-
-    // Slide the window one letter at a time. The loop header is the only
-    // place where i advances, so every residue is visited exactly once.
-    for ( ; i < uLength; ++i)
-    {
-        const unsigned x = CharToLetterEx(s[i]);
-        if (x <= AX_Y)
-            c = (c*20 + x)%8000;    // keep only the last three letters
-        else
-        {
-            c = 0;
-            h = i + k;
-        }
-        if (i >= h)
-        {
-            const unsigned ByteOffset = c/8;
-            const unsigned BitOffset = c%8;
-            Bits[ByteOffset] |= (1 << BitOffset);
-        }
-    }
-}
-
-// Count the bit positions that are set in BOTH 1000-byte bit vectors.
-//
-//   Bits1, Bits2  bit vectors of 1000 bytes (8000 bits) each.
-//
-// Returns the number of positions where both vectors have a 1.
-static unsigned CommonBitCount(const byte Bits1[], const byte Bits2[])
-{
-    const byte * const p1end = Bits1 + 1000;
-    const byte *p2 = Bits2;
-
-    unsigned uCount = 0;
-    for (const byte *p1 = Bits1; p1 != p1end; ++p1, ++p2)
-    {
-        // AND keeps exactly the bits present in both bytes. Testing
-        // "either bit set" on a combined word would count bits that occur
-        // in only one vector, and would count the second byte again once
-        // it had been shifted down into the low byte.
-        unsigned b = (unsigned) (*p1 & *p2);
-        while (b != 0)
-        {
-            uCount += (b & 1);
-            b >>= 1;
-        }
-    }
-    return uCount;
-}
-
-// Fill DF with a pairwise measure derived from shared 3-mers.
-//
-//   v   input sequences.
-//   DF  resized to v.Length() entries; receives one value for every pair
-//       (i, j) with j < i.
-//
-// Each sequence gets a bit vector with one bit per possible 3-mer of the
-// 20-letter alphabet (20^3 = 8,000 bits = 1,000 bytes). The value stored
-// for a pair is K / min(L1, L2), where K is the result of CommonBitCount
-// on the two bit vectors and L1, L2 are the sequence lengths.
-void DistKbit20_3(const SeqVect &v, DistFunc &DF)
-{
-    const unsigned uSeqCount = v.Length();
-    DF.SetCount(uSeqCount);
-
-    // One 1,000-byte slice per sequence, all cleared up front.
-    const unsigned uBytes = uSeqCount*1000;
-    byte *BitVector = new byte[uBytes];
-    memset(BitVector, 0, uBytes);
-
-    SetProgressDesc("K-bit distance matrix");
-    unsigned uFill = 0;
-    while (uFill < uSeqCount)
-    {
-        SetKmerBitVector(*v[uFill], BitVector + uFill*1000);
-        ++uFill;
-    }
-
-    const unsigned uTotal = (uSeqCount*(uSeqCount - 1))/2;
-    unsigned uDone = 0;
-    for (unsigned uRow = 0; uRow < uSeqCount; ++uRow)
-    {
-        const byte *RowBits = BitVector + uRow*1000;
-        const unsigned uRowLength = v[uRow]->Length();
-
-        // Lower triangle only: every unordered pair is visited once.
-        for (unsigned uCol = 0; uCol < uRow; ++uCol)
-        {
-            const byte *ColBits = BitVector + uCol*1000;
-            const unsigned uColLength = v[uCol]->Length();
-            const float fShared = (float) CommonBitCount(RowBits, ColBits);
-
-            const unsigned uShorter = MIN(uRowLength, uColLength);
-            DF.SetDist(uRow, uCol, fShared / uShorter);
-
-            // Report progress every 10,000 pairs.
-            if (uDone%10000 == 0)
-                Progress(uDone, uTotal);
-            ++uDone;
-        }
-    }
-    ProgressStepsDone();
-
-    delete[] BitVector;
-}
Deleted: trunk/packages/muscle/trunk/fastdistkmer.cpp
===================================================================
--- trunk/packages/muscle/trunk/fastdistkmer.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/fastdistkmer.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,247 +0,0 @@
-#include "muscle.h"
-#include "msa.h"
-#include "seqvect.h"
-#include "seq.h"
-#include "distfunc.h"
-#include <math.h>
-
-#define TRACE 0
-
-/***
-Some candidate alphabets considered because they
-have high correlations and small table sizes.
-Correlation coefficient is between k-mer distance
-and %id D measured from a CLUSTALW alignment.
-Table size is N^k where N is size of alphabet.
-A is standard (uncompressed) amino alphabet.
-
- Correlation
-Alpha N k Table Size all 25-50%
------ -- - ---------- ---- ------
-A 20 3 8,000 0.943 0.575
-A 20 4 160,000 0.962 0.685 <<
-LiA 14 4 38,416 0.966 0.645
-SEB 14 4 38,416 0.964 0.634
-LiA 13 4 28,561 0.965 0.640
-LiA 12 4 20,736 0.963 0.620
-LiA 10 5 100,000 0.964 0.652
-
-We select A with k=4 because it has the best
-correlations. The only drawback is a large table
-size, but space is readily available and the only
-additional time cost is in resetting the table to
-zero, which can be done quickly with memset or by
-keeping a list of the k-mers that were found (should
-test to see which is faster, and may vary by compiler
-and processor type). It also has the minor advantage
-that we don't need to convert the alphabet.
-
-Fractional identity d is estimated as follows.
-
- F = fractional k-mer count
- if F is 0: F = 0.01
- Y = log(0.02 + F)
- Y = -4.1 + 4.12*d, i.e. d = (Y + 4.1)/4.12
-
-The constant 0.02 was chosen to make the relationship
-between Y and D linear. The constants -4.1 and 4.12
-were chosen to fit a straight line to the scatterplot
-of Y vs D.
-***/
-
-#define MIN(x, y) (((x) < (y)) ? (x) : (y))
-
-// K is the k-mer length and N the alphabet size used by the k-mer
-// counting code in this file.
-const unsigned K = 4;
-const unsigned N = 20;
-// Powers of N, used as place values when letters are packed into a
-// k-mer code.
-const unsigned N_2 = 20*20;
-const unsigned N_3 = 20*20*20;
-const unsigned N_4 = 20*20*20*20;
-
-// Number of distinct k-mer codes (N^4); the size of every count table.
-const unsigned TABLE_SIZE = N_4;
-
-// Debug helper: decode a packed 4-letter k-mer code into text.
-//
-// The code is read as a base-N number whose most significant digit is the
-// first letter. The result lives in a static buffer that is overwritten
-// by the next call.
-const char *KmerToStr(unsigned Kmer)
-{
-    // Static storage is zero-initialized and s[4] is never written, so the
-    // string is always terminated.
-    static char s[5];
-
-    // Peel digits off from the least significant end and store them from
-    // the back of the buffer towards the front.
-    unsigned uRest = Kmer;
-    for (int nPos = 3; nPos >= 0; --nPos)
-    {
-        s[nPos] = LetterToChar(uRest%N);
-        uRest /= N;
-    }
-    return s;
-}
-
-// Count the occurrences of every 4-mer in a sequence of letter codes.
-//
-//   s           letter codes; each is used as a base-N digit, so it must
-//               be smaller than N.
-//   uSeqLength  number of letters in s.
-//   KmerCounts  table of TABLE_SIZE entries; cleared here, then
-//               incremented once per k-mer occurrence. Entries are bytes,
-//               so a count that exceeds the byte range is not preserved.
-//
-// A sequence shorter than one k-mer leaves the table all zero.
-void CountKmers(const byte s[], unsigned uSeqLength, byte KmerCounts[])
-{
-#if TRACE
-    Log("CountKmers\n");
-#endif
-    memset(KmerCounts, 0, TABLE_SIZE*sizeof(byte));
-
-    // The code below reads s[0..3] unconditionally and stops only when the
-    // window end meets the sequence end exactly, so anything shorter than
-    // one full k-mer must be rejected before the first read.
-    const unsigned uKmerLength = 4;
-    if (uSeqLength < uKmerLength)
-        return;
-
-    const byte *ptrKmerStart = s;
-    const byte *ptrKmerEnd = s + uKmerLength;
-    const byte *ptrSeqEnd = s + uSeqLength;
-
-    // Pack the first k-mer; the first letter is the most significant digit.
-    unsigned c3 = s[0]*N_3;
-    unsigned c2 = s[1]*N_2;
-    unsigned c1 = s[2]*N;
-    unsigned c0 = s[3];
-
-    unsigned Kmer = c3 + c2 + c1 + c0;
-
-    for (;;)
-    {
-        assert(Kmer < TABLE_SIZE);
-
-#if TRACE
-        Log("Kmer=%d=%s\n", Kmer, KmerToStr(Kmer));
-#endif
-        ++(KmerCounts[Kmer]);
-
-        if (ptrKmerEnd == ptrSeqEnd)
-            break;
-
-        // Compute k-mer as function of previous k-mer:
-        // 1. Subtract first letter from previous k-mer.
-        // 2. Multiply by N.
-        // 3. Add next letter.
-        c3 = (*ptrKmerStart++) * N_3;
-        Kmer = (Kmer - c3)*N;
-        Kmer += *ptrKmerEnd++;
-    }
-}
-
-// Count the 4-mers shared by two sequences of letter codes.
-//
-//   Seq, uSeqLength     first sequence.
-//   KmerCounts1         k-mer count table already built for Seq.
-//   Seq2, uSeqLength2   second sequence; its table is built here.
-//
-// Returns the sum, over the distinct k-mers of Seq, of
-// min(count in Seq, count in Seq2). Returns 0 when either sequence is
-// shorter than one k-mer.
-unsigned CommonKmerCount(const byte Seq[], unsigned uSeqLength,
-  const byte KmerCounts1[], const byte Seq2[], unsigned uSeqLength2)
-{
-    // Both the table build and the scan below read four letters before
-    // testing any length, so sequences without a complete k-mer are
-    // handled here; they cannot share anything.
-    const unsigned uKmerLength = 4;
-    if (uSeqLength < uKmerLength || uSeqLength2 < uKmerLength)
-        return 0;
-
-    byte KmerCounts2[TABLE_SIZE];
-    CountKmers(Seq2, uSeqLength2, KmerCounts2);
-
-    const byte *ptrKmerStart = Seq;
-    const byte *ptrKmerEnd = Seq + uKmerLength;
-    const byte *ptrSeqEnd = Seq + uSeqLength;
-
-    // Pack the first k-mer; the first letter is the most significant digit.
-    unsigned c3 = Seq[0]*N_3;
-    unsigned c2 = Seq[1]*N_2;
-    unsigned c1 = Seq[2]*N;
-    unsigned c0 = Seq[3];
-
-    unsigned Kmer = c3 + c2 + c1 + c0;
-
-    unsigned uCommonCount = 0;
-    for (;;)
-    {
-        assert(Kmer < TABLE_SIZE);
-
-        const byte Count1 = KmerCounts1[Kmer];
-        const byte Count2 = KmerCounts2[Kmer];
-
-        uCommonCount += MIN(Count1, Count2);
-
-        // Hack so we don't double-count: once a k-mer has contributed, its
-        // entry in the second table is cleared, so later occurrences of
-        // the same k-mer in Seq add min(Count1, 0) = 0.
-        KmerCounts2[Kmer] = 0;
-
-        if (ptrKmerEnd == ptrSeqEnd)
-            break;
-
-        // Compute k-mer as function of previous k-mer:
-        // 1. Subtract first letter from previous k-mer.
-        // 2. Multiply by N.
-        // 3. Add next letter.
-        c3 = (*ptrKmerStart++) * N_3;
-        Kmer = (Kmer - c3)*N;
-        Kmer += *ptrKmerEnd++;
-    }
-    return uCommonCount;
-}
-
-// Convert every character of s to its letter code.
-//
-//   s        source sequence.
-//   Letters  output buffer; must have room for s.Length() entries.
-//
-// The k-mer counting code is not wildcard-aware, so each wildcard
-// character is arbitrarily replaced by 'A' before conversion.
-static void SeqToLetters(const Seq &s, byte Letters[])
-{
-    const unsigned uColCount = s.Length();
-    unsigned uCol = 0;
-    while (uCol < uColCount)
-    {
-        const char cRaw = s.GetChar(uCol);
-        const char cUsed = IsWildcardChar(cRaw) ? 'A' : cRaw;
-        Letters[uCol] = CharToLetter(cUsed);
-        ++uCol;
-    }
-}
-
-// Fill DF with pairwise k-mer distances (k = K) between all sequences.
-//
-//   v   input sequences.
-//   DF  resized to the sequence count. The diagonal and lower triangle
-//       are first zeroed, then every pair (i, j) with i < j receives
-//       1 - F, where F is the number of common k-mers divided by the
-//       number of k-mers in the shorter sequence.
-//
-// When F is 0 it is replaced by 0.01, so the largest distance stored is
-// 0.99. A pair in which either sequence is shorter than K letters has no
-// k-mers to share and is treated the same way.
-void FastDistKmer(const SeqVect &v, DistFunc &DF)
-{
-    byte KmerCounts[TABLE_SIZE];
-
-    const unsigned uSeqCount = v.GetSeqCount();
-
-    DF.SetCount(uSeqCount);
-    if (0 == uSeqCount)
-        return;
-
-    // Initialize distance matrix to zero
-    for (unsigned uSeq1 = 0; uSeq1 < uSeqCount; ++uSeq1)
-    {
-        DF.SetDist(uSeq1, uSeq1, 0);
-        for (unsigned uSeq2 = 0; uSeq2 < uSeq1; ++uSeq2)
-            DF.SetDist(uSeq1, uSeq2, 0);
-    }
-
-    // The letter buffers are sized for the longest sequence and reused.
-    unsigned uMaxLength = 0;
-    for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
-    {
-        const Seq &s = v.GetSeq(uSeqIndex);
-        unsigned uSeqLength = s.Length();
-        if (uSeqLength > uMaxLength)
-            uMaxLength = uSeqLength;
-    }
-    if (0 == uMaxLength)
-        return;
-
-    byte *Seq1Letters = new byte[uMaxLength];
-    byte *Seq2Letters = new byte[uMaxLength];
-
-    for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount - 1; ++uSeqIndex1)
-    {
-        const Seq &s1 = v.GetSeq(uSeqIndex1);
-        const unsigned uSeqLength1 = s1.Length();
-
-        SeqToLetters(s1, Seq1Letters);
-        // Only build the count table when at least one complete k-mer
-        // exists; a shorter sequence is never scanned for k-mers below.
-        if (uSeqLength1 >= K)
-            CountKmers(Seq1Letters, uSeqLength1, KmerCounts);
-
-        for (unsigned uSeqIndex2 = uSeqIndex1 + 1; uSeqIndex2 < uSeqCount;
-          ++uSeqIndex2)
-        {
-            const Seq &s2 = v.GetSeq(uSeqIndex2);
-            const unsigned uSeqLength2 = s2.Length();
-
-            SeqToLetters(s2, Seq2Letters);
-
-            const unsigned uMinLength = MIN(uSeqLength1, uSeqLength2);
-
-            // uMinLength - K + 1 is the k-mer count of the shorter
-            // sequence. It is computed in unsigned arithmetic, so it must
-            // only be evaluated when uMinLength >= K; otherwise it would
-            // wrap around or be zero.
-            unsigned uCommonKmerCount = 0;
-            double F = 0.0;
-            if (uMinLength >= K)
-            {
-                uCommonKmerCount = CommonKmerCount(Seq1Letters, uSeqLength1,
-                  KmerCounts, Seq2Letters, uSeqLength2);
-                F = (double) uCommonKmerCount / (uMinLength - K + 1);
-            }
-            if (0.0 == F)
-                F = 0.01;
-
-            // Y, EstimatedPctId and KD feed only the trace output; the
-            // value stored in DF is 1 - F.
-            double Y = log(0.02 + F);
-            double EstimatedPctId = Y/4.12 + 0.995;
-            double KD = KimuraDist(EstimatedPctId);
-//          DF.SetDist(uSeqIndex1, uSeqIndex2, (float) KD);
-            DF.SetDist(uSeqIndex1, uSeqIndex2, (float) (1 - F));
-#if TRACE
-            Log("CommonCount=%u, MinLength=%u, F=%6.4f Y=%6.4f, %%id=%6.4f, KimuraDist=%8.4f\n",
-              uCommonKmerCount, uMinLength, F, Y, EstimatedPctId, KD);
-#endif
-        }
-    }
-
-    delete[] Seq1Letters;
-    delete[] Seq2Letters;
-}
Deleted: trunk/packages/muscle/trunk/fastdistmafft.cpp
===================================================================
--- trunk/packages/muscle/trunk/fastdistmafft.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/fastdistmafft.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,290 +0,0 @@
-#include "muscle.h"
-#include "distfunc.h"
-#include "seqvect.h"
-#include <math.h>
-
-#define TRACE 0
-
-#define MIN(x, y) (((x) < (y)) ? (x) : (y))
-#define MAX(x, y) (((x) > (y)) ? (x) : (y))
-
-// Number of distinct 6-tuples over a 6-symbol alphabet (6^6).
-const unsigned TUPLE_COUNT = 6*6*6*6*6*6;
-// File-level scratch tables indexed by tuple code; one byte per tuple.
-static unsigned char Count1[TUPLE_COUNT];
-static unsigned char Count2[TUPLE_COUNT];
-
-// Amino acid groups according to MAFFT (sextet5)
-// 0 = A G P S T
-// 1 = I L M V
-// 2 = N D Q E B Z
-// 3 = R H K
-// 4 = F W Y
-// 5 = C
-// 6 = X . - U
-// Lookup table from letter code to group number. Each entry is annotated
-// with the letter it stands for. Note that the table itself never yields
-// group 6: the AX_X and AX_GAP entries are mapped to group 0 (see the
-// TODO notes on those entries).
-unsigned ResidueGroup[] =
- {
- 0, // AX_A,
- 5, // AX_C,
- 2, // AX_D,
- 2, // AX_E,
- 4, // AX_F,
- 0, // AX_G,
- 3, // AX_H,
- 1, // AX_I,
- 3, // AX_K,
- 1, // AX_L,
- 1, // AX_M,
- 2, // AX_N,
- 0, // AX_P,
- 2, // AX_Q,
- 3, // AX_R,
- 0, // AX_S,
- 0, // AX_T,
- 1, // AX_V,
- 4, // AX_W,
- 4, // AX_Y,
-
- 2, // AX_B, // D or N
- 2, // AX_Z, // E or Q
- 0, // AX_X, // Unknown // ******** TODO *************
- // This isn't the correct way of avoiding group 6
- 0 // AX_GAP, // ******** TODO ******************
- };
-// Number of entries in ResidueGroup; letter codes must be below this.
-unsigned uResidueGroupCount = sizeof(ResidueGroup)/sizeof(ResidueGroup[0]);
-
-// Debug helper: render a tuple code as six base-6 digits, most
-// significant digit first. The result lives in a static buffer that is
-// overwritten by the next call.
-static char *TupleToStr(int t)
-{
-    // Static storage is zero-initialized and s[6] is never written, so the
-    // string is always terminated.
-    static char s[7];
-
-    // Peel off base-6 digits from the least significant end and store them
-    // from the back of the buffer towards the front.
-    int nRest = t;
-    for (int nPos = 5; nPos >= 0; --nPos)
-    {
-        s[nPos] = '0' + nRest%6;
-        nRest /= 6;
-    }
-    return s;
-}
-
-// Pack the residue groups of the six letters starting at uLetters[n] into
-// one base-6 tuple code; the first letter is the most significant digit.
-//
-//   uLetters  letter codes; each of the six read must be smaller than
-//             uResidueGroupCount.
-//   n         index of the first letter of the tuple.
-static unsigned GetTuple(const unsigned uLetters[], unsigned n)
-{
-    unsigned uTuple = 0;
-    for (unsigned uOffset = 0; uOffset < 6; ++uOffset)
-    {
-        const unsigned uLetter = uLetters[n + uOffset];
-        assert(uLetter < uResidueGroupCount);
-        // Horner form of u1*6^5 + u2*6^4 + ... + u6.
-        uTuple = uTuple*6 + ResidueGroup[uLetter];
-    }
-    return uTuple;
-}
-
-// Tally the 6-tuples of a letter sequence.
-//
-//   L            letter codes.
-//   uTupleCount  number of tuple start positions to visit (0 .. count-1).
-//   Count        table of TUPLE_COUNT one-byte counters; cleared here and
-//                then incremented once per tuple occurrence.
-static void CountTuples(const unsigned L[], unsigned uTupleCount, unsigned char Count[])
-{
-    memset(Count, 0, TUPLE_COUNT*sizeof(unsigned char));
-
-    unsigned uStart = 0;
-    while (uStart < uTupleCount)
-    {
-        Count[GetTuple(L, uStart)] += 1;
-        ++uStart;
-    }
-}
-
-// Debug helper: log every tuple whose counter is non-zero, one per line,
-// as the tuple's digit string followed by its count.
-static void ListCount(const unsigned char Count[])
-{
-    unsigned uTuple = 0;
-    while (uTuple < TUPLE_COUNT)
-    {
-        if (0 != Count[uTuple])
-            Log("%s %u\n", TupleToStr(uTuple), Count[uTuple]);
-        ++uTuple;
-    }
-}
-
-// Fill DF with pairwise distances derived from shared 6-tuples over the
-// compressed alphabet defined by ResidueGroup.
-//
-//   v   input sequences.
-//   DF  resized to v.Length() entries; on return holds 0 on the diagonal
-//       and, for every pair (i, j) with j < i, the smaller of
-//           3*(Cii - Cij)/Cii   and   3*(Cjj - Cij)/Cjj
-//       where Cxy is the common tuple count of sequences x and y (a zero
-//       self-count is replaced by 1 to avoid dividing by zero).
-//
-// Pass 1 builds the table of common tuple counts, pass 2 converts it to
-// distances. The file-level tables Count1 and Count2 are used as scratch
-// space.
-void DistKmer6_6(const SeqVect &v, DistFunc &DF)
-{
-    const unsigned uSeqCount = v.Length();
-
-    DF.SetCount(uSeqCount);
-    if (0 == uSeqCount)
-        return;
-
-    // Initialize distance matrix to zero
-    for (unsigned uSeq1 = 0; uSeq1 < uSeqCount; ++uSeq1)
-    {
-        DF.SetDist(uSeq1, uSeq1, 0);
-        for (unsigned uSeq2 = 0; uSeq2 < uSeq1; ++uSeq2)
-            DF.SetDist(uSeq1, uSeq2, 0);
-    }
-
-    // Convert to letters: one heap array of letter codes per sequence.
-    unsigned **Letters = new unsigned *[uSeqCount];
-    for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
-    {
-        Seq &s = *(v[uSeqIndex]);
-        const unsigned uSeqLength = s.Length();
-        unsigned *L = new unsigned[uSeqLength];
-        Letters[uSeqIndex] = L;
-        for (unsigned n = 0; n < uSeqLength; ++n)
-        {
-            char c = s[n];
-            L[n] = CharToLetterEx(c);
-            assert(L[n] < uResidueGroupCount);
-        }
-    }
-
-    // Square table of common tuple counts, filled symmetrically in pass 1.
-    unsigned **uCommonTupleCount = new unsigned *[uSeqCount];
-    for (unsigned n = 0; n < uSeqCount; ++n)
-    {
-        uCommonTupleCount[n] = new unsigned[uSeqCount];
-        memset(uCommonTupleCount[n], 0, uSeqCount*sizeof(unsigned));
-    }
-
-    const unsigned uPairCount = (uSeqCount*(uSeqCount + 1))/2;
-    unsigned uCount = 0;
-    for (unsigned uSeq1 = 0; uSeq1 < uSeqCount; ++uSeq1)
-    {
-        Seq &seq1 = *(v[uSeq1]);
-        const unsigned uSeqLength1 = seq1.Length();
-        if (uSeqLength1 < 5)
-            continue;
-
-        // Number of 6-letter windows; zero for a sequence of length 5.
-        const unsigned uTupleCount = uSeqLength1 - 5;
-        const unsigned *L = Letters[uSeq1];
-        CountTuples(L, uTupleCount, Count1);
-#if TRACE
-        {
-            Log("Seq1=%d\n", uSeq1);
-            Log("Groups:\n");
-            for (unsigned n = 0; n < uSeqLength1; ++n)
-                Log("%u", ResidueGroup[L[n]]);
-            Log("\n");
-
-            Log("Tuples:\n");
-            ListCount(Count1);
-        }
-#endif
-
-        SetProgressDesc("K-mer dist pass 1");
-        // uSeq2 runs up to and including uSeq1 so that the self-count,
-        // needed for normalization in pass 2, is computed as well.
-        for (unsigned uSeq2 = 0; uSeq2 <= uSeq1; ++uSeq2)
-        {
-            if (0 == uCount%500)
-                Progress(uCount, uPairCount);
-            ++uCount;
-            Seq &seq2 = *(v[uSeq2]);
-            const unsigned uSeqLength2 = seq2.Length();
-            if (uSeqLength2 < 5)
-            {
-                if (uSeq1 == uSeq2)
-                    DF.SetDist(uSeq1, uSeq2, 0);
-                else
-                    DF.SetDist(uSeq1, uSeq2, 1);
-                continue;
-            }
-
-            // First pass through seq 2 to count tuples
-            const unsigned uTupleCount2 = uSeqLength2 - 5;
-            const unsigned *L2 = Letters[uSeq2];
-            CountTuples(L2, uTupleCount2, Count2);
-#if TRACE
-            Log("Seq2=%d Counts=\n", uSeq2);
-            ListCount(Count2);
-#endif
-
-            // Second pass to accumulate sum of shared tuples
-            // MAFFT defines this as the sum over unique tuples
-            // in seq2 of the minimum of the number of tuples found
-            // in the two sequences.
-            unsigned uSum = 0;
-            for (unsigned n = 0; n < uTupleCount2; ++n)
-            {
-                const unsigned uTuple = GetTuple(L2, n);
-                uSum += MIN(Count1[uTuple], Count2[uTuple]);
-
-                // This is a hack to make sure each unique tuple counted only once.
-                Count2[uTuple] = 0;
-            }
-#if TRACE
-            {
-                Seq &s1 = *(v[uSeq1]);
-                Seq &s2 = *(v[uSeq2]);
-                const char *pName1 = s1.GetName();
-                const char *pName2 = s2.GetName();
-                Log("Common count %s(%d) - %s(%d) =%u\n",
-                  pName1, uSeq1, pName2, uSeq2, uSum);
-            }
-#endif
-            uCommonTupleCount[uSeq1][uSeq2] = uSum;
-            uCommonTupleCount[uSeq2][uSeq1] = uSum;
-        }
-    }
-    ProgressStepsDone();
-
-    uCount = 0;
-    SetProgressDesc("K-mer dist pass 2");
-    for (unsigned uSeq1 = 0; uSeq1 < uSeqCount; ++uSeq1)
-    {
-        double dCommonTupleCount11 = uCommonTupleCount[uSeq1][uSeq1];
-        if (0 == dCommonTupleCount11)
-            dCommonTupleCount11 = 1;
-
-        DF.SetDist(uSeq1, uSeq1, 0);
-        for (unsigned uSeq2 = 0; uSeq2 < uSeq1; ++uSeq2)
-        {
-            if (0 == uCount%500)
-                Progress(uCount, uPairCount);
-            ++uCount;
-
-            double dCommonTupleCount22 = uCommonTupleCount[uSeq2][uSeq2];
-            if (0 == dCommonTupleCount22)
-                dCommonTupleCount22 = 1;
-
-            const double dDist1 = 3.0*(dCommonTupleCount11 - uCommonTupleCount[uSeq1][uSeq2])
-              /dCommonTupleCount11;
-            const double dDist2 = 3.0*(dCommonTupleCount22 - uCommonTupleCount[uSeq1][uSeq2])
-              /dCommonTupleCount22;
-
-            // dMinDist is the value used for tree-building in MAFFT
-            const double dMinDist = MIN(dDist1, dDist2);
-            DF.SetDist(uSeq1, uSeq2, (float) dMinDist);
-
-            //const double dEstimatedPctId = TupleDistToEstimatedPctId(dMinDist);
-            //g_dfPwId.SetDist(uSeq1, uSeq2, dEstimatedPctId);
-            // **** TODO **** why does this make score slightly worse??
-            //const double dKimuraDist = KimuraDist(dEstimatedPctId);
-            //DF.SetDist(uSeq1, uSeq2, dKimuraDist);
-        }
-    }
-    ProgressStepsDone();
-
-    for (unsigned n = 0; n < uSeqCount; ++n)
-    {
-        delete[] uCommonTupleCount[n];
-        // Every per-sequence letter array was allocated with new[] above
-        // and has to be released before the pointer table that holds it;
-        // deleting only the table would leak all of them.
-        delete[] Letters[n];
-    }
-    delete[] uCommonTupleCount;
-    delete[] Letters;
-}
-
-// Convert a fractional identity to a distance: -log(dPctId), with the
-// identity clamped from below at 0.05 so that the result stays bounded.
-double PctIdToMAFFTDist(double dPctId)
-{
-    const double dFloor = 0.05;
-    const double dClamped = (dPctId < dFloor) ? dFloor : dPctId;
-    return -log(dClamped);
-}
-
-// Height for a given fractional identity; this is exactly the value
-// returned by PctIdToMAFFTDist.
-double PctIdToHeightMAFFT(double dPctId)
-{
-    const double dHeight = PctIdToMAFFTDist(dPctId);
-    return dHeight;
-}
Deleted: trunk/packages/muscle/trunk/fastdistnuc.cpp
===================================================================
--- trunk/packages/muscle/trunk/fastdistnuc.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/fastdistnuc.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,265 +0,0 @@
-#include "muscle.h"
-#include "distfunc.h"
-#include "seqvect.h"
-#include <math.h>
-
-#define TRACE 0
-
-#define MIN(x, y) (((x) < (y)) ? (x) : (y))
-#define MAX(x, y) (((x) > (y)) ? (x) : (y))
-
-// Number of distinct 6-tuples when each position is a base-6 digit (6^6).
-const unsigned TUPLE_COUNT = 6*6*6*6*6*6;
-// File-level scratch tables indexed by tuple code; one byte per tuple.
-static unsigned char Count1[TUPLE_COUNT];
-static unsigned char Count2[TUPLE_COUNT];
-
-// Nucleotide groups according to MAFFT (sextet5)
-// 0 = A
-// 1 = C
-// 2 = G
-// 3 = T
-// 4 = other
-
-// Lookup table from letter code to group number. Each entry is annotated
-// with the letter it stands for; everything that is not A, C, G or T/U
-// falls into group 4.
-static unsigned ResidueGroup[] =
- {
- 0, // NX_A,
- 1, // NX_C,
- 2, // NX_G,
- 3, // NX_T/U
- 4, // NX_N,
- 4, // NX_R,
- 4, // NX_Y,
- 4, // NX_GAP
- };
-// Number of entries in ResidueGroup; letter codes must be below this.
-static unsigned uResidueGroupCount = sizeof(ResidueGroup)/sizeof(ResidueGroup[0]);
-
-// Debug helper: render a tuple code as six base-6 digits, most
-// significant digit first. The result lives in a static buffer that is
-// overwritten by the next call.
-static char *TupleToStr(int t)
-{
-    // Static storage is zero-initialized and s[6] is never written, so the
-    // string is always terminated.
-    static char s[7];
-
-    // Walk from the last character to the first, consuming one base-6
-    // digit of the code per step.
-    int nRest = t;
-    int nPos = 6;
-    while (nPos > 0)
-    {
-        --nPos;
-        s[nPos] = '0' + nRest%6;
-        nRest /= 6;
-    }
-    return s;
-}
-
-// Pack the groups of the six letters starting at uLetters[n] into one
-// base-6 tuple code; the first letter is the most significant digit.
-//
-//   uLetters  letter codes; each of the six read must be smaller than
-//             uResidueGroupCount.
-//   n         index of the first letter of the tuple.
-static unsigned GetTuple(const unsigned uLetters[], unsigned n)
-{
-    unsigned uTuple = 0;
-    for (unsigned uOffset = 0; uOffset < 6; ++uOffset)
-    {
-        const unsigned uLetter = uLetters[n + uOffset];
-        assert(uLetter < uResidueGroupCount);
-        // Horner form of u1*6^5 + u2*6^4 + ... + u6.
-        uTuple = uTuple*6 + ResidueGroup[uLetter];
-    }
-    return uTuple;
-}
-
-// Tally the 6-tuples of a letter sequence.
-//
-//   L            letter codes.
-//   uTupleCount  number of tuple start positions to visit (0 .. count-1).
-//   Count        table of TUPLE_COUNT one-byte counters; cleared here and
-//                then incremented once per tuple occurrence.
-static void CountTuples(const unsigned L[], unsigned uTupleCount, unsigned char Count[])
-{
-    memset(Count, 0, TUPLE_COUNT*sizeof(unsigned char));
-
-    unsigned uStart = 0;
-    while (uStart < uTupleCount)
-    {
-        Count[GetTuple(L, uStart)] += 1;
-        ++uStart;
-    }
-}
-
-// Debug helper: log every tuple whose counter is non-zero, one per line,
-// as the tuple's digit string followed by its count.
-static void ListCount(const unsigned char Count[])
-{
-    unsigned uTuple = 0;
-    while (uTuple < TUPLE_COUNT)
-    {
-        if (0 != Count[uTuple])
-            Log("%s %u\n", TupleToStr(uTuple), Count[uTuple]);
-        ++uTuple;
-    }
-}
-
-// Fill DF with pairwise distances derived from shared 6-tuples of
-// nucleotide groups.
-//
-// v   input sequences.
-// DF  resized to v.Length() entries; on return holds 0 on the diagonal
-//     and, for every pair (i, j) with j < i, the smaller of
-//     3*(Cii - Cij)/Cii and 3*(Cjj - Cij)/Cjj, where Cxy is the common
-//     tuple count of sequences x and y.
-//
-// Quits unless the global alphabet is DNA or RNA. The file-level tables
-// Count1 and Count2 are used as scratch space.
-void DistKmer4_6(const SeqVect &v, DistFunc &DF)
- {
- if (ALPHA_DNA != g_Alpha && ALPHA_RNA != g_Alpha)
- Quit("DistKmer4_6 requires nucleo alphabet");
-
- const unsigned uSeqCount = v.Length();
-
- DF.SetCount(uSeqCount);
- if (0 == uSeqCount)
- return;
-
-// Initialize distance matrix to zero
- for (unsigned uSeq1 = 0; uSeq1 < uSeqCount; ++uSeq1)
- {
- DF.SetDist(uSeq1, uSeq1, 0);
- for (unsigned uSeq2 = 0; uSeq2 < uSeq1; ++uSeq2)
- DF.SetDist(uSeq1, uSeq2, 0);
- }
-
-// Convert to letters
-// One heap array of letter codes per sequence; any code of 4 or more is
-// clamped to 4.
- unsigned **Letters = new unsigned *[uSeqCount];
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
- {
- Seq &s = *(v[uSeqIndex]);
- const unsigned uSeqLength = s.Length();
- unsigned *L = new unsigned[uSeqLength];
- Letters[uSeqIndex] = L;
- for (unsigned n = 0; n < uSeqLength; ++n)
- {
- char c = s[n];
- L[n] = CharToLetterEx(c);
- if (L[n] >= 4)
- L[n] = 4;
- }
- }
-
-// Square table of common tuple counts, filled symmetrically in pass 1.
- unsigned **uCommonTupleCount = new unsigned *[uSeqCount];
- for (unsigned n = 0; n < uSeqCount; ++n)
- {
- uCommonTupleCount[n] = new unsigned[uSeqCount];
- memset(uCommonTupleCount[n], 0, uSeqCount*sizeof(unsigned));
- }
-
-// uPairCount includes the diagonal because pass 1 also compares each
-// sequence with itself.
- const unsigned uPairCount = (uSeqCount*(uSeqCount + 1))/2;
- unsigned uCount = 0;
- for (unsigned uSeq1 = 0; uSeq1 < uSeqCount; ++uSeq1)
- {
- Seq &seq1 = *(v[uSeq1]);
- const unsigned uSeqLength1 = seq1.Length();
- if (uSeqLength1 < 5)
- continue;
-
- // Number of 6-letter windows; zero for a sequence of length 5.
- const unsigned uTupleCount = uSeqLength1 - 5;
- const unsigned *L = Letters[uSeq1];
- // Count1 is filled once per uSeq1 and reused for every uSeq2 below.
- CountTuples(L, uTupleCount, Count1);
-#if TRACE
- {
- Log("Seq1=%d\n", uSeq1);
- Log("Groups:\n");
- for (unsigned n = 0; n < uSeqLength1; ++n)
- Log("%u", ResidueGroup[L[n]]);
- Log("\n");
-
- Log("Tuples:\n");
- ListCount(Count1);
- }
-#endif
-
- SetProgressDesc("K-mer dist pass 1");
- // uSeq2 runs up to and including uSeq1 so that the self-count, used
- // for normalization in pass 2, is computed as well.
- for (unsigned uSeq2 = 0; uSeq2 <= uSeq1; ++uSeq2)
- {
- if (0 == uCount%500)
- Progress(uCount, uPairCount);
- ++uCount;
- Seq &seq2 = *(v[uSeq2]);
- const unsigned uSeqLength2 = seq2.Length();
- if (uSeqLength2 < 5)
- {
- if (uSeq1 == uSeq2)
- DF.SetDist(uSeq1, uSeq2, 0);
- else
- DF.SetDist(uSeq1, uSeq2, 1);
- continue;
- }
-
- // First pass through seq 2 to count tuples
- // (these declarations shadow the outer uTupleCount and L)
- const unsigned uTupleCount = uSeqLength2 - 5;
- const unsigned *L = Letters[uSeq2];
- CountTuples(L, uTupleCount, Count2);
-#if TRACE
- Log("Seq2=%d Counts=\n", uSeq2);
- ListCount(Count2);
-#endif
-
- // Second pass to accumulate sum of shared tuples
- // MAFFT defines this as the sum over unique tuples
- // in seq2 of the minimum of the number of tuples found
- // in the two sequences.
- unsigned uSum = 0;
- for (unsigned n = 0; n < uTupleCount; ++n)
- {
- const unsigned uTuple = GetTuple(L, n);
- uSum += MIN(Count1[uTuple], Count2[uTuple]);
-
- // This is a hack to make sure each unique tuple counted only once.
- Count2[uTuple] = 0;
- }
-#if TRACE
- {
- Seq &s1 = *(v[uSeq1]);
- Seq &s2 = *(v[uSeq2]);
- const char *pName1 = s1.GetName();
- const char *pName2 = s2.GetName();
- Log("Common count %s(%d) - %s(%d) =%u\n",
- pName1, uSeq1, pName2, uSeq2, uSum);
- }
-#endif
- uCommonTupleCount[uSeq1][uSeq2] = uSum;
- uCommonTupleCount[uSeq2][uSeq1] = uSum;
- }
- }
- ProgressStepsDone();
-
-// Pass 2: convert the common tuple counts into distances. Every pair
-// with uSeq2 < uSeq1 is written here, replacing whatever pass 1 stored.
- uCount = 0;
- SetProgressDesc("K-mer dist pass 2");
- for (unsigned uSeq1 = 0; uSeq1 < uSeqCount; ++uSeq1)
- {
- // s1 and pName1 are not used further down in this loop.
- Seq &s1 = *(v[uSeq1]);
- const char *pName1 = s1.GetName();
-
- // A zero self-count is replaced by 1 so the divisions below are safe.
- double dCommonTupleCount11 = uCommonTupleCount[uSeq1][uSeq1];
- if (0 == dCommonTupleCount11)
- dCommonTupleCount11 = 1;
-
- DF.SetDist(uSeq1, uSeq1, 0);
- for (unsigned uSeq2 = 0; uSeq2 < uSeq1; ++uSeq2)
- {
- if (0 == uCount%500)
- Progress(uCount, uPairCount);
- ++uCount;
-
- double dCommonTupleCount22 = uCommonTupleCount[uSeq2][uSeq2];
- if (0 == dCommonTupleCount22)
- dCommonTupleCount22 = 1;
-
- const double dDist1 = 3.0*(dCommonTupleCount11 - uCommonTupleCount[uSeq1][uSeq2])
- /dCommonTupleCount11;
- const double dDist2 = 3.0*(dCommonTupleCount22 - uCommonTupleCount[uSeq1][uSeq2])
- /dCommonTupleCount22;
-
- // dMinDist is the value used for tree-building in MAFFT
- const double dMinDist = MIN(dDist1, dDist2);
- DF.SetDist(uSeq1, uSeq2, (float) dMinDist);
-
- //const double dEstimatedPctId = TupleDistToEstimatedPctId(dMinDist);
- //g_dfPwId.SetDist(uSeq1, uSeq2, dEstimatedPctId);
- // **** TODO **** why does this make score slightly worse??
- //const double dKimuraDist = KimuraDist(dEstimatedPctId);
- //DF.SetDist(uSeq1, uSeq2, dKimuraDist);
- }
- }
- ProgressStepsDone();
-
-// Release the per-sequence rows first, then the two pointer tables.
- for (unsigned n = 0; n < uSeqCount; ++n)
- {
- delete[] uCommonTupleCount[n];
- delete[] Letters[n];
- }
- delete[] uCommonTupleCount;
- delete[] Letters;
- }
Deleted: trunk/packages/muscle/trunk/fastscorepath2.cpp
===================================================================
--- trunk/packages/muscle/trunk/fastscorepath2.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/fastscorepath2.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,165 +0,0 @@
-#include "muscle.h"
-#include "profile.h"
-#include "pwpath.h"
-
-// Score an alignment path between two profiles and log a per-edge table.
-//
-// PA, uLengthA  first profile and its length.
-// PB, uLengthB  second profile and its length.
-// Path          alignment path; each edge has a type ('M', 'D' or 'I')
-//               and the prefix lengths in A and B that it ends at.
-//
-// Returns the sum of all match scores, all gap scores charged on
-// transitions between edge types, and a closing gap score if the path
-// ends in 'D' or 'I'. Calls Quit on edge types or transitions that the
-// switch statements below do not accept.
-SCORE FastScorePath2(const ProfPos *PA, unsigned uLengthA,
- const ProfPos *PB, unsigned uLengthB, const PWPath &Path)
- {
- const unsigned uEdgeCount = Path.GetEdgeCount();
- Log("Edge SS PLA PLB Match Gap Total\n");
- Log("---- -- --- --- ----- --- -----\n");
- // 'S' is the pseudo-type in effect before the first edge.
- char cType = 'S';
- SCORE scoreTotal = 0;
- for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
- {
- const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
- const char cPrevType = cType;
- cType = Edge.cType;
- const unsigned uPrefixLengthA = Edge.uPrefixLengthA;
- const unsigned uPrefixLengthB = Edge.uPrefixLengthB;
- // bGap / bMatch only control which columns of the log row are filled.
- bool bGap = false;
- bool bMatch = false;
- SCORE scoreGap = 0;
- SCORE scoreMatch = 0;
-
- switch (cType)
- {
- case 'M':
- {
- if (0 == uPrefixLengthA || 0 == uPrefixLengthB)
- Quit("FastScorePath2, M zero length");
-
- const ProfPos &PPA = PA[uPrefixLengthA - 1];
- const ProfPos &PPB = PB[uPrefixLengthB - 1];
-
- bMatch = true;
- scoreMatch = ScoreProfPos2(PPA, PPB);
-
- // A match that follows a gap charges the gap-close score of the
- // position just before the matched one, in the profile selected
- // by the previous edge type.
- if ('D' == cPrevType)
- {
- bGap = true;
- assert(uPrefixLengthA > 1);
- scoreGap = PA[uPrefixLengthA-2].m_scoreGapClose;
- }
- else if ('I' == cPrevType)
- {
- bGap = true;
- assert(uPrefixLengthB > 1);
- scoreGap = PB[uPrefixLengthB-2].m_scoreGapClose;
- }
- break;
- }
-
- case 'D':
- {
- if (0 == uPrefixLengthA)
- Quit("FastScorePath2, D zero length");
-
- const ProfPos &PPA = PA[uPrefixLengthA - 1];
- bGap = true;
- // Gap-open is charged when the gap starts (after 'S' or 'M');
- // continuing an existing 'D' gap costs nothing here.
- switch (cPrevType)
- {
- case 'S':
- scoreGap = PPA.m_scoreGapOpen;
- break;
- case 'M':
- scoreGap = PPA.m_scoreGapOpen;
- break;
- case 'D':
-// scoreGap = g_scoreGapExtend;
- scoreGap = 0;
- break;
- case 'I':
- Quit("FastScorePath2 DI");
- }
- break;
- }
-
- case 'I':
- {
- if (0 == uPrefixLengthB)
- Quit("FastScorePath2, I zero length");
-
- const ProfPos &PPB = PB[uPrefixLengthB - 1];
- bGap = true;
- // Mirror image of the 'D' case, using profile B.
- switch (cPrevType)
- {
- case 'S':
- scoreGap = PPB.m_scoreGapOpen;
- break;
- case 'M':
- scoreGap = PPB.m_scoreGapOpen;
- break;
- case 'I':
- scoreGap = 0;
-// scoreGap = g_scoreGapExtend;
- break;
- case 'D':
- Quit("FastScorePath2 DI");
- }
- break;
- }
-
- case 'U':
- {
- // NOTE(review): there is no break here, so control would continue
- // into the default label if Quit returned; presumably Quit does
- // not return -- confirm.
- Quit("FastScorePath2 U");
- }
-
- default:
- Quit("FastScorePath2: invalid type %c", cType);
- }
-
- Log("%4u %c%c %4u %4u ", uEdgeIndex, cPrevType, cType,
- uPrefixLengthA, uPrefixLengthB);
- if (bMatch)
- Log("%7.1f ", scoreMatch);
- else
- Log(" ");
- if (bGap)
- Log("%7.1f ", scoreGap);
- else
- Log(" ");
- SCORE scoreEdge = scoreMatch + scoreGap;
- scoreTotal += scoreEdge;
- Log("%7.1f %7.1f", scoreEdge, scoreTotal);
- Log("\n");
- }
-
- // After the last edge: a path that ends inside a gap is charged the
- // gap-close score of the last position of the corresponding profile.
- SCORE scoreGap = 0;
-// if (!g_bTermGapsHalf)
- switch (cType)
- {
- case 'M':
- scoreGap = 0;
- break;
-
- case 'D':
- {
- const ProfPos &LastPPA = PA[uLengthA - 1];
- scoreGap = LastPPA.m_scoreGapClose;
- break;
- }
-
- case 'I':
- {
- const ProfPos &LastPPB = PB[uLengthB - 1];
- scoreGap = LastPPB.m_scoreGapClose;
- break;
- }
-
- case 'U':
- Quit("Unaligned regions not supported");
-
- // cType is still 'S' when the path has no edges.
- case 'S':
- break;
-
- default:
- Quit("Invalid type %c", cType);
- }
-
- Log(" %cE %4u %4u %7.1f\n", cType, uLengthA, uLengthB, scoreGap);
- scoreTotal += scoreGap;
-
- Log("Total = %g\n", scoreTotal);
- return scoreTotal;
- }
Deleted: trunk/packages/muscle/trunk/finddiags.cpp
===================================================================
--- trunk/packages/muscle/trunk/finddiags.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/finddiags.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,161 +0,0 @@
-#include "muscle.h"
-#include "profile.h"
-#include "diaglist.h"
-
-#define TRACE 0
-
-// Tuple length used for seeding diagonals.
-const unsigned KTUP = 5;
-// Number of distinct 5-tuples when each position is a base-6 digit (6^5).
-const unsigned KTUPS = 6*6*6*6*6;
-// File-level scratch table indexed by tuple code; holds a position in
-// the longer profile, or EMPTY.
-static unsigned TuplePos[KTUPS];
-
-// Debug helper: render a tuple code as five base-6 digits, most
-// significant digit first. The result lives in a static buffer that is
-// overwritten by the next call.
-static char *TupleToStr(int t)
-{
-    // Static storage is zero-initialized; only s[0..4] are ever written,
-    // so the string is always terminated.
-    static char s[7];
-
-    // Peel off base-6 digits from the least significant end and store them
-    // from position 4 back to position 0.
-    int nRest = t;
-    for (int nPos = 4; nPos >= 0; --nPos)
-    {
-        s[nPos] = '0' + nRest%6;
-        nRest /= 6;
-    }
-    return s;
-}
-
-// Pack the residue groups of the five profile positions starting at uPos
-// into one base-6 tuple code; the first position is the least
-// significant digit.
-//
-// Returns EMPTY as soon as a position whose group is
-// RESIDUE_GROUP_MULTIPLE is met; later positions are then not read.
-static unsigned GetTuple(const ProfPos *PP, unsigned uPos)
-{
-    unsigned uTuple = 0;
-    unsigned uPlace = 1;    // 6^k for the k-th position of the window
-    for (unsigned k = 0; k < 5; ++k)
-    {
-        const unsigned uGroup = PP[uPos + k].m_uResidueGroup;
-        if (RESIDUE_GROUP_MULTIPLE == uGroup)
-            return EMPTY;
-        uTuple += uGroup*uPlace;
-        uPlace *= 6;
-    }
-    return uTuple;
-}
-
-// Find diagonals (runs of positions with identical residue groups)
-// shared by two amino-acid profiles.
-//
-// PX, uLengthX  first profile and its length.
-// PY, uLengthY  second profile and its length.
-// DL            cleared, then receives one entry (start in X, start in Y,
-//               length) for every run of at least g_uMinDiagLength.
-//
-// Quits unless the global alphabet is amino acid. Returns with DL empty
-// when either profile is shorter than 12 positions. Uses the file-level
-// table TuplePos as scratch space.
-void FindDiags(const ProfPos *PX, unsigned uLengthX, const ProfPos *PY,
- unsigned uLengthY, DiagList &DL)
- {
- if (ALPHA_Amino != g_Alpha)
- Quit("FindDiags: requires amino acid alphabet");
-
- DL.Clear();
-
- if (uLengthX < 12 || uLengthY < 12)
- return;
-
-// Set A to shorter profile, B to longer
-// bSwap records that A is Y and B is X, so results can be swapped back.
- const ProfPos *PA;
- const ProfPos *PB;
- unsigned uLengthA;
- unsigned uLengthB;
- bool bSwap;
- if (uLengthX < uLengthY)
- {
- bSwap = false;
- PA = PX;
- PB = PY;
- uLengthA = uLengthX;
- uLengthB = uLengthY;
- }
- else
- {
- bSwap = true;
- PA = PY;
- PB = PX;
- uLengthA = uLengthY;
- uLengthB = uLengthX;
- }
-
-// Build tuple map for the longer profile, B
- if (uLengthB < KTUP)
- Quit("FindDiags: profile too short");
-
-// memset writes one byte value to every byte of the table, so this only
-// yields EMPTY in each entry if all bytes of EMPTY are equal --
-// presumably EMPTY is all-ones; confirm.
- memset(TuplePos, EMPTY, sizeof(TuplePos));
-
-// A later occurrence of a tuple overwrites an earlier one, so the table
-// keeps the last position at which each tuple starts.
-// NOTE(review): the last start position visited is uLengthB - KTUP - 1,
-// although GetTuple reads only uPos .. uPos+4; the window starting at
-// uLengthB - KTUP is never indexed -- confirm whether that is intended.
- for (unsigned uPos = 0; uPos < uLengthB - KTUP; ++uPos)
- {
- const unsigned uTuple = GetTuple(PB, uPos);
- if (EMPTY == uTuple)
- continue;
- TuplePos[uTuple] = uPos;
- }
-
-// Find matches
- for (unsigned uPosA = 0; uPosA < uLengthA - KTUP; ++uPosA)
- {
- const unsigned uTuple = GetTuple(PA, uPosA);
- if (EMPTY == uTuple)
- continue;
- const unsigned uPosB = TuplePos[uTuple];
- if (EMPTY == uPosB)
- continue;
-
- // This tuple is found in both profiles
- unsigned uStartPosA = uPosA;
- unsigned uStartPosB = uPosB;
-
- // Try to extend the match forwards
- // The run grows one position at a time until either profile ends, a
- // RESIDUE_GROUP_MULTIPLE position is met, or the groups differ.
- unsigned uEndPosA = uPosA + KTUP - 1;
- unsigned uEndPosB = uPosB + KTUP - 1;
- for (;;)
- {
- if (uLengthA - 1 == uEndPosA || uLengthB - 1 == uEndPosB)
- break;
- const unsigned uAAGroupA = PA[uEndPosA+1].m_uResidueGroup;
- if (RESIDUE_GROUP_MULTIPLE == uAAGroupA)
- break;
- const unsigned uAAGroupB = PB[uEndPosB+1].m_uResidueGroup;
- if (RESIDUE_GROUP_MULTIPLE == uAAGroupB)
- break;
- if (uAAGroupA != uAAGroupB)
- break;
- ++uEndPosA;
- ++uEndPosB;
- }
- // Resume the scan after the matched run; the loop header's ++uPosA
- // then moves to the first position past it.
- uPosA = uEndPosA;
-
-#if TRACE
- {
- Log("Match: A %4u-%4u ", uStartPosA, uEndPosA);
- for (unsigned n = uStartPosA; n <= uEndPosA; ++n)
- Log("%c", 'A' + PA[n].m_uResidueGroup);
- Log("\n");
- Log(" B %4u-%4u ", uStartPosB, uEndPosB);
- for (unsigned n = uStartPosB; n <= uEndPosB; ++n)
- Log("%c", 'A' + PB[n].m_uResidueGroup);
- Log("\n");
- }
-#endif
-
- const unsigned uLength = uEndPosA - uStartPosA + 1;
- assert(uEndPosB - uStartPosB + 1 == uLength);
-
- if (uLength >= g_uMinDiagLength)
- {
- // DL entries are always (position in X, position in Y, length).
- if (bSwap)
- DL.Add(uStartPosB, uStartPosA, uLength);
- else
- DL.Add(uStartPosA, uStartPosB, uLength);
- }
- }
- }
Deleted: trunk/packages/muscle/trunk/finddiagsn.cpp
===================================================================
--- trunk/packages/muscle/trunk/finddiagsn.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/finddiagsn.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,152 +0,0 @@
-#include "muscle.h"
-#include "profile.h"
-#include "diaglist.h"
-
-#define TRACE 0
-
-// 4^i computed as 2^(2*i). The argument is parenthesized so that compound
-// expressions such as pow4(a+b) expand correctly.
-#define pow4(i) (1 << (2*(i)))
-// Tuple (k-mer) length used when looking for nucleotide diagonals.
-const unsigned K = 7;
-// Number of distinct K-tuples over a 4-letter alphabet.
-const unsigned KTUPS = pow4(K);
-// For each tuple code, a position in profile B where that tuple starts
-// (filled in by FindDiagsNuc), or EMPTY.
-static unsigned TuplePos[KTUPS];
-
-// Decode a packed tuple code into its K letters (base-4 digits, least
-// significant digit last) for logging.
-// Returns a pointer to a static, NUL-terminated buffer that is overwritten
-// by the next call.
-static char *TupleToStr(int t)
-    {
-    // K letters plus the terminating NUL; the buffer used to be exactly K
-    // bytes and was handed back unterminated.
-    static char s[K + 1];
-
-    for (unsigned i = 0; i < K; ++i)
-        {
-        const unsigned Letter = (t/(pow4(i)))%4;
-        // Letter is unsigned, so only the upper bound can be violated
-        // (which happens if t is negative).
-        assert(Letter < 4);
-        s[K-i-1] = LetterToChar(Letter);
-        }
-    s[K] = '\0';
-
-    return s;
-    }
-
-// Pack the residue groups of the K consecutive positions starting at
-// PP[uPos] into a single base-4 number (first position most significant).
-// Returns EMPTY as soon as a position whose residue group is
-// RESIDUE_GROUP_MULTIPLE is met.
-static unsigned GetTuple(const ProfPos *PP, unsigned uPos)
-    {
-    const ProfPos *ptrFirst = PP + uPos;
-    const ProfPos *ptrEnd = ptrFirst + K;
-
-    unsigned uCode = 0;
-    for (const ProfPos *ptr = ptrFirst; ptr != ptrEnd; ++ptr)
-        {
-        const unsigned uGroup = ptr->m_uResidueGroup;
-        if (uGroup == RESIDUE_GROUP_MULTIPLE)
-            return EMPTY;
-        uCode = 4*uCode + uGroup;
-        }
-
-    return uCode;
-    }
-
-// Find runs of identical residue groups ("diagonals") shared by two
-// nucleotide profiles and store them in DL.
-//
-// PX/uLengthX, PY/uLengthY: the two profiles and their lengths.
-// DL: output list; cleared first, then filled via DL.Add(start in X,
-// start in Y, length) for every run of at least g_uMinDiagLength.
-//
-// Quits unless g_Alpha is ALPHA_DNA or ALPHA_RNA. Returns with DL empty if
-// either profile is shorter than K + 16.
-void FindDiagsNuc(const ProfPos *PX, unsigned uLengthX, const ProfPos *PY,
- unsigned uLengthY, DiagList &DL)
- {
- if (ALPHA_DNA != g_Alpha && ALPHA_RNA != g_Alpha)
- Quit("FindDiagsNuc: requires nucleo alphabet");
-
- DL.Clear();
-
-// 16 is arbitrary slop, no principled reason for this.
- if (uLengthX < K + 16 || uLengthY < K + 16)
- return;
-
-// Set A to shorter profile, B to longer
-// (when the lengths are equal, Y becomes A and bSwap is true).
- const ProfPos *PA;
- const ProfPos *PB;
- unsigned uLengthA;
- unsigned uLengthB;
- bool bSwap;
- if (uLengthX < uLengthY)
- {
- bSwap = false;
- PA = PX;
- PB = PY;
- uLengthA = uLengthX;
- uLengthB = uLengthY;
- }
- else
- {
- bSwap = true;
- PA = PY;
- PB = PX;
- uLengthA = uLengthY;
- uLengthB = uLengthX;
- }
-
-#if TRACE
- Log("FindDiagsNuc(LengthA=%d LengthB=%d\n", uLengthA, uLengthB);
-#endif
-
-// Build tuple map for the longer profile, B
-// (this check cannot fire: both lengths are already >= K + 16 here).
- if (uLengthB < K)
- Quit("FindDiags: profile too short");
-
- // NOTE(review): memset writes one byte value everywhere, so this relies
- // on every byte of EMPTY being identical -- confirm EMPTY's definition.
- memset(TuplePos, EMPTY, sizeof(TuplePos));
-
- // A later occurrence of the same tuple overwrites an earlier one, so
- // TuplePos keeps the last position seen.
- // NOTE(review): the bound stops before uPos == uLengthB - K, which is
- // still a valid window start -- confirm whether that is intended.
- for (unsigned uPos = 0; uPos < uLengthB - K; ++uPos)
- {
- const unsigned uTuple = GetTuple(PB, uPos);
- if (EMPTY == uTuple)
- continue;
- TuplePos[uTuple] = uPos;
- }
-
-// Find matches
- for (unsigned uPosA = 0; uPosA < uLengthA - K; ++uPosA)
- {
- const unsigned uTuple = GetTuple(PA, uPosA);
- if (EMPTY == uTuple)
- continue;
- const unsigned uPosB = TuplePos[uTuple];
- if (EMPTY == uPosB)
- continue;
-
- // This tuple is found in both profiles
- unsigned uStartPosA = uPosA;
- unsigned uStartPosB = uPosB;
-
- // Try to extend the match forwards
- unsigned uEndPosA = uPosA + K - 1;
- unsigned uEndPosB = uPosB + K - 1;
- for (;;)
- {
- // Stop at the last position of either profile.
- if (uLengthA - 1 == uEndPosA || uLengthB - 1 == uEndPosB)
- break;
- const unsigned uAAGroupA = PA[uEndPosA+1].m_uResidueGroup;
- if (RESIDUE_GROUP_MULTIPLE == uAAGroupA)
- break;
- const unsigned uAAGroupB = PB[uEndPosB+1].m_uResidueGroup;
- if (RESIDUE_GROUP_MULTIPLE == uAAGroupB)
- break;
- if (uAAGroupA != uAAGroupB)
- break;
- ++uEndPosA;
- ++uEndPosB;
- }
- // Skip past the matched run; the loop's ++uPosA then resumes the scan
- // at the position after the run.
- uPosA = uEndPosA;
-
-#if TRACE
- {
- Log("Match: A %4u-%4u ", uStartPosA, uEndPosA);
- for (unsigned n = uStartPosA; n <= uEndPosA; ++n)
- Log("%c", LetterToChar(PA[n].m_uResidueGroup));
- Log("\n");
- Log(" B %4u-%4u ", uStartPosB, uEndPosB);
- for (unsigned n = uStartPosB; n <= uEndPosB; ++n)
- Log("%c", LetterToChar(PB[n].m_uResidueGroup));
- Log("\n");
- }
-#endif
-
- const unsigned uLength = uEndPosA - uStartPosA + 1;
- assert(uEndPosB - uStartPosB + 1 == uLength);
-
- if (uLength >= g_uMinDiagLength)
- {
- // Report in (X, Y) order regardless of which profile became A.
- if (bSwap)
- DL.Add(uStartPosB, uStartPosA, uLength);
- else
- DL.Add(uStartPosA, uStartPosB, uLength);
- }
- }
- }
Deleted: trunk/packages/muscle/trunk/gapscoredimer.h
===================================================================
--- trunk/packages/muscle/trunk/gapscoredimer.h 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/gapscoredimer.h 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,69 +0,0 @@
-// source code generated by dimer.py
-
-// Gap score term for the "MM" case (presumably a match followed by a match;
-// the naming comes from the dimer.py generator -- confirm there).
-// Returns the sum of g_scoreGapOpen, g_scoreGapExtend and g_scoreGapAmbig,
-// each weighted by the listed products of the m_LL/m_LG/m_GL/m_GG members
-// of the two profile positions.
-static SCORE GapScoreMM(const ProfPos &PPA, const ProfPos &PPB)
- {
- return
- g_scoreGapOpen*(PPA.m_LL*PPB.m_LG + PPA.m_LG*PPB.m_LL + PPA.m_LG*PPB.m_GL + PPA.m_GL*PPB.m_LG) +
- g_scoreGapExtend*(PPA.m_LL*PPB.m_GG + PPA.m_GG*PPB.m_LL) +
- g_scoreGapAmbig*(PPA.m_GL*PPB.m_GG + PPA.m_GG*PPB.m_GL);
- }
-
-// Gap score term for the "MD" case (presumably match followed by delete;
-// naming comes from the dimer.py generator -- confirm there).
-// Returns the sum of g_scoreGapOpen, g_scoreGapExtend and g_scoreGapAmbig,
-// each weighted by the listed products of the m_LL/m_LG/m_GL/m_GG members
-// of the two profile positions.
-static SCORE GapScoreMD(const ProfPos &PPA, const ProfPos &PPB)
- {
- return
- g_scoreGapOpen*(PPA.m_LL*PPB.m_LL + PPA.m_LL*PPB.m_GL + PPA.m_GL*PPB.m_LL + PPA.m_GL*PPB.m_GL) +
- g_scoreGapExtend*(PPA.m_LL*PPB.m_LG + PPA.m_LL*PPB.m_GG) +
- g_scoreGapAmbig*(PPA.m_GL*PPB.m_LG + PPA.m_GL*PPB.m_GG);
- }
-
-// Gap score term for the "MI" case (presumably match followed by insert;
-// naming comes from the dimer.py generator -- confirm there).
-// Returns the sum of g_scoreGapOpen, g_scoreGapExtend and g_scoreGapAmbig,
-// each weighted by the listed products of the m_LL/m_LG/m_GL/m_GG members
-// of the two profile positions.
-static SCORE GapScoreMI(const ProfPos &PPA, const ProfPos &PPB)
- {
- return
- g_scoreGapOpen*(PPA.m_LL*PPB.m_LL + PPA.m_LL*PPB.m_GL + PPA.m_GL*PPB.m_LL + PPA.m_GL*PPB.m_GL) +
- g_scoreGapExtend*(PPA.m_LG*PPB.m_LL + PPA.m_GG*PPB.m_LL) +
- g_scoreGapAmbig*(PPA.m_LG*PPB.m_GL + PPA.m_GG*PPB.m_GL);
- }
-
-// Gap score term for the "DM" case (presumably delete followed by match;
-// naming comes from the dimer.py generator -- confirm there).
-// Returns the sum of g_scoreGapOpen, g_scoreGapExtend and g_scoreGapAmbig,
-// each weighted by the listed products of the m_LL/m_LG/m_GL/m_GG members
-// of the two profile positions.
-static SCORE GapScoreDM(const ProfPos &PPA, const ProfPos &PPB)
- {
- return
- g_scoreGapOpen*(PPA.m_LG*PPB.m_LL + PPA.m_LG*PPB.m_GL) +
- g_scoreGapExtend*(PPA.m_LL*PPB.m_LG + PPA.m_LL*PPB.m_GG) +
- g_scoreGapAmbig*(PPA.m_GL*PPB.m_LG + PPA.m_GL*PPB.m_GG + PPA.m_GG*PPB.m_LL + PPA.m_GG*PPB.m_GL);
- }
-
-// Gap score term for the "DD" case (presumably delete followed by delete;
-// naming comes from the dimer.py generator -- confirm there).
-// Only g_scoreGapExtend and g_scoreGapAmbig contribute here; every product
-// uses PPA.m_LL or PPA.m_GL against each of PPB's four members.
-static SCORE GapScoreDD(const ProfPos &PPA, const ProfPos &PPB)
- {
- return
- g_scoreGapExtend*(PPA.m_LL*PPB.m_LL + PPA.m_LL*PPB.m_LG + PPA.m_LL*PPB.m_GL + PPA.m_LL*PPB.m_GG) +
- g_scoreGapAmbig*(PPA.m_GL*PPB.m_LL + PPA.m_GL*PPB.m_LG + PPA.m_GL*PPB.m_GL + PPA.m_GL*PPB.m_GG);
- }
-
-// Gap score term for the "DI" case (presumably delete followed by insert;
-// naming comes from the dimer.py generator -- confirm there).
-// Only g_scoreGapOpen and g_scoreGapAmbig contribute here, weighted by the
-// listed products of the two positions' m_LL/m_LG/m_GL/m_GG members.
-static SCORE GapScoreDI(const ProfPos &PPA, const ProfPos &PPB)
- {
- return
- g_scoreGapOpen*(PPA.m_LL*PPB.m_LL + PPA.m_LL*PPB.m_GL + PPA.m_GL*PPB.m_LL + PPA.m_GL*PPB.m_GL) +
- g_scoreGapAmbig*(PPA.m_LG*PPB.m_LL + PPA.m_LG*PPB.m_GL + PPA.m_GG*PPB.m_LL + PPA.m_GG*PPB.m_GL);
- }
-
-// Gap score term for the "IM" case (presumably insert followed by match;
-// naming comes from the dimer.py generator -- confirm there).
-// Returns the sum of g_scoreGapOpen, g_scoreGapExtend and g_scoreGapAmbig,
-// each weighted by the listed products of the m_LL/m_LG/m_GL/m_GG members
-// of the two profile positions.
-static SCORE GapScoreIM(const ProfPos &PPA, const ProfPos &PPB)
- {
- return
- g_scoreGapOpen*(PPA.m_LL*PPB.m_LG + PPA.m_GL*PPB.m_LG) +
- g_scoreGapExtend*(PPA.m_LG*PPB.m_LL + PPA.m_GG*PPB.m_LL) +
- g_scoreGapAmbig*(PPA.m_LL*PPB.m_GG + PPA.m_LG*PPB.m_GL + PPA.m_GL*PPB.m_GG + PPA.m_GG*PPB.m_GL);
- }
-
-// Gap score term for the "ID" case (presumably insert followed by delete;
-// naming comes from the dimer.py generator -- confirm there).
-// Only g_scoreGapOpen and g_scoreGapAmbig contribute here, weighted by the
-// listed products of the two positions' m_LL/m_LG/m_GL/m_GG members.
-static SCORE GapScoreID(const ProfPos &PPA, const ProfPos &PPB)
- {
- return
- g_scoreGapOpen*(PPA.m_LL*PPB.m_LL + PPA.m_LL*PPB.m_GL + PPA.m_GL*PPB.m_LL + PPA.m_GL*PPB.m_GL) +
- g_scoreGapAmbig*(PPA.m_LL*PPB.m_LG + PPA.m_LL*PPB.m_GG + PPA.m_GL*PPB.m_LG + PPA.m_GL*PPB.m_GG);
- }
-
-// Gap score term for the "II" case (presumably insert followed by insert;
-// naming comes from the dimer.py generator -- confirm there).
-// Only g_scoreGapExtend and g_scoreGapAmbig contribute here; every product
-// uses PPB.m_LL or PPB.m_GL against each of PPA's four members.
-static SCORE GapScoreII(const ProfPos &PPA, const ProfPos &PPB)
- {
- return
- g_scoreGapExtend*(PPA.m_LL*PPB.m_LL + PPA.m_LG*PPB.m_LL + PPA.m_GL*PPB.m_LL + PPA.m_GG*PPB.m_LL) +
- g_scoreGapAmbig*(PPA.m_LL*PPB.m_GL + PPA.m_LG*PPB.m_GL + PPA.m_GL*PPB.m_GL + PPA.m_GG*PPB.m_GL);
- }
Deleted: trunk/packages/muscle/trunk/glbalign.cpp
===================================================================
--- trunk/packages/muscle/trunk/glbalign.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/glbalign.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,165 +0,0 @@
-#include "muscle.h"
-#include "pwpath.h"
-#include "timing.h"
-#include "textfile.h"
-#include "msa.h"
-#include "profile.h"
-
-#if !VER_3_52
-
-#define COMPARE_SIMPLE 0
-
-#if TIMING
-TICKS g_ticksDP = 0;
-#endif
-
-#if 1
-extern bool g_bKeepSimpleDP;
-SCORE NWSmall(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
- unsigned uLengthB, PWPath &Path);
-SCORE NWDASmall(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
- unsigned uLengthB, PWPath &Path);
-SCORE NWDASimple(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
- unsigned uLengthB, PWPath &Path);
-SCORE NWDASimple2(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
- unsigned uLengthB, PWPath &Path);
-SCORE GlobalAlignSimple(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
- unsigned uLengthB, PWPath &Path);
-
-// Diagonal-free alignment entry point. In this build configuration it
-// simply forwards all arguments to GlobalAlign and returns its score.
-SCORE GlobalAlignNoDiags(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
-  unsigned uLengthB, PWPath &Path)
-    {
-    const SCORE scoreAlign = GlobalAlign(PA, uLengthA, PB, uLengthB, Path);
-    return scoreAlign;
-    }
-
-#if COMPARE_SIMPLE
-
-// Self-checking variant of GlobalAlign (built only when COMPARE_SIMPLE is
-// non-zero): aligns with both GlobalAlignSimple and NWSmall, and quits
-// after logging both paths if they are not equal. Returns the NWSmall
-// score and leaves the NWSmall path in Path.
-SCORE GlobalAlign(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
-  unsigned uLengthB, PWPath &Path)
-    {
-#if TIMING
-    const TICKS ticksStart = GetClockTicks();
-#endif
-    g_bKeepSimpleDP = true;
-
-    // Reference alignment.
-    PWPath RefPath;
-    GlobalAlignSimple(PA, uLengthA, PB, uLengthB, RefPath);
-
-    // Alignment under test.
-    const SCORE scoreSmall = NWSmall(PA, uLengthA, PB, uLengthB, Path);
-
-    const bool bSamePath = Path.Equal(RefPath);
-    if (!bSamePath)
-        {
-        Log("Simple:\n");
-        RefPath.LogMe();
-        Log("Small:\n");
-        Path.LogMe();
-        Quit("Paths differ");
-        }
-
-#if TIMING
-    const TICKS ticksEnd = GetClockTicks();
-    g_ticksDP += (ticksEnd - ticksStart);
-#endif
-    return scoreSmall;
-    }
-
-#else // COMPARE_SIMPLE
-
-// Align two profiles with NWSmall, returning its score and leaving the
-// alignment in Path. When TIMING is enabled, the clock ticks spent in the
-// call are added to g_ticksDP.
-SCORE GlobalAlign(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
-  unsigned uLengthB, PWPath &Path)
-    {
-#if TIMING
-    const TICKS ticksStart = GetClockTicks();
-#endif
-    const SCORE scoreSmall = NWSmall(PA, uLengthA, PB, uLengthB, Path);
-#if TIMING
-    const TICKS ticksEnd = GetClockTicks();
-    g_ticksDP += (ticksEnd - ticksStart);
-#endif
-    return scoreSmall;
-    }
-
-#endif
-
-#else // 1
-
-// Replace Path with uLengthB 'I' edges: prefix length in A stays 0 while
-// the prefix length in B runs from 1 to uLengthB.
-static void AllInserts(PWPath &Path, unsigned uLengthB)
-    {
-    Path.Clear();
-
-    PWEdge InsertEdge;
-    InsertEdge.cType = 'I';
-    InsertEdge.uPrefixLengthA = 0;
-
-    for (unsigned uPrefixB = 1; uPrefixB <= uLengthB; ++uPrefixB)
-        {
-        InsertEdge.uPrefixLengthB = uPrefixB;
-        Path.AppendEdge(InsertEdge);
-        }
-    }
-
-// Replace Path with uLengthA 'D' edges: prefix length in B stays 0 while
-// the prefix length in A runs from 1 to uLengthA.
-static void AllDeletes(PWPath &Path, unsigned uLengthA)
-    {
-    Path.Clear();
-
-    PWEdge DeleteEdge;
-    DeleteEdge.cType = 'D';
-    DeleteEdge.uPrefixLengthB = 0;
-
-    for (unsigned uPrefixA = 1; uPrefixA <= uLengthA; ++uPrefixA)
-        {
-        DeleteEdge.uPrefixLengthA = uPrefixA;
-        Path.AppendEdge(DeleteEdge);
-        }
-    }
-
-// Align two profiles, dispatching on g_bDiags between the diagonal-based
-// and the plain aligner. An empty profile yields an all-insert or
-// all-delete path and a score of 0 (these early exits skip the TIMING
-// accounting). When TIMING is enabled, elapsed ticks are added to g_ticksDP.
-SCORE GlobalAlign(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
-  unsigned uLengthB, PWPath &Path)
-    {
-#if TIMING
-    const TICKS ticksStart = GetClockTicks();
-#endif
-    // Degenerate inputs: nothing to align against.
-    if (0 == uLengthA)
-        {
-        AllInserts(Path, uLengthB);
-        return 0;
-        }
-    if (0 == uLengthB)
-        {
-        AllDeletes(Path, uLengthA);
-        return 0;
-        }
-
-    const SCORE scoreAlign = g_bDiags ?
-      GlobalAlignDiags(PA, uLengthA, PB, uLengthB, Path) :
-      GlobalAlignNoDiags(PA, uLengthA, PB, uLengthB, Path);
-#if TIMING
-    const TICKS ticksEnd = GetClockTicks();
-    g_ticksDP += (ticksEnd - ticksStart);
-#endif
-    return scoreAlign;
-    }
-
-// Align two profiles without the diagonal heuristic, choosing the aligner
-// from the global settings: GlobalAlignDimer when g_bDimer is set,
-// otherwise the routine matching g_PPScore. Quits on an unrecognised
-// g_PPScore value.
-SCORE GlobalAlignNoDiags(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
-  unsigned uLengthB, PWPath &Path)
-    {
-    if (g_bDimer)
-        return GlobalAlignDimer(PA, uLengthA, PB, uLengthB, Path);
-
-    if (PPSCORE_LE == g_PPScore)
-        return GlobalAlignLE(PA, uLengthA, PB, uLengthB, Path);
-
-    // SP and SV share one aligner.
-    if (PPSCORE_SP == g_PPScore || PPSCORE_SV == g_PPScore)
-        return GlobalAlignSP(PA, uLengthA, PB, uLengthB, Path);
-
-    if (PPSCORE_SPN == g_PPScore)
-        return GlobalAlignSPN(PA, uLengthA, PB, uLengthB, Path);
-
-    Quit("Invalid PP score (GlobalAlignNoDiags)");
-    return 0;
-    }
-
-#endif
-
-#endif // !VER_3_52
Deleted: trunk/packages/muscle/trunk/glbalign352.cpp
===================================================================
--- trunk/packages/muscle/trunk/glbalign352.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/glbalign352.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,55 +0,0 @@
-#include "muscle.h"
-#include "pwpath.h"
-#include "timing.h"
-#include "textfile.h"
-#include "msa.h"
-#include "profile.h"
-
-#if VER_3_52
-
-#if TIMING
-TICKS g_ticksDP = 0;
-#endif
-
-// Align two profiles, dispatching on g_bDiags between GlobalAlignDiags and
-// GlobalAlignNoDiags. When TIMING is enabled, the clock ticks spent are
-// added to g_ticksDP.
-SCORE GlobalAlign(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
-  unsigned uLengthB, PWPath &Path)
-    {
-#if TIMING
-    const TICKS ticksStart = GetClockTicks();
-#endif
-    const SCORE scoreAlign = g_bDiags ?
-      GlobalAlignDiags(PA, uLengthA, PB, uLengthB, Path) :
-      GlobalAlignNoDiags(PA, uLengthA, PB, uLengthB, Path);
-#if TIMING
-    const TICKS ticksEnd = GetClockTicks();
-    g_ticksDP += (ticksEnd - ticksStart);
-#endif
-    return scoreAlign;
-    }
-
-// Align two profiles without the diagonal heuristic, choosing the aligner
-// from the global settings: GlobalAlignDimer when g_bDimer is set,
-// otherwise the routine matching g_PPScore. Quits on an unrecognised
-// g_PPScore value.
-SCORE GlobalAlignNoDiags(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
-  unsigned uLengthB, PWPath &Path)
-    {
-    if (g_bDimer)
-        return GlobalAlignDimer(PA, uLengthA, PB, uLengthB, Path);
-
-    if (PPSCORE_LE == g_PPScore)
-        return GlobalAlignLE(PA, uLengthA, PB, uLengthB, Path);
-
-    // SP and SV share one aligner.
-    if (PPSCORE_SP == g_PPScore || PPSCORE_SV == g_PPScore)
-        return GlobalAlignSP(PA, uLengthA, PB, uLengthB, Path);
-
-    if (PPSCORE_SPN == g_PPScore)
-        return GlobalAlignSPN(PA, uLengthA, PB, uLengthB, Path);
-
-    Quit("Invalid PP score (GlobalAlignNoDiags)");
-    return 0;
-    }
-
-#endif // VER_3_52
Deleted: trunk/packages/muscle/trunk/glbaligndiag.cpp
===================================================================
--- trunk/packages/muscle/trunk/glbaligndiag.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/glbaligndiag.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,172 +0,0 @@
-#include "muscle.h"
-#include "dpreglist.h"
-#include "diaglist.h"
-#include "pwpath.h"
-#include "profile.h"
-#include "timing.h"
-
-#define TRACE 0
-#define TRACE_PATH 0
-#define LIST_DIAGS 0
-
-// Running totals over all GlobalAlignDiags calls, reported by
-// ListDiagSavings: the full uLengthA x uLengthB area of each call ...
-static double g_dDPAreaWithoutDiags = 0.0;
-// ... and the summed area of the rectangular regions actually aligned.
-static double g_dDPAreaWithDiags = 0.0;
-
-// Shift every edge of Path in place, adding uOffsetA to its prefix length
-// in A and uOffsetB to its prefix length in B.
-static void OffsetPath(PWPath &Path, unsigned uOffsetA, unsigned uOffsetB)
-    {
-    const unsigned uTotal = Path.GetEdgeCount();
-    unsigned uNext = 0;
-    while (uNext < uTotal)
-        {
-        const PWEdge &ReadOnlyEdge = Path.GetEdge(uNext);
-
-        // Nasty hack -- the edge is reached through a const reference, so
-        // constness is cast away to poke the new values back into the path.
-        PWEdge &WritableEdge = (PWEdge &) ReadOnlyEdge;
-        WritableEdge.uPrefixLengthA += uOffsetA;
-        WritableEdge.uPrefixLengthB += uOffsetB;
-
-        ++uNext;
-        }
-    }
-
-// Replace Path with d.m_uLength 'M' edges that walk along diagonal d,
-// the first one ending at prefix lengths (m_uStartPosA + 1, m_uStartPosB + 1).
-static void DiagToPath(const Diag &d, PWPath &Path)
-    {
-    Path.Clear();
-
-    const unsigned uCount = d.m_uLength;
-    for (unsigned uStep = 1; uStep <= uCount; ++uStep)
-        {
-        PWEdge MatchEdge;
-        MatchEdge.cType = 'M';
-        MatchEdge.uPrefixLengthA = d.m_uStartPosA + (uStep - 1) + 1;
-        MatchEdge.uPrefixLengthB = d.m_uStartPosB + (uStep - 1) + 1;
-        Path.AppendEdge(MatchEdge);
-        }
-    }
-
-// Append every edge of RegPath, in order, to the end of Path.
-static void AppendRegPath(PWPath &Path, const PWPath &RegPath)
-    {
-    const unsigned uTotal = RegPath.GetEdgeCount();
-    unsigned uNext = 0;
-    while (uNext < uTotal)
-        {
-        const PWEdge &NextEdge = RegPath.GetEdge(uNext);
-        Path.AppendEdge(NextEdge);
-        ++uNext;
-        }
-    }
-
-// Align two profiles using diagonals to cut down the DP area.
-//
-// Diagonals are found with FindDiags (amino alphabet) or FindDiagsNuc
-// (DNA/RNA), sorted, filtered with DeleteIncompatible and merged. The
-// resulting list is turned into a sequence of regions: diagonal regions
-// become runs of match edges, rectangular regions are aligned with
-// GlobalAlignNoDiags and shifted to their position in the full profiles.
-// Each region's path is appended to Path in order (Path is not cleared
-// here).
-//
-// Always returns 0; no alignment score is computed.
-// Side effect: updates the DP-area statistics used by ListDiagSavings.
-SCORE GlobalAlignDiags(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
-  unsigned uLengthB, PWPath &Path)
-    {
-#if LIST_DIAGS
-    TICKS t1 = GetClockTicks();
-#endif
-
-    DiagList DL;
-
-    if (ALPHA_Amino == g_Alpha)
-        FindDiags(PA, uLengthA, PB, uLengthB, DL);
-    else if (ALPHA_DNA == g_Alpha || ALPHA_RNA == g_Alpha)
-        FindDiagsNuc(PA, uLengthA, PB, uLengthB, DL);
-    else
-        Quit("GlobalAlignDiags: bad alpha");
-
-#if TRACE
-    Log("GlobalAlignDiags, diag list:\n");
-    DL.LogMe();
-#endif
-
-    DL.Sort();
-    DL.DeleteIncompatible();
-
-#if TRACE
-    Log("After DeleteIncompatible:\n");
-    DL.LogMe();
-#endif
-
-    MergeDiags(DL);
-
-#if TRACE
-    Log("After MergeDiags:\n");
-    DL.LogMe();
-#endif
-
-    DPRegionList RL;
-    DiagListToDPRegionList(DL, RL, uLengthA, uLengthB);
-
-#if TRACE
-    Log("RegionList:\n");
-    RL.LogMe();
-#endif
-
-#if LIST_DIAGS
-    {
-    TICKS t2 = GetClockTicks();
-    unsigned uArea = RL.GetDPArea();
-    Log("ticks=%ld\n", (long) (t2 - t1));
-    Log("area=%u\n", uArea);
-    }
-#endif
-
-    // Areas are computed in double: the product of two unsigned lengths
-    // wraps around once it no longer fits in an unsigned.
-    g_dDPAreaWithoutDiags += (double) uLengthA*(double) uLengthB;
-
-    double dDPAreaWithDiags = 0.0;
-    const unsigned uRegionCount = RL.GetCount();
-    for (unsigned uRegionIndex = 0; uRegionIndex < uRegionCount; ++uRegionIndex)
-        {
-        const DPRegion &r = RL.Get(uRegionIndex);
-
-        PWPath RegPath;
-        if (DPREGIONTYPE_Diag == r.m_Type)
-            {
-            DiagToPath(r.m_Diag, RegPath);
-#if TRACE_PATH
-            Log("DiagToPath, path=\n");
-            RegPath.LogMe();
-#endif
-            }
-        else if (DPREGIONTYPE_Rect == r.m_Type)
-            {
-            const unsigned uRegStartPosA = r.m_Rect.m_uStartPosA;
-            const unsigned uRegStartPosB = r.m_Rect.m_uStartPosB;
-            const unsigned uRegLengthA = r.m_Rect.m_uLengthA;
-            const unsigned uRegLengthB = r.m_Rect.m_uLengthB;
-            const ProfPos *RegPA = PA + uRegStartPosA;
-            const ProfPos *RegPB = PB + uRegStartPosB;
-
-            dDPAreaWithDiags += (double) uRegLengthA*(double) uRegLengthB;
-            GlobalAlignNoDiags(RegPA, uRegLengthA, RegPB, uRegLengthB, RegPath);
-#if TRACE_PATH
-            Log("GlobalAlignNoDiags RegPath=\n");
-            RegPath.LogMe();
-#endif
-            // The sub-alignment is relative to the rectangle's origin;
-            // shift it to coordinates of the full profiles.
-            OffsetPath(RegPath, uRegStartPosA, uRegStartPosB);
-#if TRACE_PATH
-            Log("After offset path, RegPath=\n");
-            RegPath.LogMe();
-#endif
-            }
-        else
-            Quit("GlobalAlignDiags, Invalid region type %u", r.m_Type);
-
-        AppendRegPath(Path, RegPath);
-#if TRACE_PATH
-        Log("After AppendPath, path=");
-        Path.LogMe();
-#endif
-        }
-
-#if TRACE
-    {
-    double dDPAreaWithoutDiags = (double) uLengthA*(double) uLengthB;
-    Log("DP area with diags %.3g without %.3g pct saved %.3g %%\n",
-      dDPAreaWithDiags, dDPAreaWithoutDiags, (1.0 - dDPAreaWithDiags/dDPAreaWithoutDiags)*100.0);
-    }
-#endif
-    g_dDPAreaWithDiags += dDPAreaWithDiags;
-    return 0;
-    }
-
-// Log the percentage of DP area that the diagonal heuristic saved over all
-// GlobalAlignDiags calls so far. Does nothing unless both g_bVerbose and
-// g_bDiags are set.
-void ListDiagSavings()
-    {
-    if (!g_bVerbose || !g_bDiags)
-        return;
-
-    // If no diagonal alignment has been run the total area is zero; report
-    // 0% rather than dividing by zero and logging NaN.
-    double dPct = 0.0;
-    if (g_dDPAreaWithoutDiags > 0.0)
-        {
-        const double dAreaSaved = g_dDPAreaWithoutDiags - g_dDPAreaWithDiags;
-        dPct = dAreaSaved*100.0/g_dDPAreaWithoutDiags;
-        }
-    Log("DP area saved by diagonals %-4.1f%%\n", dPct);
-    }
Deleted: trunk/packages/muscle/trunk/glbalignle.cpp
===================================================================
--- trunk/packages/muscle/trunk/glbalignle.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/glbalignle.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,435 +0,0 @@
-#include "muscle.h"
-#include "profile.h"
-#include "pwpath.h"
-
-#define OCC 1
-
-// Scratch buffers for GlobalAlignLE, allocated (and grown on demand) by
-// AllocDPMem and kept between calls in the file-static DPM below.
-struct DP_MEMORY
- {
- // Allocated element count of the per-position arrays below.
- unsigned uLength;
- // Per-position copies of m_scoreGapOpen / m_scoreGapClose of profiles A and B.
- SCORE *GapOpenA;
- SCORE *GapOpenB;
- SCORE *GapCloseA;
- SCORE *GapCloseB;
- // Previous / current / spare rows of the M and D score vectors; the
- // three pointers of each set are cycled with Rotate after every row.
- SCORE *MPrev;
- SCORE *MCurr;
- SCORE *MWork;
- SCORE *DPrev;
- SCORE *DCurr;
- SCORE *DWork;
- // [letter][j]: copy of PB[j].m_AAScores[letter]; 20 rows.
- SCORE **ScoreMxB;
-#if OCC
- // Per-position copies of m_fOcc of profiles A and B.
- FCOUNT *OccA;
- FCOUNT *OccB;
-#endif
- // [i][n]: copies of PA[i].m_uSortOrder[n] (20 entries per position).
- unsigned **SortOrderA;
- // Per column j: the row i at which the currently best delete was opened.
- unsigned *uDeletePos;
- // [i][letter]: copies of PA[i].m_fcCounts[letter] (20 entries per position).
- FCOUNT **FreqsA;
- // uLength x uLength traceback matrix handed to TraceBackToPath.
- int **TraceBack;
- };
-
-// Single shared instance; zero-initialized, so uLength starts at 0.
-static struct DP_MEMORY DPM;
-
-// Make sure the shared DPM buffers can hold an alignment of profiles of
-// lengths uLengthA and uLengthB (i.e. max(uLengthA, uLengthB) + 1 prefix
-// lengths). If the existing buffers are already big enough nothing
-// happens; otherwise everything is freed and reallocated with some slack.
-// Previous buffer contents are not preserved.
-static void AllocDPMem(unsigned uLengthA, unsigned uLengthB)
-    {
-    // Number of letters per profile position held in the per-letter tables.
-    const unsigned uLetterCount = 20;
-
-// Max prefix length
-    unsigned uLength = (uLengthA > uLengthB ? uLengthA : uLengthB) + 1;
-    // Buffers of exactly the required size are sufficient too, so there is
-    // no need to reallocate when the lengths are equal.
-    if (uLength <= DPM.uLength)
-        return;
-
-// Add 256 to allow for future expansion and
-// round up to next multiple of 32.
-    uLength += 256;
-    uLength += 32 - uLength%32;
-
-    const unsigned uOldLength = DPM.uLength;
-    if (uOldLength > 0)
-        {
-        // Free the rows of the two-dimensional tables first, then the
-        // tables themselves.
-        for (unsigned i = 0; i < uOldLength; ++i)
-            {
-            delete[] DPM.TraceBack[i];
-            delete[] DPM.FreqsA[i];
-            delete[] DPM.SortOrderA[i];
-            }
-        for (unsigned n = 0; n < uLetterCount; ++n)
-            delete[] DPM.ScoreMxB[n];
-
-        delete[] DPM.MPrev;
-        delete[] DPM.MCurr;
-        delete[] DPM.MWork;
-        delete[] DPM.DPrev;
-        delete[] DPM.DCurr;
-        delete[] DPM.DWork;
-        delete[] DPM.uDeletePos;
-        delete[] DPM.GapOpenA;
-        delete[] DPM.GapOpenB;
-        delete[] DPM.GapCloseA;
-        delete[] DPM.GapCloseB;
-        delete[] DPM.SortOrderA;
-        delete[] DPM.FreqsA;
-        delete[] DPM.ScoreMxB;
-        delete[] DPM.TraceBack;
-#if OCC
-        delete[] DPM.OccA;
-        delete[] DPM.OccB;
-#endif
-        }
-
-    DPM.uLength = uLength;
-
-    DPM.GapOpenA = new SCORE[uLength];
-    DPM.GapOpenB = new SCORE[uLength];
-    DPM.GapCloseA = new SCORE[uLength];
-    DPM.GapCloseB = new SCORE[uLength];
-#if OCC
-    DPM.OccA = new FCOUNT[uLength];
-    DPM.OccB = new FCOUNT[uLength];
-#endif
-
-    DPM.SortOrderA = new unsigned*[uLength];
-    DPM.FreqsA = new FCOUNT*[uLength];
-    DPM.ScoreMxB = new SCORE*[uLetterCount];
-    DPM.MPrev = new SCORE[uLength];
-    DPM.MCurr = new SCORE[uLength];
-    DPM.MWork = new SCORE[uLength];
-
-    DPM.DPrev = new SCORE[uLength];
-    DPM.DCurr = new SCORE[uLength];
-    DPM.DWork = new SCORE[uLength];
-    DPM.uDeletePos = new unsigned[uLength];
-
-    DPM.TraceBack = new int*[uLength];
-
-    for (unsigned uLetter = 0; uLetter < uLetterCount; ++uLetter)
-        DPM.ScoreMxB[uLetter] = new SCORE[uLength];
-
-    for (unsigned i = 0; i < uLength; ++i)
-        {
-        DPM.SortOrderA[i] = new unsigned[uLetterCount];
-        DPM.FreqsA[i] = new FCOUNT[uLetterCount];
-        DPM.TraceBack[i] = new int[uLength];
-        }
-    }
-
-// Global alignment of two profiles with a log-based column score.
-//
-// The score of aligning column i of A with column j of B is
-// logf(sum over letters of FreqsA[i][letter]*ScoreMxB[letter][j]) minus
-// g_scoreCenter (times OccA[i]*OccB[j] when OCC is set), or -2.5 when that
-// sum is zero. Gap costs come from the per-position m_scoreGapOpen /
-// m_scoreGapClose values of the profiles.
-//
-// PA/uLengthA, PB/uLengthB: the two profiles and their lengths.
-// Path: receives the alignment, built by TraceBackToPath from the
-// traceback matrix filled in here.
-// Returns the best score found for the full alignment.
-//
-// Works in the shared DPM buffers (see AllocDPMem), keeping only the
-// previous and current rows of the M and D vectors.
-// NOTE(review): several places index [uLengthX-1] or column 0, so this
-// appears to assume both lengths are at least 1 -- confirm with callers.
-SCORE GlobalAlignLE(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
- unsigned uLengthB, PWPath &Path)
- {
- SetTermGaps(PA, uLengthA);
- SetTermGaps(PB, uLengthB);
-
- const unsigned uPrefixCountA = uLengthA + 1;
- const unsigned uPrefixCountB = uLengthB + 1;
-
- AllocDPMem(uLengthA, uLengthB);
-
- // Local aliases for the shared buffers.
- SCORE *GapOpenA = DPM.GapOpenA;
- SCORE *GapOpenB = DPM.GapOpenB;
- SCORE *GapCloseA = DPM.GapCloseA;
- SCORE *GapCloseB = DPM.GapCloseB;
-
- unsigned **SortOrderA = DPM.SortOrderA;
- FCOUNT **FreqsA = DPM.FreqsA;
- SCORE **ScoreMxB = DPM.ScoreMxB;
- SCORE *MPrev = DPM.MPrev;
- SCORE *MCurr = DPM.MCurr;
- SCORE *MWork = DPM.MWork;
-
- SCORE *DPrev = DPM.DPrev;
- SCORE *DCurr = DPM.DCurr;
- SCORE *DWork = DPM.DWork;
-
-#if OCC
- FCOUNT *OccA = DPM.OccA;
- FCOUNT *OccB = DPM.OccB;
-#endif
-
- unsigned *uDeletePos = DPM.uDeletePos;
-
- int **TraceBack = DPM.TraceBack;
-
- // Copy the per-position data of profile A into flat arrays.
- for (unsigned i = 0; i < uLengthA; ++i)
- {
- GapOpenA[i] = PA[i].m_scoreGapOpen;
- GapCloseA[i] = PA[i].m_scoreGapClose;
-#if OCC
- OccA[i] = PA[i].m_fOcc;
-#endif
-
- for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
- {
- SortOrderA[i][uLetter] = PA[i].m_uSortOrder[uLetter];
- FreqsA[i][uLetter] = PA[i].m_fcCounts[uLetter];
- }
- }
-
- // Same for profile B.
- for (unsigned j = 0; j < uLengthB; ++j)
- {
- GapOpenB[j] = PB[j].m_scoreGapOpen;
- GapCloseB[j] = PB[j].m_scoreGapClose;
-#if OCC
- OccB[j] = PB[j].m_fOcc;
-#endif
- }
-
- // Transpose B's per-letter scores so that each letter has one contiguous row.
- for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
- {
- for (unsigned j = 0; j < uLengthB; ++j)
- ScoreMxB[uLetter][j] = PB[j].m_AAScores[uLetter];
- }
-
- // Zero in the traceback means "came from a match"; positive values are
- // set for deletes and negative values for inserts below.
- for (unsigned i = 0; i < uPrefixCountA; ++i)
- memset(TraceBack[i], 0, uPrefixCountB*sizeof(int));
-
-// Special case for i=0
- unsigned **ptrSortOrderA = SortOrderA;
- FCOUNT **ptrFreqsA = FreqsA;
- assert(ptrSortOrderA == &(SortOrderA[0]));
- assert(ptrFreqsA == &(FreqsA[0]));
- TraceBack[0][0] = 0;
-
- // Letters are visited in sort order and the loop stops at the first
- // zero count (presumably the order is by descending frequency, so the
- // remaining counts are zero too -- confirm where m_uSortOrder is built).
- SCORE scoreSum = 0;
- unsigned *ptrSortOrderAi = SortOrderA[0];
- const unsigned *ptrSortOrderAEnd = ptrSortOrderAi + 20;
- FCOUNT *ptrFreqsAi = FreqsA[0];
- for (; ptrSortOrderAi != ptrSortOrderAEnd; ++ptrSortOrderAi)
- {
- const unsigned uLetter = *ptrSortOrderAi;
- const FCOUNT fcLetter = ptrFreqsAi[uLetter];
- if (0 == fcLetter)
- break;
- scoreSum += fcLetter*ScoreMxB[uLetter][0];
- }
- if (0 == scoreSum)
- MPrev[0] = -2.5;
- else
- {
-#if OCC
- MPrev[0] = (logf(scoreSum) - g_scoreCenter)*OccA[0]*OccB[0];
-#else
- MPrev[0] = (logf(scoreSum) - g_scoreCenter);
-#endif
- }
-
-// D(0,0) is -infinity (requires I->D).
- DPrev[0] = MINUS_INFINITY;
-
- for (unsigned j = 1; j < uLengthB; ++j)
- {
- // Only way to get M(0, j) looks like this:
- // A ----X
- // B XXXXX
- // 0 j
- // So gap-open at j=0, gap-close at j-1.
- SCORE scoreSum = 0;
- unsigned *ptrSortOrderAi = SortOrderA[0];
- const unsigned *ptrSortOrderAEnd = ptrSortOrderAi + 20;
- FCOUNT *ptrFreqsAi = FreqsA[0];
- for (; ptrSortOrderAi != ptrSortOrderAEnd; ++ptrSortOrderAi)
- {
- const unsigned uLetter = *ptrSortOrderAi;
- const FCOUNT fcLetter = ptrFreqsAi[uLetter];
- if (0 == fcLetter)
- break;
- scoreSum += fcLetter*ScoreMxB[uLetter][j];
- }
- // NOTE(review): in the zero-sum case no gap penalties are added,
- // unlike the branch below -- confirm this is intended.
- if (0 == scoreSum)
- MPrev[j] = -2.5;
- else
- {
-#if OCC
- MPrev[j] = (logf(scoreSum) - g_scoreCenter)*OccA[0]*OccB[j] +
- GapOpenB[0] + GapCloseB[j-1];
-#else
- MPrev[j] = (logf(scoreSum) - g_scoreCenter) +
- GapOpenB[0] + GapCloseB[j-1];
-#endif
- }
- // Negative traceback value: an insert of length j.
- TraceBack[0][j] = -(int) j;
-
- // Assume no D->I transitions, then can't be a delete if only
- // one letter from A.
- DPrev[j] = MINUS_INFINITY;
- }
-
- SCORE IPrev_j_1;
- for (unsigned i = 1; i < uLengthA; ++i)
- {
- ++ptrSortOrderA;
- ++ptrFreqsA;
- assert(ptrSortOrderA == &(SortOrderA[i]));
- assert(ptrFreqsA == &(FreqsA[i]));
-
- // First pass over the row: accumulate the frequency-weighted letter
- // scores for every column j, one letter of A at a time.
- SCORE *ptrMCurr_j = MCurr;
- memset(ptrMCurr_j, 0, uLengthB*sizeof(SCORE));
- const FCOUNT *FreqsAi = *ptrFreqsA;
-
- const unsigned *SortOrderAi = *ptrSortOrderA;
- const unsigned *ptrSortOrderAiEnd = SortOrderAi + 20;
- const SCORE *ptrMCurrMax = MCurr + uLengthB;
- for (const unsigned *ptrSortOrderAi = SortOrderAi;
- ptrSortOrderAi != ptrSortOrderAiEnd;
- ++ptrSortOrderAi)
- {
- const unsigned uLetter = *ptrSortOrderAi;
- SCORE *NSBR_Letter = ScoreMxB[uLetter];
- const FCOUNT fcLetter = FreqsAi[uLetter];
- if (0 == fcLetter)
- break;
- SCORE *ptrNSBR = NSBR_Letter;
- for (SCORE *ptrMCurr = MCurr; ptrMCurr != ptrMCurrMax; ++ptrMCurr)
- *ptrMCurr += fcLetter*(*ptrNSBR++);
- }
-
-#if OCC
- const FCOUNT OccAi = OccA[i];
-#endif
- // Second pass: turn each accumulated sum into the column score.
- for (unsigned j = 0; j < uLengthB; ++j)
- {
- if (MCurr[j] == 0)
- MCurr[j] = -2.5;
- else
-#if OCC
- MCurr[j] = (logf(MCurr[j]) - g_scoreCenter)*OccAi*OccB[j];
-#else
- MCurr[j] = (logf(MCurr[j]) - g_scoreCenter);
-#endif
- }
-
- ptrMCurr_j = MCurr;
- unsigned *ptrDeletePos = uDeletePos;
-
- // Special case for j=0
- // Only way to get M(i, 0) looks like this:
- // 0 i
- // A XXXXX
- // B ----X
- // So gap-open at i=0, gap-close at i-1.
- assert(ptrMCurr_j == &(MCurr[0]));
- *ptrMCurr_j += GapOpenA[0] + GapCloseA[i-1];
-
- ++ptrMCurr_j;
-
- // Positive traceback value: a delete of length i.
- int *ptrTraceBack_ij = TraceBack[i];
- *ptrTraceBack_ij++ = (int) i;
-
- // Update D for column 0: either keep the running delete or open a
- // new one from M(i-1, 0), remembering where it was opened.
- SCORE *ptrMPrev_j = MPrev;
- SCORE *ptrDPrev = DPrev;
- SCORE d = *ptrDPrev;
- SCORE DNew = *ptrMPrev_j + GapOpenA[i];
- if (DNew > d)
- {
- d = DNew;
- *ptrDeletePos = i;
- }
-
- SCORE *ptrDCurr = DCurr;
-
- assert(ptrDCurr == &(DCurr[0]));
- *ptrDCurr = d;
-
- // Can't have an insert if no letters from B
- IPrev_j_1 = MINUS_INFINITY;
-
- unsigned uInsertPos = 0;
- const SCORE scoreGapOpenAi = GapOpenA[i];
- const SCORE scoreGapCloseAi_1 = GapCloseA[i-1];
-
- for (unsigned j = 1; j < uLengthB; ++j)
- {
- // Here, MPrev_j is preserved from previous
- // iteration so with current i,j is M[i-1][j-1]
- SCORE MPrev_j = *ptrMPrev_j;
- SCORE INew = MPrev_j + GapOpenB[j];
- if (INew > IPrev_j_1)
- {
- IPrev_j_1 = INew;
- uInsertPos = j;
- }
-
- // Best predecessor of M(i, j): match (traceback stays 0), ...
- SCORE scoreMax = MPrev_j;
-
- // ... closing a delete (traceback = length of the delete, > 0), ...
- assert(ptrDPrev == &(DPrev[j-1]));
- SCORE scoreD = *ptrDPrev++ + scoreGapCloseAi_1;
- if (scoreD > scoreMax)
- {
- scoreMax = scoreD;
- assert(ptrDeletePos == &(uDeletePos[j-1]));
- *ptrTraceBack_ij = (int) i - (int) *ptrDeletePos;
- assert(*ptrTraceBack_ij > 0);
- }
- ++ptrDeletePos;
-
- // ... or closing an insert (traceback = minus its length, < 0).
- SCORE scoreI = IPrev_j_1 + GapCloseB[j-1];
- if (scoreI > scoreMax)
- {
- scoreMax = scoreI;
- *ptrTraceBack_ij = (int) uInsertPos - (int) j;
- assert(*ptrTraceBack_ij < 0);
- }
-
- assert(ptrSortOrderA == &(SortOrderA[i]));
- assert(ptrFreqsA == &(FreqsA[i]));
-
- // MCurr[j] already holds the column score; add the best predecessor.
- *ptrMCurr_j += scoreMax;
- assert(ptrMCurr_j == &(MCurr[j]));
- ++ptrMCurr_j;
-
- // Update D for column j, as done for column 0 above.
- MPrev_j = *(++ptrMPrev_j);
- assert(ptrDPrev == &(DPrev[j]));
- SCORE d = *ptrDPrev;
- SCORE DNew = MPrev_j + scoreGapOpenAi;
- if (DNew > d)
- {
- d = DNew;
- assert(ptrDeletePos == &uDeletePos[j]);
- *ptrDeletePos = i;
- }
- assert(ptrDCurr + 1 == &(DCurr[j]));
- *(++ptrDCurr) = d;
-
- ++ptrTraceBack_ij;
- }
-
- // The row just computed becomes the previous row for the next i.
- Rotate(MPrev, MCurr, MWork);
- Rotate(DPrev, DCurr, DWork);
- }
-
-// Special case for i=uLengthA
- SCORE IPrev = MINUS_INFINITY;
-
- // NOTE(review): stays uninitialized if the loop below never assigns it;
- // it is read only when scoreI beats scoreMax further down.
- unsigned uInsertPos;
-
- for (unsigned j = 1; j < uLengthB; ++j)
- {
- SCORE INew = MPrev[j-1] + GapOpenB[j];
- if (INew > IPrev)
- {
- uInsertPos = j;
- IPrev = INew;
- }
- }
-
-// Special case for i=uLengthA, j=uLengthB
- SCORE scoreMax = MPrev[uLengthB-1];
- int iTraceBack = 0;
-
- SCORE scoreD = DPrev[uLengthB-1] + GapCloseA[uLengthA-1];
- if (scoreD > scoreMax)
- {
- scoreMax = scoreD;
- iTraceBack = (int) uLengthA - (int) uDeletePos[uLengthB-1];
- }
-
- SCORE scoreI = IPrev + GapCloseB[uLengthB-1];
- if (scoreI > scoreMax)
- {
- scoreMax = scoreI;
- iTraceBack = (int) uInsertPos - (int) uLengthB;
- }
-
- TraceBack[uLengthA][uLengthB] = iTraceBack;
-
- TraceBackToPath(TraceBack, uLengthA, uLengthB, Path);
-
- return scoreMax;
- }
Deleted: trunk/packages/muscle/trunk/glbalignsimple.cpp
===================================================================
--- trunk/packages/muscle/trunk/glbalignsimple.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/glbalignsimple.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,368 +0,0 @@
-#include "muscle.h"
-#include <math.h>
-#include "pwpath.h"
-#include "profile.h"
-#include <stdio.h>
-
-#define TRACE 0
-
-#if 1 // SINGLE_AFFINE
-
-extern bool g_bKeepSimpleDP;
-extern SCORE *g_DPM;
-extern SCORE *g_DPD;
-extern SCORE *g_DPI;
-extern char *g_TBM;
-extern char *g_TBD;
-extern char *g_TBI;
-
-// Format a score for the DP trace listings. Scores below -100000 are shown
-// as a "*" placeholder. The result points either at a string literal or at a
-// static buffer that the next call overwrites, so it must be consumed before
-// calling this function again.
-static const char *LocalScoreToStr(SCORE s)
- {
- static char str[16];
- if (s < -100000)
- return " *";
-// "%6.1f" fixes only the minimum width, so a large-magnitude score can need
-// more than 16 bytes; snprintf truncates instead of overrunning the buffer.
- snprintf(str, sizeof(str), "%6.1f", s);
- return str;
- }
-
-// Log a traceback matrix as a grid: a header line labelled with the consensus
-// letter of each PB position, then one line per prefix length of A. Cells are
-// read through the TBM() macro (defined outside this file; presumably it
-// indexes TBM_ -- confirm).
-static void ListTB(const char *TBM_, const ProfPos *PA, const ProfPos *PB,
- unsigned uPrefixCountA, unsigned uPrefixCountB)
- {
-// Header line. Prefix length 0 has no letter, so it is labelled with a blank.
- Log(" ");
- for (unsigned uCol = 0; uCol != uPrefixCountB; ++uCol)
- {
- const char cLabel = (0 == uCol) ? ' ' : ConsensusChar(PB[uCol - 1]);
- Log(" %4u:%c", uCol, cLabel);
- }
- Log("\n");
-
-// One line per prefix length of A.
- for (unsigned uRow = 0; uRow != uPrefixCountA; ++uRow)
- {
- const char cLabel = (0 == uRow) ? ' ' : ConsensusChar(PA[uRow - 1]);
- Log("%4u:%c ", uRow, cLabel);
- for (unsigned uCol = 0; uCol != uPrefixCountB; ++uCol)
- {
- Log(" %6c", TBM(uRow, uCol));
- }
- Log("\n");
- }
- }
-
-// Log a score matrix as a grid: a header line labelled with the consensus
-// letter of each PB position, then one line per prefix length of A. Cells are
-// read through the DPM() macro (defined outside this file; presumably it
-// indexes DPM_ -- confirm) and formatted with LocalScoreToStr().
-static void ListDP(const SCORE *DPM_, const ProfPos *PA, const ProfPos *PB,
- unsigned uPrefixCountA, unsigned uPrefixCountB)
- {
-// Header line. Prefix length 0 has no letter, so it is labelled with a blank.
- Log(" ");
- for (unsigned uCol = 0; uCol != uPrefixCountB; ++uCol)
- {
- const char cLabel = (0 == uCol) ? ' ' : ConsensusChar(PB[uCol - 1]);
- Log(" %4u:%c", uCol, cLabel);
- }
- Log("\n");
-
-// One line per prefix length of A.
- for (unsigned uRow = 0; uRow != uPrefixCountA; ++uRow)
- {
- const char cLabel = (0 == uRow) ? ' ' : ConsensusChar(PA[uRow - 1]);
- Log("%4u:%c ", uRow, cLabel);
- for (unsigned uCol = 0; uCol != uPrefixCountB; ++uCol)
- {
- const char *ptrCell = LocalScoreToStr(DPM(uRow, uCol));
- Log(" %s", ptrCell);
- }
- Log("\n");
- }
- }
-
-// Straightforward global profile-profile alignment using full
-// (uLengthA+1) x (uLengthB+1) dynamic programming matrices for three states:
-//   M = letter in A aligned to letter in B,
-//   D = letter in A aligned to a gap in B,
-//   I = gap in A aligned to a letter in B.
-// There are no direct D<->I transitions. On return Path holds the alignment
-// and the function result is the best of the three final-cell scores (the
-// D and I candidates include the terminal gap-close score).
-//
-// If g_bKeepSimpleDP is set, the M/D/I score and traceback matrices are left
-// allocated and published through g_DPM/g_DPD/g_DPI and g_TBM/g_TBD/g_TBI;
-// otherwise they are freed here. Both lengths must be non-zero.
-SCORE GlobalAlignSimple(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
- unsigned uLengthB, PWPath &Path)
- {
- assert(uLengthB > 0 && uLengthA > 0);
-
- SetTermGaps(PA, uLengthA);
- SetTermGaps(PB, uLengthB);
-
- const unsigned uPrefixCountA = uLengthA + 1;
- const unsigned uPrefixCountB = uLengthB + 1;
-
-// Allocate DP matrices
- const size_t LM = uPrefixCountA*uPrefixCountB;
- SCORE *DPL_ = new SCORE[LM];
- SCORE *DPM_ = new SCORE[LM];
- SCORE *DPD_ = new SCORE[LM];
- SCORE *DPI_ = new SCORE[LM];
-
- char *TBM_ = new char[LM];
- char *TBD_ = new char[LM];
- char *TBI_ = new char[LM];
-
-// '?' marks cells that were never assigned a traceback edge.
- memset(TBM_, '?', LM);
- memset(TBD_, '?', LM);
- memset(TBI_, '?', LM);
-
- DPM(0, 0) = 0;
- DPD(0, 0) = MINUS_INFINITY;
- DPI(0, 0) = MINUS_INFINITY;
-
- DPM(1, 0) = MINUS_INFINITY;
- DPD(1, 0) = PA[0].m_scoreGapOpen;
- TBD(1, 0) = 'D';
- DPI(1, 0) = MINUS_INFINITY;
-
- DPM(0, 1) = MINUS_INFINITY;
- DPD(0, 1) = MINUS_INFINITY;
- DPI(0, 1) = PB[0].m_scoreGapOpen;
- TBI(0, 1) = 'I';
-
-// Empty prefix of B is special case
- for (unsigned uPrefixLengthA = 2; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
- {
- // M=LetterA+LetterB, impossible with empty prefix
- DPM(uPrefixLengthA, 0) = MINUS_INFINITY;
-
- // D=LetterA+GapB
- DPD(uPrefixLengthA, 0) = DPD(uPrefixLengthA - 1, 0) + g_scoreGapExtend;
- TBD(uPrefixLengthA, 0) = 'D';
-
- // I=GapA+LetterB, impossible with empty prefix
- DPI(uPrefixLengthA, 0) = MINUS_INFINITY;
- }
-
-// Empty prefix of A is special case
- for (unsigned uPrefixLengthB = 2; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
- {
- // M=LetterA+LetterB, impossible with empty prefix
- DPM(0, uPrefixLengthB) = MINUS_INFINITY;
-
- // D=LetterA+GapB, impossible with empty prefix
- DPD(0, uPrefixLengthB) = MINUS_INFINITY;
-
- // I=GapA+LetterB
- DPI(0, uPrefixLengthB) = DPI(0, uPrefixLengthB - 1) + g_scoreGapExtend;
- TBI(0, uPrefixLengthB) = 'I';
- }
-
-// Special case to agree with NWFast, no D-I transitions so...
- DPD(uLengthA, 0) = MINUS_INFINITY;
-// DPI(0, uLengthB) = MINUS_INFINITY;
-
-// ============
-// Main DP loop
-// ============
-// The gap-close scores lag one position behind the loop indexes: they are
-// MINUS_INFINITY for the first row/column and are refreshed at the end of
-// each iteration from the position just processed.
- SCORE scoreGapCloseB = MINUS_INFINITY;
- for (unsigned uPrefixLengthB = 1; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
- {
- const ProfPos &PPB = PB[uPrefixLengthB - 1];
-
- SCORE scoreGapCloseA = MINUS_INFINITY;
- for (unsigned uPrefixLengthA = 1; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
- {
- const ProfPos &PPA = PA[uPrefixLengthA - 1];
-
- {
- // Match M=LetterA+LetterB
- SCORE scoreLL = ScoreProfPos2(PPA, PPB);
- DPL(uPrefixLengthA, uPrefixLengthB) = scoreLL;
-
- SCORE scoreMM = DPM(uPrefixLengthA-1, uPrefixLengthB-1);
- SCORE scoreDM = DPD(uPrefixLengthA-1, uPrefixLengthB-1) + scoreGapCloseA;
- SCORE scoreIM = DPI(uPrefixLengthA-1, uPrefixLengthB-1) + scoreGapCloseB;
-
- // Ties are resolved in the order M, D, I.
- SCORE scoreBest;
- if (scoreMM >= scoreDM && scoreMM >= scoreIM)
- {
- scoreBest = scoreMM;
- TBM(uPrefixLengthA, uPrefixLengthB) = 'M';
- }
- else if (scoreDM >= scoreMM && scoreDM >= scoreIM)
- {
- scoreBest = scoreDM;
- TBM(uPrefixLengthA, uPrefixLengthB) = 'D';
- }
- else
- {
- assert(scoreIM >= scoreMM && scoreIM >= scoreDM);
- scoreBest = scoreIM;
- TBM(uPrefixLengthA, uPrefixLengthB) = 'I';
- }
- DPM(uPrefixLengthA, uPrefixLengthB) = scoreBest + scoreLL;
- }
-
- {
- // Delete D=LetterA+GapB
- SCORE scoreMD = DPM(uPrefixLengthA-1, uPrefixLengthB) +
- PA[uPrefixLengthA-1].m_scoreGapOpen;
- SCORE scoreDD = DPD(uPrefixLengthA-1, uPrefixLengthB) + g_scoreGapExtend;
-
- SCORE scoreBest;
- if (scoreMD >= scoreDD)
- {
- scoreBest = scoreMD;
- TBD(uPrefixLengthA, uPrefixLengthB) = 'M';
- }
- else
- {
- assert(scoreDD >= scoreMD);
- scoreBest = scoreDD;
- TBD(uPrefixLengthA, uPrefixLengthB) = 'D';
- }
- DPD(uPrefixLengthA, uPrefixLengthB) = scoreBest;
- }
-
- // Insert I=GapA+LetterB
- {
- SCORE scoreMI = DPM(uPrefixLengthA, uPrefixLengthB-1) +
- PB[uPrefixLengthB - 1].m_scoreGapOpen;
- SCORE scoreII = DPI(uPrefixLengthA, uPrefixLengthB-1) + g_scoreGapExtend;
-
- SCORE scoreBest;
- if (scoreMI >= scoreII)
- {
- scoreBest = scoreMI;
- TBI(uPrefixLengthA, uPrefixLengthB) = 'M';
- }
- else
- {
- assert(scoreII > scoreMI);
- scoreBest = scoreII;
- TBI(uPrefixLengthA, uPrefixLengthB) = 'I';
- }
- DPI(uPrefixLengthA, uPrefixLengthB) = scoreBest;
- }
-
- scoreGapCloseA = PPA.m_scoreGapClose;
- }
- scoreGapCloseB = PPB.m_scoreGapClose;
- }
-
-#if TRACE
- Log("\n");
- Log("Simple DPL:\n");
- ListDP(DPL_, PA, PB, uPrefixCountA, uPrefixCountB);
- Log("\n");
- Log("Simple DPM:\n");
- ListDP(DPM_, PA, PB, uPrefixCountA, uPrefixCountB);
- Log("\n");
- Log("Simple DPD:\n");
- ListDP(DPD_, PA, PB, uPrefixCountA, uPrefixCountB);
- Log("\n");
- Log("Simple DPI:\n");
- ListDP(DPI_, PA, PB, uPrefixCountA, uPrefixCountB);
- Log("\n");
- Log("Simple TBM:\n");
- ListTB(TBM_, PA, PB, uPrefixCountA, uPrefixCountB);
- Log("\n");
- Log("Simple TBD:\n");
- ListTB(TBD_, PA, PB, uPrefixCountA, uPrefixCountB);
- Log("\n");
- Log("Simple TBI:\n");
- ListTB(TBI_, PA, PB, uPrefixCountA, uPrefixCountB);
-#endif
-
-// Trace-back
-// ==========
- Path.Clear();
-
-// Find last edge
- SCORE M = DPM(uLengthA, uLengthB);
- SCORE D = DPD(uLengthA, uLengthB) + PA[uLengthA-1].m_scoreGapClose;
- SCORE I = DPI(uLengthA, uLengthB) + PB[uLengthB-1].m_scoreGapClose;
- char cEdgeType = '?';
-
- SCORE BestScore = MINUS_INFINITY;
- if (M >= D && M >= I)
- {
- cEdgeType = 'M';
- BestScore = M;
- }
- else if (D >= M && D >= I)
- {
- cEdgeType = 'D';
- BestScore = D;
- }
- else
- {
- assert(I >= M && I >= D);
- cEdgeType = 'I';
- BestScore = I;
- }
-
-#if TRACE
- Log("Simple: MAB=%.4g DAB=%.4g IAB=%.4g best=%c\n", M, D, I, cEdgeType);
-#endif
-
-// Walk back from the final cell to (0,0), prepending one edge per step. The
-// traceback matrix of the current state names the state of the previous edge.
- unsigned PLA = uLengthA;
- unsigned PLB = uLengthB;
- for (;;)
- {
- PWEdge Edge;
- Edge.cType = cEdgeType;
- Edge.uPrefixLengthA = PLA;
- Edge.uPrefixLengthB = PLB;
-#if TRACE
- Log("Prepend %c%d.%d\n", Edge.cType, PLA, PLB);
-#endif
- Path.PrependEdge(Edge);
-
- switch (cEdgeType)
- {
- case 'M':
- assert(PLA > 0);
- assert(PLB > 0);
- cEdgeType = TBM(PLA, PLB);
- --PLA;
- --PLB;
- break;
-
- case 'D':
- assert(PLA > 0);
- cEdgeType = TBD(PLA, PLB);
- --PLA;
- break;
-
- case 'I':
- assert(PLB > 0);
- cEdgeType = TBI(PLA, PLB);
- --PLB;
- break;
-
- default:
- Quit("Invalid edge %c", cEdgeType);
- }
- if (0 == PLA && 0 == PLB)
- break;
- }
- Path.Validate();
-
-// SCORE Score = TraceBack(PA, uLengthA, PB, uLengthB, DPM_, DPD_, DPI_, Path);
-
-#if TRACE
- SCORE scorePath = FastScorePath2(PA, uLengthA, PB, uLengthB, Path);
- Path.LogMe();
-// LocalScoreToStr returns a shared static buffer, so each value is logged in
-// its own call; formatting both in one Log() would print one value twice.
- Log("Score = %s", LocalScoreToStr(BestScore));
- Log(" Path = %s\n", LocalScoreToStr(scorePath));
-#endif
-
-// The letter-pair score matrix is only used for the TRACE listing and is
-// never published through a global, so it is always released here.
- delete[] DPL_;
-
- if (g_bKeepSimpleDP)
- {
- g_DPM = DPM_;
- g_DPD = DPD_;
- g_DPI = DPI_;
-
- g_TBM = TBM_;
- g_TBD = TBD_;
- g_TBI = TBI_;
- }
- else
- {
- delete[] DPM_;
- delete[] DPD_;
- delete[] DPI_;
-
- delete[] TBM_;
- delete[] TBD_;
- delete[] TBI_;
- }
-
- return BestScore;
- }
-
-#endif // SINGLE_AFFINE
Deleted: trunk/packages/muscle/trunk/glbalignsp.cpp
===================================================================
--- trunk/packages/muscle/trunk/glbalignsp.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/glbalignsp.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,374 +0,0 @@
-#include "muscle.h"
-#include "profile.h"
-#include "pwpath.h"
-
-struct DP_MEMORY // Reusable work buffers for GlobalAlignSP, sized by AllocDPMem
- {
- unsigned uLength; // Allocated length of every per-position array; 0 until the first AllocDPMem
- SCORE *GapOpenA; // Copy of PA[i].m_scoreGapOpen
- SCORE *GapOpenB; // Copy of PB[j].m_scoreGapOpen
- SCORE *GapCloseA; // Copy of PA[i].m_scoreGapClose
- SCORE *GapCloseB; // Copy of PB[j].m_scoreGapClose
- SCORE *MPrev; // M scores of the previous row
- SCORE *MCurr; // M scores of the row being computed
- SCORE *MWork; // Third buffer passed to Rotate() together with MPrev/MCurr
- SCORE *DPrev; // D scores of the previous row
- SCORE *DCurr; // D scores of the row being computed
- SCORE *DWork; // Third buffer passed to Rotate() together with DPrev/DCurr
- SCORE **ScoreMxB; // [letter][j] = PB[j].m_AAScores[letter]; 20 rows
- unsigned **SortOrderA; // [i][k] = PA[i].m_uSortOrder[k]; 20 entries per position
- unsigned *uDeletePos; // Per column j: last row i at which opening a delete beat the carried D score
- FCOUNT **FreqsA; // [i][letter] = PA[i].m_fcCounts[letter]; 20 entries per position
- int **TraceBack; // uLength x uLength matrix handed to TraceBackToPath
- };
-
-static struct DP_MEMORY DPM; // File-scope instance; static storage, so it starts zeroed
-
-// Make sure the file-static DPM work buffers are large enough for profiles of
-// length uLengthA and uLengthB. Buffers only grow: when the current size is
-// not strictly larger than the longest prefix count, every buffer is released
-// and re-allocated with extra slack. Existing contents are not preserved.
-static void AllocDPMem(unsigned uLengthA, unsigned uLengthB)
- {
-// Longest prefix count required by either profile.
- const unsigned uLongest = (uLengthB < uLengthA) ? uLengthA : uLengthB;
- const unsigned uNeeded = uLongest + 1;
- if (DPM.uLength > uNeeded)
- return;
-
-// Pad by 256 for future growth, then step up to a multiple of 32 (a full 32
-// is added when the padded size is already a multiple).
- const unsigned uPadded = uNeeded + 256;
- const unsigned uNewLength = uPadded + (32 - uPadded%32);
-
-// Release the previous generation of buffers, if there is one.
- const unsigned uPrevLength = DPM.uLength;
- if (0 != uPrevLength)
- {
- for (unsigned uRow = 0; uRow != uPrevLength; ++uRow)
- {
- delete[] DPM.TraceBack[uRow];
- delete[] DPM.FreqsA[uRow];
- delete[] DPM.SortOrderA[uRow];
- }
- for (unsigned uLetter = 0; uLetter != 20; ++uLetter)
- {
- delete[] DPM.ScoreMxB[uLetter];
- }
-
- delete[] DPM.MPrev;
- delete[] DPM.MCurr;
- delete[] DPM.MWork;
- delete[] DPM.DPrev;
- delete[] DPM.DCurr;
- delete[] DPM.DWork;
- delete[] DPM.uDeletePos;
- delete[] DPM.GapOpenA;
- delete[] DPM.GapOpenB;
- delete[] DPM.GapCloseA;
- delete[] DPM.GapCloseB;
- delete[] DPM.SortOrderA;
- delete[] DPM.FreqsA;
- delete[] DPM.ScoreMxB;
- delete[] DPM.TraceBack;
- }
-
- DPM.uLength = uNewLength;
-
-// Per-position gap penalties.
- DPM.GapOpenA = new SCORE[uNewLength];
- DPM.GapOpenB = new SCORE[uNewLength];
- DPM.GapCloseA = new SCORE[uNewLength];
- DPM.GapCloseB = new SCORE[uNewLength];
-
-// Row-pointer tables and rolling score rows.
- DPM.SortOrderA = new unsigned*[uNewLength];
- DPM.FreqsA = new FCOUNT*[uNewLength];
- DPM.ScoreMxB = new SCORE*[20];
- DPM.MPrev = new SCORE[uNewLength];
- DPM.MCurr = new SCORE[uNewLength];
- DPM.MWork = new SCORE[uNewLength];
-
- DPM.DPrev = new SCORE[uNewLength];
- DPM.DCurr = new SCORE[uNewLength];
- DPM.DWork = new SCORE[uNewLength];
- DPM.uDeletePos = new unsigned[uNewLength];
-
- DPM.TraceBack = new int*[uNewLength];
-
-// One score row per letter of the 20-letter alphabet.
- for (unsigned uLetter = 0; uLetter != 20; ++uLetter)
- {
- DPM.ScoreMxB[uLetter] = new SCORE[uNewLength];
- }
-
-// Per-position rows.
- for (unsigned uRow = 0; uRow != uNewLength; ++uRow)
- {
- DPM.SortOrderA[uRow] = new unsigned[20];
- DPM.FreqsA[uRow] = new FCOUNT[20];
- DPM.TraceBack[uRow] = new int[uNewLength];
- }
- }
-
-SCORE GlobalAlignSP(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
- unsigned uLengthB, PWPath &Path)
- { // Global profile-profile alignment over a 20-letter alphabet using rolling M/D rows; fills Path, returns the final score
- const unsigned uPrefixCountA = uLengthA + 1;
- const unsigned uPrefixCountB = uLengthB + 1;
-
- AllocDPMem(uLengthA, uLengthB); // Grows the file-static DPM buffers if needed
-
- SCORE *GapOpenA = DPM.GapOpenA;
- SCORE *GapOpenB = DPM.GapOpenB;
- SCORE *GapCloseA = DPM.GapCloseA;
- SCORE *GapCloseB = DPM.GapCloseB;
-
- unsigned **SortOrderA = DPM.SortOrderA;
- FCOUNT **FreqsA = DPM.FreqsA;
- SCORE **ScoreMxB = DPM.ScoreMxB;
- SCORE *MPrev = DPM.MPrev;
- SCORE *MCurr = DPM.MCurr;
- SCORE *MWork = DPM.MWork;
-
- SCORE *DPrev = DPM.DPrev;
- SCORE *DCurr = DPM.DCurr;
- SCORE *DWork = DPM.DWork;
- unsigned *uDeletePos = DPM.uDeletePos;
-
- int **TraceBack = DPM.TraceBack;
-
- for (unsigned i = 0; i < uLengthA; ++i) // Flatten profile A into the work arrays
- {
- GapOpenA[i] = PA[i].m_scoreGapOpen;
- GapCloseA[i] = PA[i].m_scoreGapClose;
-
- for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
- {
- SortOrderA[i][uLetter] = PA[i].m_uSortOrder[uLetter];
- FreqsA[i][uLetter] = PA[i].m_fcCounts[uLetter];
- }
- }
-
- for (unsigned j = 0; j < uLengthB; ++j) // Flatten profile B gap scores
- {
- GapOpenB[j] = PB[j].m_scoreGapOpen;
- GapCloseB[j] = PB[j].m_scoreGapClose;
- }
-
- for (unsigned uLetter = 0; uLetter < 20; ++uLetter) // Transpose B's scores to [letter][position]
- {
- for (unsigned j = 0; j < uLengthB; ++j)
- ScoreMxB[uLetter][j] = PB[j].m_AAScores[uLetter];
- }
-
- for (unsigned i = 0; i < uPrefixCountA; ++i)
- memset(TraceBack[i], 0, uPrefixCountB*sizeof(int)); // Cells stay 0 unless a D or I branch below overwrites them
-
-// Special case for i=0
- unsigned **ptrSortOrderA = SortOrderA;
- FCOUNT **ptrFreqsA = FreqsA;
- assert(ptrSortOrderA == &(SortOrderA[0]));
- assert(ptrFreqsA == &(FreqsA[0]));
- TraceBack[0][0] = 0;
-
- SCORE scoreSum = 0;
- unsigned *ptrSortOrderAi = SortOrderA[0];
- const unsigned *ptrSortOrderAEnd = ptrSortOrderAi + 20;
- FCOUNT *ptrFreqsAi = FreqsA[0];
- for (; ptrSortOrderAi != ptrSortOrderAEnd; ++ptrSortOrderAi)
- {
- const unsigned uLetter = *ptrSortOrderAi;
- const FCOUNT fcLetter = ptrFreqsAi[uLetter];
- if (0 == fcLetter) // Stops at the first zero count; presumably the sort order lists non-zero counts first -- confirm
- break;
- scoreSum += fcLetter*ScoreMxB[uLetter][0];
- }
- MPrev[0] = scoreSum - g_scoreCenter;
-
-// D(0,0) is -infinity (requires I->D).
- DPrev[0] = MINUS_INFINITY;
-
- for (unsigned j = 1; j < uLengthB; ++j)
- {
- // Only way to get M(0, j) looks like this:
- // A ----X
- // B XXXXX
- // 0 j
- // So gap-open at j=0, gap-close at j-1.
- SCORE scoreSum = 0; // Shadows the outer scoreSum
- unsigned *ptrSortOrderAi = SortOrderA[0];
- const unsigned *ptrSortOrderAEnd = ptrSortOrderAi + 20;
- FCOUNT *ptrFreqsAi = FreqsA[0];
- for (; ptrSortOrderAi != ptrSortOrderAEnd; ++ptrSortOrderAi)
- {
- const unsigned uLetter = *ptrSortOrderAi;
- const FCOUNT fcLetter = ptrFreqsAi[uLetter];
- if (0 == fcLetter)
- break;
- scoreSum += fcLetter*ScoreMxB[uLetter][j];
- }
- MPrev[j] = scoreSum - g_scoreCenter + GapOpenB[0] + GapCloseB[j-1];
- TraceBack[0][j] = -(int) j; // Negative entry: written by the insert branches in this function
-
- // Assume no D->I transitions, then can't be a delete if only
- // one letter from A.
- DPrev[j] = MINUS_INFINITY;
- }
-
- SCORE IPrev_j_1;
- for (unsigned i = 1; i < uLengthA; ++i)
- {
- ++ptrSortOrderA;
- ++ptrFreqsA;
- assert(ptrSortOrderA == &(SortOrderA[i]));
- assert(ptrFreqsA == &(FreqsA[i]));
-
- SCORE *ptrMCurr_j = MCurr;
- memset(ptrMCurr_j, 0, uLengthB*sizeof(SCORE)); // MCurr is first used as an accumulator for the letter-pair scores of row i
- const FCOUNT *FreqsAi = *ptrFreqsA;
-
- const unsigned *SortOrderAi = *ptrSortOrderA;
- const unsigned *ptrSortOrderAiEnd = SortOrderAi + 20;
- const SCORE *ptrMCurrMax = MCurr + uLengthB;
- for (const unsigned *ptrSortOrderAi = SortOrderAi;
- ptrSortOrderAi != ptrSortOrderAiEnd;
- ++ptrSortOrderAi)
- {
- const unsigned uLetter = *ptrSortOrderAi;
- SCORE *NSBR_Letter = ScoreMxB[uLetter];
- const FCOUNT fcLetter = FreqsAi[uLetter];
- if (0 == fcLetter)
- break;
- SCORE *ptrNSBR = NSBR_Letter;
- for (SCORE *ptrMCurr = MCurr; ptrMCurr != ptrMCurrMax; ++ptrMCurr)
- *ptrMCurr += fcLetter*(*ptrNSBR++); // Adds this letter's contribution to every column of the row
- }
-
- for (unsigned j = 0; j < uLengthB; ++j)
- MCurr[j] -= g_scoreCenter;
-
- ptrMCurr_j = MCurr;
- unsigned *ptrDeletePos = uDeletePos;
-
- // Special case for j=0
- // Only way to get M(i, 0) looks like this:
- // 0 i
- // A XXXXX
- // B ----X
- // So gap-open at i=0, gap-close at i-1.
- assert(ptrMCurr_j == &(MCurr[0]));
- *ptrMCurr_j += GapOpenA[0] + GapCloseA[i-1];
-
- ++ptrMCurr_j;
-
- int *ptrTraceBack_ij = TraceBack[i];
- *ptrTraceBack_ij++ = (int) i; // Positive entry: written by the delete branches in this function
-
- SCORE *ptrMPrev_j = MPrev;
- SCORE *ptrDPrev = DPrev;
- SCORE d = *ptrDPrev;
- SCORE DNew = *ptrMPrev_j + GapOpenA[i];
- if (DNew > d) // Opening a new delete at row i beats the carried D score
- {
- d = DNew;
- *ptrDeletePos = i;
- }
-
- SCORE *ptrDCurr = DCurr;
-
- assert(ptrDCurr == &(DCurr[0]));
- *ptrDCurr = d;
-
- // Can't have an insert if no letters from B
- IPrev_j_1 = MINUS_INFINITY;
-
- unsigned uInsertPos; // No initializer; assigned only when INew beats IPrev_j_1 below
- const SCORE scoreGapOpenAi = GapOpenA[i];
- const SCORE scoreGapCloseAi_1 = GapCloseA[i-1];
-
- for (unsigned j = 1; j < uLengthB; ++j)
- {
- // Here, MPrev_j is preserved from previous
- // iteration so with current i,j is M[i-1][j-1]
- SCORE MPrev_j = *ptrMPrev_j;
- SCORE INew = MPrev_j + GapOpenB[j];
- if (INew > IPrev_j_1)
- {
- IPrev_j_1 = INew;
- uInsertPos = j;
- }
-
- SCORE scoreMax = MPrev_j; // Candidate 1: continue from M[i-1][j-1]
-
- assert(ptrDPrev == &(DPrev[j-1]));
- SCORE scoreD = *ptrDPrev++ + scoreGapCloseAi_1; // Candidate 2: close a delete
- if (scoreD > scoreMax)
- {
- scoreMax = scoreD;
- assert(ptrDeletePos == &(uDeletePos[j-1]));
- *ptrTraceBack_ij = (int) i - (int) *ptrDeletePos;
- assert(*ptrTraceBack_ij > 0);
- }
- ++ptrDeletePos;
-
- SCORE scoreI = IPrev_j_1 + GapCloseB[j-1]; // Candidate 3: close an insert
- if (scoreI > scoreMax)
- {
- scoreMax = scoreI;
- *ptrTraceBack_ij = (int) uInsertPos - (int) j;
- assert(*ptrTraceBack_ij < 0);
- }
-
- assert(ptrSortOrderA == &(SortOrderA[i]));
- assert(ptrFreqsA == &(FreqsA[i]));
-
- *ptrMCurr_j += scoreMax; // Letter-pair score accumulated above plus the best predecessor
- assert(ptrMCurr_j == &(MCurr[j]));
- ++ptrMCurr_j;
-
- MPrev_j = *(++ptrMPrev_j); // Now M[i-1][j], used for the D update of this column
- assert(ptrDPrev == &(DPrev[j]));
- SCORE d = *ptrDPrev; // Shadows the outer d
- SCORE DNew = MPrev_j + scoreGapOpenAi; // Shadows the outer DNew
- if (DNew > d)
- {
- d = DNew;
- assert(ptrDeletePos == &uDeletePos[j]);
- *ptrDeletePos = i;
- }
- assert(ptrDCurr + 1 == &(DCurr[j]));
- *(++ptrDCurr) = d;
-
- ++ptrTraceBack_ij;
- }
-
- Rotate(MPrev, MCurr, MWork); // Helper defined elsewhere; presumably makes the row just computed the new *Prev -- confirm
- Rotate(DPrev, DCurr, DWork);
- }
-
-// Special case for i=uLengthA
- SCORE IPrev = MINUS_INFINITY;
-
- unsigned uInsertPos; // No initializer; stays unassigned if the loop below never updates IPrev
-
- for (unsigned j = 1; j < uLengthB; ++j)
- {
- SCORE INew = MPrev[j-1] + GapOpenB[j];
- if (INew > IPrev)
- {
- uInsertPos = j;
- IPrev = INew;
- }
- }
-
-// Special case for i=uLengthA, j=uLengthB
- SCORE scoreMax = MPrev[uLengthB-1];
- int iTraceBack = 0;
-
- SCORE scoreD = DPrev[uLengthB-1] + GapCloseA[uLengthA-1];
- if (scoreD > scoreMax)
- {
- scoreMax = scoreD;
- iTraceBack = (int) uLengthA - (int) uDeletePos[uLengthB-1];
- }
-
- SCORE scoreI = IPrev + GapCloseB[uLengthB-1];
- if (scoreI > scoreMax)
- {
- scoreMax = scoreI;
- iTraceBack = (int) uInsertPos - (int) uLengthB;
- }
-
- TraceBack[uLengthA][uLengthB] = iTraceBack;
-
- TraceBackToPath(TraceBack, uLengthA, uLengthB, Path); // Converts the trace matrix into Path
-
- return scoreMax;
- }
Deleted: trunk/packages/muscle/trunk/glbalignspn.cpp
===================================================================
--- trunk/packages/muscle/trunk/glbalignspn.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/glbalignspn.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,409 +0,0 @@
-#include "muscle.h"
-#include "profile.h"
-#include "pwpath.h"
-
-struct DP_MEMORY // Reusable work buffers for GlobalAlignSPN, sized by AllocDPMem
- {
- unsigned uLength; // Allocated length of every per-position array; 0 until the first AllocDPMem
- SCORE *GapOpenA; // Copy of PA[i].m_scoreGapOpen
- SCORE *GapOpenB; // Copy of PB[j].m_scoreGapOpen
- SCORE *GapCloseA; // Copy of PA[i].m_scoreGapClose
- SCORE *GapCloseB; // Copy of PB[j].m_scoreGapClose
- SCORE *MPrev; // M scores of the previous row
- SCORE *MCurr; // M scores of the row being computed
- SCORE *MWork; // Third buffer passed to Rotate() together with MPrev/MCurr
- SCORE *DPrev; // D scores of the previous row
- SCORE *DCurr; // D scores of the row being computed
- SCORE *DWork; // Third buffer passed to Rotate() together with DPrev/DCurr
- SCORE **ScoreMxB; // [letter][j] = PB[j].m_AAScores[letter]; 4 rows
- unsigned **SortOrderA; // [i][k] = PA[i].m_uSortOrder[k]; 4 entries per position
- unsigned *uDeletePos; // Per column j: last row i at which opening a delete beat the carried D score
- FCOUNT **FreqsA; // [i][letter] = PA[i].m_fcCounts[letter]; 4 entries per position
- int **TraceBack; // uLength x uLength matrix handed to TraceBackToPath
- };
-
-static struct DP_MEMORY DPM; // File-scope instance; static storage, so it starts zeroed
-
-// Release every buffer owned by the file-static DPM work area and mark the
-// area as empty. Does nothing if no buffers are allocated, so it is safe to
-// call repeatedly; a later alignment re-allocates from scratch.
-void FreeDPMemSPN()
- {
- const unsigned uOldLength = DPM.uLength;
- if (0 == uOldLength)
- return;
-
-// Per-position rows first, then the per-letter score rows.
- for (unsigned i = 0; i < uOldLength; ++i)
- {
- delete[] DPM.TraceBack[i];
- delete[] DPM.FreqsA[i];
- delete[] DPM.SortOrderA[i];
- }
- for (unsigned n = 0; n < 4; ++n)
- delete[] DPM.ScoreMxB[n];
-
- delete[] DPM.MPrev;
- delete[] DPM.MCurr;
- delete[] DPM.MWork;
- delete[] DPM.DPrev;
- delete[] DPM.DCurr;
- delete[] DPM.DWork;
- delete[] DPM.uDeletePos;
- delete[] DPM.GapOpenA;
- delete[] DPM.GapOpenB;
- delete[] DPM.GapCloseA;
- delete[] DPM.GapCloseB;
- delete[] DPM.SortOrderA;
- delete[] DPM.FreqsA;
- delete[] DPM.ScoreMxB;
- delete[] DPM.TraceBack;
-
-// Reset the bookkeeping. If uLength kept its old value, the next AllocDPMem()
-// would either free these buffers a second time or conclude that the freed
-// buffers are still large enough and let the aligner write into them.
- DPM.MPrev = 0;
- DPM.MCurr = 0;
- DPM.MWork = 0;
- DPM.DPrev = 0;
- DPM.DCurr = 0;
- DPM.DWork = 0;
- DPM.uDeletePos = 0;
- DPM.GapOpenA = 0;
- DPM.GapOpenB = 0;
- DPM.GapCloseA = 0;
- DPM.GapCloseB = 0;
- DPM.SortOrderA = 0;
- DPM.FreqsA = 0;
- DPM.ScoreMxB = 0;
- DPM.TraceBack = 0;
- DPM.uLength = 0;
- }
-
-// Make sure the file-static DPM work buffers are large enough for profiles of
-// length uLengthA and uLengthB. Buffers only grow: when the current size is
-// not strictly larger than the longest prefix count, every buffer is released
-// and re-allocated with extra slack. Existing contents are not preserved.
-static void AllocDPMem(unsigned uLengthA, unsigned uLengthB)
- {
-// Max prefix length
- unsigned uLength = (uLengthA > uLengthB ? uLengthA : uLengthB) + 1;
- if (uLength < DPM.uLength)
- return;
-
-// Add 256 to allow for future expansion and
-// round up to next multiple of 32.
- uLength += 256;
- uLength += 32 - uLength%32;
-
-// Release the previous generation of buffers through the shared teardown
-// routine so the two can never drift apart; it is a no-op when nothing has
-// been allocated yet.
- FreeDPMemSPN();
-
- DPM.uLength = uLength;
-
- DPM.GapOpenA = new SCORE[uLength];
- DPM.GapOpenB = new SCORE[uLength];
- DPM.GapCloseA = new SCORE[uLength];
- DPM.GapCloseB = new SCORE[uLength];
-
- DPM.SortOrderA = new unsigned*[uLength];
- DPM.FreqsA = new FCOUNT*[uLength];
- DPM.ScoreMxB = new SCORE*[4];
- DPM.MPrev = new SCORE[uLength];
- DPM.MCurr = new SCORE[uLength];
- DPM.MWork = new SCORE[uLength];
-
- DPM.DPrev = new SCORE[uLength];
- DPM.DCurr = new SCORE[uLength];
- DPM.DWork = new SCORE[uLength];
- DPM.uDeletePos = new unsigned[uLength];
-
- DPM.TraceBack = new int*[uLength];
-
-// One score row per letter of the 4-letter alphabet.
- for (unsigned uLetter = 0; uLetter < 4; ++uLetter)
- DPM.ScoreMxB[uLetter] = new SCORE[uLength];
-
-// Per-position rows.
- for (unsigned i = 0; i < uLength; ++i)
- {
- DPM.SortOrderA[i] = new unsigned[4];
- DPM.FreqsA[i] = new FCOUNT[4];
- DPM.TraceBack[i] = new int[uLength];
- }
- }
-
-// Global profile-profile alignment for nucleotide profiles (4-letter
-// alphabet). Only the previous and current rows of the M and D scores are
-// kept; the full TraceBack matrix is converted into Path by TraceBackToPath.
-// Returns the score of the final cell. Quits unless g_Alpha is DNA or RNA.
-SCORE GlobalAlignSPN(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
- unsigned uLengthB, PWPath &Path)
- {
-// Reject only when the alphabet is neither DNA nor RNA. The previous test
-// (DNA != alpha || RNA == alpha) reduced to "alpha is not DNA" and so also
-// rejected RNA, contradicting the "must be nucleo" message.
- if (ALPHA_DNA != g_Alpha && ALPHA_RNA != g_Alpha)
- Quit("GlobalAlignSPN: must be nucleo");
-
- const unsigned uPrefixCountA = uLengthA + 1;
- const unsigned uPrefixCountB = uLengthB + 1;
-
- AllocDPMem(uLengthA, uLengthB);
-
- SCORE *GapOpenA = DPM.GapOpenA;
- SCORE *GapOpenB = DPM.GapOpenB;
- SCORE *GapCloseA = DPM.GapCloseA;
- SCORE *GapCloseB = DPM.GapCloseB;
-
- unsigned **SortOrderA = DPM.SortOrderA;
- FCOUNT **FreqsA = DPM.FreqsA;
- SCORE **ScoreMxB = DPM.ScoreMxB;
- SCORE *MPrev = DPM.MPrev;
- SCORE *MCurr = DPM.MCurr;
- SCORE *MWork = DPM.MWork;
-
- SCORE *DPrev = DPM.DPrev;
- SCORE *DCurr = DPM.DCurr;
- SCORE *DWork = DPM.DWork;
- unsigned *uDeletePos = DPM.uDeletePos;
-
- int **TraceBack = DPM.TraceBack;
-
-// Flatten profile A into the work arrays.
- for (unsigned i = 0; i < uLengthA; ++i)
- {
- GapOpenA[i] = PA[i].m_scoreGapOpen;
- GapCloseA[i] = PA[i].m_scoreGapClose;
-
- for (unsigned uLetter = 0; uLetter < 4; ++uLetter)
- {
- SortOrderA[i][uLetter] = PA[i].m_uSortOrder[uLetter];
- FreqsA[i][uLetter] = PA[i].m_fcCounts[uLetter];
- }
- }
-
- for (unsigned j = 0; j < uLengthB; ++j)
- {
- GapOpenB[j] = PB[j].m_scoreGapOpen;
- GapCloseB[j] = PB[j].m_scoreGapClose;
- }
-
-// Transpose B's per-position letter scores to [letter][position].
- for (unsigned uLetter = 0; uLetter < 4; ++uLetter)
- {
- for (unsigned j = 0; j < uLengthB; ++j)
- ScoreMxB[uLetter][j] = PB[j].m_AAScores[uLetter];
- }
-
-// Trace cells stay 0 unless a delete (positive) or insert (negative) branch
-// below overwrites them.
- for (unsigned i = 0; i < uPrefixCountA; ++i)
- memset(TraceBack[i], 0, uPrefixCountB*sizeof(int));
-
-// Special case for i=0
- unsigned **ptrSortOrderA = SortOrderA;
- FCOUNT **ptrFreqsA = FreqsA;
- assert(ptrSortOrderA == &(SortOrderA[0]));
- assert(ptrFreqsA == &(FreqsA[0]));
- TraceBack[0][0] = 0;
-
- SCORE scoreSum = 0;
- unsigned *ptrSortOrderAi = SortOrderA[0];
- const unsigned *ptrSortOrderAEnd = ptrSortOrderAi + 4;
- FCOUNT *ptrFreqsAi = FreqsA[0];
- for (; ptrSortOrderAi != ptrSortOrderAEnd; ++ptrSortOrderAi)
- {
- const unsigned uLetter = *ptrSortOrderAi;
- const FCOUNT fcLetter = ptrFreqsAi[uLetter];
- if (0 == fcLetter)
- break;
- scoreSum += fcLetter*ScoreMxB[uLetter][0];
- }
- MPrev[0] = scoreSum - g_scoreCenter;
-
-// D(0,0) is -infinity (requires I->D).
- DPrev[0] = MINUS_INFINITY;
-
- for (unsigned j = 1; j < uLengthB; ++j)
- {
- // Only way to get M(0, j) looks like this:
- // A ----X
- // B XXXXX
- // 0 j
- // So gap-open at j=0, gap-close at j-1.
- SCORE scoreSum = 0;
- unsigned *ptrSortOrderAi = SortOrderA[0];
- const unsigned *ptrSortOrderAEnd = ptrSortOrderAi + 4;
- FCOUNT *ptrFreqsAi = FreqsA[0];
- for (; ptrSortOrderAi != ptrSortOrderAEnd; ++ptrSortOrderAi)
- {
- const unsigned uLetter = *ptrSortOrderAi;
- const FCOUNT fcLetter = ptrFreqsAi[uLetter];
- if (0 == fcLetter)
- break;
- scoreSum += fcLetter*ScoreMxB[uLetter][j];
- }
- MPrev[j] = scoreSum - g_scoreCenter + GapOpenB[0] + GapCloseB[j-1];
- TraceBack[0][j] = -(int) j;
-
- // Assume no D->I transitions, then can't be a delete if only
- // one letter from A.
- DPrev[j] = MINUS_INFINITY;
- }
-
- SCORE IPrev_j_1;
- for (unsigned i = 1; i < uLengthA; ++i)
- {
- ++ptrSortOrderA;
- ++ptrFreqsA;
- assert(ptrSortOrderA == &(SortOrderA[i]));
- assert(ptrFreqsA == &(FreqsA[i]));
-
- // MCurr is first used as an accumulator for the letter-pair scores of
- // row i; the best predecessor score is added per column further down.
- SCORE *ptrMCurr_j = MCurr;
- memset(ptrMCurr_j, 0, uLengthB*sizeof(SCORE));
- const FCOUNT *FreqsAi = *ptrFreqsA;
-
- const unsigned *SortOrderAi = *ptrSortOrderA;
- const unsigned *ptrSortOrderAiEnd = SortOrderAi + 4;
- const SCORE *ptrMCurrMax = MCurr + uLengthB;
- for (const unsigned *ptrSortOrderAi = SortOrderAi;
- ptrSortOrderAi != ptrSortOrderAiEnd;
- ++ptrSortOrderAi)
- {
- const unsigned uLetter = *ptrSortOrderAi;
- SCORE *NSBR_Letter = ScoreMxB[uLetter];
- const FCOUNT fcLetter = FreqsAi[uLetter];
- if (0 == fcLetter)
- break;
- SCORE *ptrNSBR = NSBR_Letter;
- for (SCORE *ptrMCurr = MCurr; ptrMCurr != ptrMCurrMax; ++ptrMCurr)
- *ptrMCurr += fcLetter*(*ptrNSBR++);
- }
-
- for (unsigned j = 0; j < uLengthB; ++j)
- MCurr[j] -= g_scoreCenter;
-
- ptrMCurr_j = MCurr;
- unsigned *ptrDeletePos = uDeletePos;
-
- // Special case for j=0
- // Only way to get M(i, 0) looks like this:
- // 0 i
- // A XXXXX
- // B ----X
- // So gap-open at i=0, gap-close at i-1.
- assert(ptrMCurr_j == &(MCurr[0]));
- *ptrMCurr_j += GapOpenA[0] + GapCloseA[i-1];
-
- ++ptrMCurr_j;
-
- int *ptrTraceBack_ij = TraceBack[i];
- *ptrTraceBack_ij++ = (int) i;
-
- SCORE *ptrMPrev_j = MPrev;
- SCORE *ptrDPrev = DPrev;
- SCORE d = *ptrDPrev;
- SCORE DNew = *ptrMPrev_j + GapOpenA[i];
- if (DNew > d)
- {
- d = DNew;
- *ptrDeletePos = i;
- }
-
- SCORE *ptrDCurr = DCurr;
-
- assert(ptrDCurr == &(DCurr[0]));
- *ptrDCurr = d;
-
- // Can't have an insert if no letters from B
- IPrev_j_1 = MINUS_INFINITY;
-
- unsigned uInsertPos;
- const SCORE scoreGapOpenAi = GapOpenA[i];
- const SCORE scoreGapCloseAi_1 = GapCloseA[i-1];
-
- for (unsigned j = 1; j < uLengthB; ++j)
- {
- // Here, MPrev_j is preserved from previous
- // iteration so with current i,j is M[i-1][j-1]
- SCORE MPrev_j = *ptrMPrev_j;
- SCORE INew = MPrev_j + GapOpenB[j];
- if (INew > IPrev_j_1)
- {
- IPrev_j_1 = INew;
- uInsertPos = j;
- }
-
- SCORE scoreMax = MPrev_j;
-
- assert(ptrDPrev == &(DPrev[j-1]));
- SCORE scoreD = *ptrDPrev++ + scoreGapCloseAi_1;
- if (scoreD > scoreMax)
- {
- scoreMax = scoreD;
- assert(ptrDeletePos == &(uDeletePos[j-1]));
- *ptrTraceBack_ij = (int) i - (int) *ptrDeletePos;
- assert(*ptrTraceBack_ij > 0);
- }
- ++ptrDeletePos;
-
- SCORE scoreI = IPrev_j_1 + GapCloseB[j-1];
- if (scoreI > scoreMax)
- {
- scoreMax = scoreI;
- *ptrTraceBack_ij = (int) uInsertPos - (int) j;
- assert(*ptrTraceBack_ij < 0);
- }
-
- assert(ptrSortOrderA == &(SortOrderA[i]));
- assert(ptrFreqsA == &(FreqsA[i]));
-
- *ptrMCurr_j += scoreMax;
- assert(ptrMCurr_j == &(MCurr[j]));
- ++ptrMCurr_j;
-
- // MPrev_j now becomes M[i-1][j], used for the D update of column j.
- MPrev_j = *(++ptrMPrev_j);
- assert(ptrDPrev == &(DPrev[j]));
- SCORE d = *ptrDPrev;
- SCORE DNew = MPrev_j + scoreGapOpenAi;
- if (DNew > d)
- {
- d = DNew;
- assert(ptrDeletePos == &uDeletePos[j]);
- *ptrDeletePos = i;
- }
- assert(ptrDCurr + 1 == &(DCurr[j]));
- *(++ptrDCurr) = d;
-
- ++ptrTraceBack_ij;
- }
-
- Rotate(MPrev, MCurr, MWork);
- Rotate(DPrev, DCurr, DWork);
- }
-
-// Special case for i=uLengthA
- SCORE IPrev = MINUS_INFINITY;
-
- unsigned uInsertPos;
-
- for (unsigned j = 1; j < uLengthB; ++j)
- {
- SCORE INew = MPrev[j-1] + GapOpenB[j];
- if (INew > IPrev)
- {
- uInsertPos = j;
- IPrev = INew;
- }
- }
-
-// Special case for i=uLengthA, j=uLengthB
- SCORE scoreMax = MPrev[uLengthB-1];
- int iTraceBack = 0;
-
- SCORE scoreD = DPrev[uLengthB-1] + GapCloseA[uLengthA-1];
- if (scoreD > scoreMax)
- {
- scoreMax = scoreD;
- iTraceBack = (int) uLengthA - (int) uDeletePos[uLengthB-1];
- }
-
- SCORE scoreI = IPrev + GapCloseB[uLengthB-1];
- if (scoreI > scoreMax)
- {
- scoreMax = scoreI;
- iTraceBack = (int) uInsertPos - (int) uLengthB;
- }
-
- TraceBack[uLengthA][uLengthB] = iTraceBack;
-
- TraceBackToPath(TraceBack, uLengthA, uLengthB, Path);
-
- return scoreMax;
- }
Deleted: trunk/packages/muscle/trunk/glbalignss.cpp
===================================================================
--- trunk/packages/muscle/trunk/glbalignss.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/glbalignss.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,318 +0,0 @@
-#include "muscle.h"
-#include "profile.h"
-#include "pwpath.h"
-#include "seq.h"
-
-extern SCOREMATRIX VTML_SP;
-
-// #define SUBST(i, j) Subst(seqA, seqB, i, j)
-#define SUBST(i, j) MxRowA[i][seqB.GetLetter(j)]
-
-static SCORE Subst(const Seq &seqA, const Seq &seqB, unsigned i, unsigned j)
- {
- assert(i < seqA.Length());
- assert(j < seqB.Length());
-
- unsigned uLetterA = seqA.GetLetter(i);
- unsigned uLetterB = seqB.GetLetter(j);
- return VTML_SP[uLetterA][uLetterB] + g_scoreCenter;
- }
-
-struct DP_MEMORY
- {
- unsigned uLength;
- SCORE *MPrev;
- SCORE *MCurr;
- SCORE *MWork;
- SCORE *DPrev;
- SCORE *DCurr;
- SCORE *DWork;
- SCORE **MxRowA;
- unsigned *LettersB;
- unsigned *uDeletePos;
- int **TraceBack;
- };
-
-static struct DP_MEMORY DPM;
-
-static void AllocDPMem(unsigned uLengthA, unsigned uLengthB)
- {
-// Max prefix length
- unsigned uLength = (uLengthA > uLengthB ? uLengthA : uLengthB) + 1;
- if (uLength < DPM.uLength)
- return;
-
-// Add 256 to allow for future expansion and
-// round up to next multiple of 32.
- uLength += 256;
- uLength += 32 - uLength%32;
-
- const unsigned uOldLength = DPM.uLength;
- if (uOldLength > 0)
- {
- for (unsigned i = 0; i < uOldLength; ++i)
- delete[] DPM.TraceBack[i];
-
- delete[] DPM.MPrev;
- delete[] DPM.MCurr;
- delete[] DPM.MWork;
- delete[] DPM.DPrev;
- delete[] DPM.DCurr;
- delete[] DPM.DWork;
- delete[] DPM.MxRowA;
- delete[] DPM.LettersB;
- delete[] DPM.uDeletePos;
- delete[] DPM.TraceBack;
- }
-
- DPM.uLength = uLength;
-
- DPM.MPrev = new SCORE[uLength];
- DPM.MCurr = new SCORE[uLength];
- DPM.MWork = new SCORE[uLength];
-
- DPM.DPrev = new SCORE[uLength];
- DPM.DCurr = new SCORE[uLength];
- DPM.DWork = new SCORE[uLength];
- DPM.MxRowA = new SCORE *[uLength];
- DPM.LettersB = new unsigned[uLength];
- DPM.uDeletePos = new unsigned[uLength];
-
- DPM.TraceBack = new int*[uLength];
-
- for (unsigned i = 0; i < uLength; ++i)
- DPM.TraceBack[i] = new int[uLength];
- }
-
-static void RowFromSeq(const Seq &s, SCORE *Row[])
- {
- const unsigned uLength = s.Length();
- for (unsigned i = 0; i < uLength; ++i)
- {
- char c = s.GetChar(i);
- unsigned uLetter = CharToLetter(c);
- if (uLetter < 20)
- Row[i] = VTML_SP[uLetter];
- else
- Row[i] = VTML_SP[AX_X];
- }
- }
-
-static void LettersFromSeq(const Seq &s, unsigned Letters[])
- {
- const unsigned uLength = s.Length();
- for (unsigned i = 0; i < uLength; ++i)
- {
- char c = s.GetChar(i);
- unsigned uLetter = CharToLetter(c);
- if (uLetter < 20)
- Letters[i] = uLetter;
- else
- Letters[i] = AX_X;
- }
- }
-
-SCORE GlobalAlignSS(const Seq &seqA, const Seq &seqB, PWPath &Path)
- {
- const unsigned uLengthA = seqA.Length();
- const unsigned uLengthB = seqB.Length();
- const unsigned uPrefixCountA = uLengthA + 1;
- const unsigned uPrefixCountB = uLengthB + 1;
-
- AllocDPMem(uLengthA, uLengthB);
-
- SCORE *MPrev = DPM.MPrev;
- SCORE *MCurr = DPM.MCurr;
- SCORE *MWork = DPM.MWork;
-
- SCORE *DPrev = DPM.DPrev;
- SCORE *DCurr = DPM.DCurr;
- SCORE *DWork = DPM.DWork;
- SCORE **MxRowA = DPM.MxRowA;
- unsigned *LettersB = DPM.LettersB;
-
- RowFromSeq(seqA, MxRowA);
- LettersFromSeq(seqB, LettersB);
-
- unsigned *uDeletePos = DPM.uDeletePos;
-
- int **TraceBack = DPM.TraceBack;
-
-#if DEBUG
- for (unsigned i = 0; i < uPrefixCountA; ++i)
- memset(TraceBack[i], 0, uPrefixCountB*sizeof(int));
-#endif
-
-// Special case for i=0
- TraceBack[0][0] = 0;
- MPrev[0] = MxRowA[0][LettersB[0]];
-
-// D(0,0) is -infinity (requires I->D).
- DPrev[0] = MINUS_INFINITY;
-
- for (unsigned j = 1; j < uLengthB; ++j)
- {
- unsigned uLetterB = LettersB[j];
-
- // Only way to get M(0, j) looks like this:
- // A ----X
- // B XXXXX
- // 0 j
- // So gap-open at j=0, gap-close at j-1.
- MPrev[j] = MxRowA[0][uLetterB] + g_scoreGapOpen/2; // term gaps half
- TraceBack[0][j] = -(int) j;
-
- // Assume no D->I transitions, then can't be a delete if only
- // one letter from A.
- DPrev[j] = MINUS_INFINITY;
- }
-
- SCORE IPrev_j_1;
- for (unsigned i = 1; i < uLengthA; ++i)
- {
- SCORE *ptrMCurr_j = MCurr;
- memset(ptrMCurr_j, 0, uLengthB*sizeof(SCORE));
-
- const SCORE *RowA = MxRowA[i];
- const SCORE *ptrRowA = MxRowA[i];
- const SCORE *ptrMCurrEnd = ptrMCurr_j + uLengthB;
- unsigned *ptrLettersB = LettersB;
- for (; ptrMCurr_j != ptrMCurrEnd; ++ptrMCurr_j)
- {
- *ptrMCurr_j = RowA[*ptrLettersB];
- ++ptrLettersB;
- }
-
- unsigned *ptrDeletePos = uDeletePos;
-
- // Special case for j=0
- // Only way to get M(i, 0) looks like this:
- // 0 i
- // A XXXXX
- // B ----X
- // So gap-open at i=0, gap-close at i-1.
- ptrMCurr_j = MCurr;
- assert(ptrMCurr_j == &(MCurr[0]));
- *ptrMCurr_j += g_scoreGapOpen/2; // term gaps half
-
- ++ptrMCurr_j;
-
- int *ptrTraceBack_ij = TraceBack[i];
- *ptrTraceBack_ij++ = (int) i;
-
- SCORE *ptrMPrev_j = MPrev;
- SCORE *ptrDPrev = DPrev;
- SCORE d = *ptrDPrev;
- SCORE DNew = *ptrMPrev_j + g_scoreGapOpen;
- if (DNew > d)
- {
- d = DNew;
- *ptrDeletePos = i;
- }
-
- SCORE *ptrDCurr = DCurr;
-
- assert(ptrDCurr == &(DCurr[0]));
- *ptrDCurr = d;
-
- // Can't have an insert if no letters from B
- IPrev_j_1 = MINUS_INFINITY;
-
- unsigned uInsertPos;
-
- for (unsigned j = 1; j < uLengthB; ++j)
- {
- // Here, MPrev_j is preserved from previous
- // iteration so with current i,j is M[i-1][j-1]
- SCORE MPrev_j = *ptrMPrev_j;
- SCORE INew = MPrev_j + g_scoreGapOpen;
- if (INew > IPrev_j_1)
- {
- IPrev_j_1 = INew;
- uInsertPos = j;
- }
-
- SCORE scoreMax = MPrev_j;
-
- assert(ptrDPrev == &(DPrev[j-1]));
- SCORE scoreD = *ptrDPrev++;
- if (scoreD > scoreMax)
- {
- scoreMax = scoreD;
- assert(ptrDeletePos == &(uDeletePos[j-1]));
- *ptrTraceBack_ij = (int) i - (int) *ptrDeletePos;
- assert(*ptrTraceBack_ij > 0);
- }
- ++ptrDeletePos;
-
- SCORE scoreI = IPrev_j_1;
- if (scoreI > scoreMax)
- {
- scoreMax = scoreI;
- *ptrTraceBack_ij = (int) uInsertPos - (int) j;
- assert(*ptrTraceBack_ij < 0);
- }
-
- *ptrMCurr_j += scoreMax;
- assert(ptrMCurr_j == &(MCurr[j]));
- ++ptrMCurr_j;
-
- MPrev_j = *(++ptrMPrev_j);
- assert(ptrDPrev == &(DPrev[j]));
- SCORE d = *ptrDPrev;
- SCORE DNew = MPrev_j + g_scoreGapOpen;
- if (DNew > d)
- {
- d = DNew;
- assert(ptrDeletePos == &uDeletePos[j]);
- *ptrDeletePos = i;
- }
- assert(ptrDCurr + 1 == &(DCurr[j]));
- *(++ptrDCurr) = d;
-
- ++ptrTraceBack_ij;
- }
-
- Rotate(MPrev, MCurr, MWork);
- Rotate(DPrev, DCurr, DWork);
- }
-
-// Special case for i=uLengthA
- SCORE IPrev = MINUS_INFINITY;
-
- unsigned uInsertPos;
-
- for (unsigned j = 1; j < uLengthB; ++j)
- {
- SCORE INew = MPrev[j-1];
- if (INew > IPrev)
- {
- uInsertPos = j;
- IPrev = INew;
- }
- }
-
-// Special case for i=uLengthA, j=uLengthB
- SCORE scoreMax = MPrev[uLengthB-1];
- int iTraceBack = 0;
-
- SCORE scoreD = DPrev[uLengthB-1] - g_scoreGapOpen/2; // term gaps half
- if (scoreD > scoreMax)
- {
- scoreMax = scoreD;
- iTraceBack = (int) uLengthA - (int) uDeletePos[uLengthB-1];
- }
-
- SCORE scoreI = IPrev - g_scoreGapOpen/2;
- if (scoreI > scoreMax)
- {
- scoreMax = scoreI;
- iTraceBack = (int) uInsertPos - (int) uLengthB;
- }
-
- TraceBack[uLengthA][uLengthB] = iTraceBack;
-
- TraceBackToPath(TraceBack, uLengthA, uLengthB, Path);
-
- return scoreMax;
- }
Deleted: trunk/packages/muscle/trunk/glbalndimer.cpp
===================================================================
--- trunk/packages/muscle/trunk/glbalndimer.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/glbalndimer.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,390 +0,0 @@
-#include "muscle.h"
-#include <math.h>
-#include <stdio.h> // for sprintf
-#include "pwpath.h"
-#include "profile.h"
-#include "gapscoredimer.h"
-
-#define TRACE 0
-
-static SCORE TraceBackDimer( const SCORE *DPM_, const SCORE *DPD_, const SCORE *DPI_,
- const char *TBM_, const char *TBD_, const char *TBI_,
- unsigned uLengthA, unsigned uLengthB, PWPath &Path);
-
-static const char *LocalScoreToStr(SCORE s)
- {
- static char str[16];
- if (MINUS_INFINITY == s)
- return " *";
- sprintf(str, "%6.3g", s);
- return str;
- }
-
-#if TRACE
-static void ListDP(const SCORE *DPM_, const ProfPos *PA, const ProfPos *PB,
- unsigned uPrefixCountA, unsigned uPrefixCountB)
- {
- Log(" ");
- for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
- {
- char c = ' ';
- if (uPrefixLengthB > 0)
- c = ConsensusChar(PB[uPrefixLengthB - 1]);
- Log(" %4u:%c", uPrefixLengthB, c);
- }
- Log("\n");
- for (unsigned uPrefixLengthA = 0; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
- {
- char c = ' ';
- if (uPrefixLengthA > 0)
- c = ConsensusChar(PA[uPrefixLengthA - 1]);
- Log("%4u:%c ", uPrefixLengthA, c);
- for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
- Log(" %s", LocalScoreToStr(DPM(uPrefixLengthA, uPrefixLengthB)));
- Log("\n");
- }
- }
-
-static void ListTB(const char *TBM_, const ProfPos *PA, const ProfPos *PB,
- unsigned uPrefixCountA, unsigned uPrefixCountB)
- {
- Log(" ");
- for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
- Log("%2d", uPrefixLengthB);
- Log("\n");
- Log(" ");
- for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
- {
- char c = ' ';
- if (uPrefixLengthB > 0)
- c = ConsensusChar(PB[uPrefixLengthB - 1]);
- Log(" %c", c);
- }
- Log("\n");
- for (unsigned uPrefixLengthA = 0; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
- {
- char c = ' ';
- if (uPrefixLengthA > 0)
- c = ConsensusChar(PA[uPrefixLengthA - 1]);
- Log("%4u:%c ", uPrefixLengthA, c);
- for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
- Log(" %c", TBM(uPrefixLengthA, uPrefixLengthB));
- Log("\n");
- }
- }
-#endif // TRACE
-
-static ProfPos PPTerm;
-static bool InitializePPTerm()
- {
- PPTerm.m_bAllGaps = false;
- PPTerm.m_LL = 1;
- PPTerm.m_LG = 0;
- PPTerm.m_GL = 0;
- PPTerm.m_GG = 0;
- PPTerm.m_fOcc = 1;
- return true;
- }
-static bool PPTermInitialized = InitializePPTerm();
-
-static SCORE ScoreProfPosDimerLE(const ProfPos &PPA, const ProfPos &PPB)
- {
- SCORE Score = 0;
- for (unsigned n = 0; n < 20; ++n)
- {
- const unsigned uLetter = PPA.m_uSortOrder[n];
- const FCOUNT fcLetter = PPA.m_fcCounts[uLetter];
- if (0 == fcLetter)
- break;
- Score += fcLetter*PPB.m_AAScores[uLetter];
- }
- if (0 == Score)
- return -2.5;
- SCORE logScore = logf(Score);
- return (SCORE) (logScore*(PPA.m_fOcc * PPB.m_fOcc));
- }
-
-static SCORE ScoreProfPosDimerPSP(const ProfPos &PPA, const ProfPos &PPB)
- {
- SCORE Score = 0;
- for (unsigned n = 0; n < 20; ++n)
- {
- const unsigned uLetter = PPA.m_uSortOrder[n];
- const FCOUNT fcLetter = PPA.m_fcCounts[uLetter];
- if (0 == fcLetter)
- break;
- Score += fcLetter*PPB.m_AAScores[uLetter];
- }
- return Score;
- }
-
-static SCORE ScoreProfPosDimer(const ProfPos &PPA, const ProfPos &PPB)
- {
- switch (g_PPScore)
- {
- case PPSCORE_LE:
- return ScoreProfPosDimerLE(PPA, PPB);
-
- case PPSCORE_SP:
- case PPSCORE_SV:
- return ScoreProfPosDimerPSP(PPA, PPB);
- }
- Quit("Invalid g_PPScore");
- return 0;
- }
-
-// Global alignment dynamic programming
-// This variant optimizes the profile-profile SP score under the
-// dimer approximation.
-SCORE GlobalAlignDimer(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
- unsigned uLengthB, PWPath &Path)
- {
- assert(uLengthB > 0 && uLengthA > 0);
-
- const unsigned uPrefixCountA = uLengthA + 1;
- const unsigned uPrefixCountB = uLengthB + 1;
-
-// Allocate DP matrices
- const size_t LM = uPrefixCountA*uPrefixCountB;
- SCORE *DPM_ = new SCORE[LM];
- SCORE *DPD_ = new SCORE[LM];
- SCORE *DPI_ = new SCORE[LM];
-
- char *TBM_ = new char[LM];
- char *TBD_ = new char[LM];
- char *TBI_ = new char[LM];
-
- DPM(0, 0) = 0;
- DPD(0, 0) = MINUS_INFINITY;
- DPI(0, 0) = MINUS_INFINITY;
-
- TBM(0, 0) = 'S';
- TBD(0, 0) = '?';
- TBI(0, 0) = '?';
-
- DPM(1, 0) = MINUS_INFINITY;
- DPD(1, 0) = GapScoreMD(PA[0], PPTerm);
- DPI(1, 0) = MINUS_INFINITY;
-
- TBM(1, 0) = '?';
- TBD(1, 0) = 'S';
- TBI(1, 0) = '?';
-
- DPM(0, 1) = MINUS_INFINITY;
- DPD(0, 1) = MINUS_INFINITY;
- DPI(0, 1) = GapScoreMI(PPTerm, PB[0]);
-
- TBM(0, 1) = '?';
- TBD(0, 1) = '?';
- TBI(0, 1) = 'S';
-
-// Empty prefix of B is special case
- for (unsigned uPrefixLengthA = 2; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
- {
- // M=LetterA+LetterB, impossible with empty prefix
- DPM(uPrefixLengthA, 0) = MINUS_INFINITY;
- TBM(uPrefixLengthA, 0) = '?';
-
- // D=LetterA+GapB
- DPD(uPrefixLengthA, 0) = DPD(uPrefixLengthA - 1, 0) +
- GapScoreDD(PA[uPrefixLengthA - 1], PPTerm);
- TBD(uPrefixLengthA, 0) = 'D';
-
- // I=GapA+LetterB, impossible with empty prefix
- DPI(uPrefixLengthA, 0) = MINUS_INFINITY;
- TBI(uPrefixLengthA, 0) = '?';
- }
-
-// Empty prefix of A is special case
- for (unsigned uPrefixLengthB = 2; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
- {
- // M=LetterA+LetterB, impossible with empty prefix
- DPM(0, uPrefixLengthB) = MINUS_INFINITY;
- TBM(0, uPrefixLengthB) = '?';
-
- // D=LetterA+GapB, impossible with empty prefix
- DPD(0, uPrefixLengthB) = MINUS_INFINITY;
- TBD(0, uPrefixLengthB) = '?';
-
- // I=GapA+LetterB
- DPI(0, uPrefixLengthB) = DPI(0, uPrefixLengthB - 1) +
- GapScoreII(PPTerm, PB[uPrefixLengthB - 1]);
- TBI(0, uPrefixLengthB) = 'I';
- }
-
-// ============
-// Main DP loop
-// ============
- for (unsigned uPrefixLengthB = 1; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
- {
- const ProfPos &PPB = PB[uPrefixLengthB - 1];
- for (unsigned uPrefixLengthA = 1; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
- {
- const ProfPos &PPA = PA[uPrefixLengthA - 1];
- {
- // Match M=LetterA+LetterB
- SCORE scoreLL = ScoreProfPosDimer(PPA, PPB);
-
- SCORE scoreMM = DPM(uPrefixLengthA-1, uPrefixLengthB-1) + GapScoreMM(PPA, PPB);
- SCORE scoreDM = DPD(uPrefixLengthA-1, uPrefixLengthB-1) + GapScoreDM(PPA, PPB);
- SCORE scoreIM = DPI(uPrefixLengthA-1, uPrefixLengthB-1) + GapScoreIM(PPA, PPB);
-
- SCORE scoreBest = scoreMM;
- char c = 'M';
- if (scoreDM > scoreBest)
- {
- scoreBest = scoreDM;
- c = 'D';
- }
- if (scoreIM > scoreBest)
- {
- scoreBest = scoreIM;
- c = 'I';
- }
-
- DPM(uPrefixLengthA, uPrefixLengthB) = scoreBest + scoreLL;
- TBM(uPrefixLengthA, uPrefixLengthB) = c;
- }
- {
- // Delete D=LetterA+GapB
- SCORE scoreMD = DPM(uPrefixLengthA-1, uPrefixLengthB) + GapScoreMD(PPA, PPB);
- SCORE scoreDD = DPD(uPrefixLengthA-1, uPrefixLengthB) + GapScoreDD(PPA, PPB);
- SCORE scoreID = DPI(uPrefixLengthA-1, uPrefixLengthB) + GapScoreID(PPA, PPB);
-
- SCORE scoreBest = scoreMD;
- char c = 'M';
- if (scoreDD > scoreBest)
- {
- scoreBest = scoreDD;
- c = 'D';
- }
- if (scoreID > scoreBest)
- {
- scoreBest = scoreID;
- c = 'I';
- }
-
- DPD(uPrefixLengthA, uPrefixLengthB) = scoreBest;
- TBD(uPrefixLengthA, uPrefixLengthB) = c;
- }
- {
- // Insert I=GapA+LetterB
- SCORE scoreMI = DPM(uPrefixLengthA, uPrefixLengthB-1) + GapScoreMI(PPA, PPB);
- SCORE scoreDI = DPD(uPrefixLengthA, uPrefixLengthB-1) + GapScoreDI(PPA, PPB);
- SCORE scoreII = DPI(uPrefixLengthA, uPrefixLengthB-1) + GapScoreII(PPA, PPB);
-
- SCORE scoreBest = scoreMI;
- char c = 'M';
- if (scoreDI > scoreBest)
- {
- scoreBest = scoreDI;
- c = 'D';
- }
- if (scoreII > scoreBest)
- {
- scoreBest = scoreII;
- c = 'I';
- }
-
- DPI(uPrefixLengthA, uPrefixLengthB) = scoreBest;
- TBI(uPrefixLengthA, uPrefixLengthB) = c;
- }
- }
- }
-
-#if TRACE
- Log("DPM:\n");
- ListDP(DPM_, PA, PB, uPrefixCountA, uPrefixCountB);
- Log("DPD:\n");
- ListDP(DPD_, PA, PB, uPrefixCountA, uPrefixCountB);
- Log("DPI:\n");
- ListDP(DPI_, PA, PB, uPrefixCountA, uPrefixCountB);
- Log("TBM:\n");
- ListTB(TBM_, PA, PB, uPrefixCountA, uPrefixCountB);
- Log("TBD:\n");
- ListTB(TBD_, PA, PB, uPrefixCountA, uPrefixCountB);
- Log("TBI:\n");
- ListTB(TBI_, PA, PB, uPrefixCountA, uPrefixCountB);
-#endif
-
- SCORE Score = TraceBackDimer(DPM_, DPD_, DPI_, TBM_, TBD_, TBI_,
- uLengthA, uLengthB, Path);
-
-#if TRACE
- Log("GlobalAlignDimer score = %.3g\n", Score);
-#endif
-
- delete[] DPM_;
- delete[] DPD_;
- delete[] DPI_;
-
- delete[] TBM_;
- delete[] TBD_;
- delete[] TBI_;
-
- return Score;
- }
-
-static SCORE TraceBackDimer( const SCORE *DPM_, const SCORE *DPD_, const SCORE *DPI_,
- const char *TBM_, const char *TBD_, const char *TBI_,
- unsigned uLengthA, unsigned uLengthB, PWPath &Path)
- {
- const unsigned uPrefixCountA = uLengthA + 1;
-
- unsigned uPrefixLengthA = uLengthA;
- unsigned uPrefixLengthB = uLengthB;
-
- char cEdge = 'M';
- SCORE scoreMax = DPM(uLengthA, uLengthB);
- if (DPD(uLengthA, uLengthB) > scoreMax)
- {
- scoreMax = DPD(uLengthA, uLengthB);
- cEdge = 'D';
- }
- if (DPI(uLengthA, uLengthB) > scoreMax)
- {
- scoreMax = DPI(uLengthA, uLengthB);
- cEdge = 'I';
- }
-
- for (;;)
- {
- if (0 == uPrefixLengthA && 0 == uPrefixLengthB)
- break;
-
- PWEdge Edge;
- Edge.cType = cEdge;
- Edge.uPrefixLengthA = uPrefixLengthA;
- Edge.uPrefixLengthB = uPrefixLengthB;
- Path.PrependEdge(Edge);
-
-#if TRACE
- Log("PLA=%u PLB=%u Edge=%c\n", uPrefixLengthA, uPrefixLengthB, cEdge);
-#endif
- switch (cEdge)
- {
- case 'M':
- assert(uPrefixLengthA > 0 && uPrefixLengthB > 0);
- cEdge = TBM(uPrefixLengthA, uPrefixLengthB);
- --uPrefixLengthA;
- --uPrefixLengthB;
- break;
- case 'D':
- assert(uPrefixLengthA > 0);
- cEdge = TBD(uPrefixLengthA, uPrefixLengthB);
- --uPrefixLengthA;
- break;
- case 'I':
- assert(uPrefixLengthB > 0);
- cEdge = TBI(uPrefixLengthA, uPrefixLengthB);
- --uPrefixLengthB;
- break;
- default:
- Quit("Invalid edge PLA=%u PLB=%u %c", uPrefixLengthA, uPrefixLengthB, cEdge);
- }
- }
-#if TRACE
- Path.LogMe();
-#endif
- return scoreMax;
- }
Deleted: trunk/packages/muscle/trunk/globals.cpp
===================================================================
--- trunk/packages/muscle/trunk/globals.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/globals.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,267 +0,0 @@
-#include "muscle.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdarg.h>
-#include <string.h>
-#include <math.h>
-#include <assert.h>
-#include <time.h>
-#include <errno.h>
-
-#if WIN32
-#include <windows.h>
-#include <share.h>
-#endif
-
-#ifndef MAX_PATH
-#define MAX_PATH 260
-#endif
-
-static char g_strListFileName[MAX_PATH];
-static bool g_bListFileAppend = false;
-
-static SEQWEIGHT g_SeqWeight = SEQWEIGHT_Undefined;
-
-void SetSeqWeightMethod(SEQWEIGHT Method)
- {
- g_SeqWeight = Method;
- }
-
-SEQWEIGHT GetSeqWeightMethod()
- {
- return g_SeqWeight;
- }
-
-void SetListFileName(const char *ptrListFileName, bool bAppend)
- {
- assert(strlen(ptrListFileName) < MAX_PATH);
- strcpy(g_strListFileName, ptrListFileName);
- g_bListFileAppend = bAppend;
- }
-
-void Log(const char szFormat[], ...)
- {
- if (0 == g_strListFileName[0])
- return;
-
- static FILE *f = NULL;
- char *mode;
- if (g_bListFileAppend)
- mode = "a";
- else
- mode = "w";
- if (NULL == f)
- f = _fsopen(g_strListFileName, mode, _SH_DENYNO);
- if (NULL == f)
- {
- perror(g_strListFileName);
- exit(EXIT_NotStarted);
- }
-
- char szStr[4096];
- va_list ArgList;
- va_start(ArgList, szFormat);
- vsprintf(szStr, szFormat, ArgList);
- fprintf(f, "%s", szStr);
- fflush(f);
- }
-
-const char *GetTimeAsStr()
- {
- static char szStr[32];
- time_t t;
- time(&t);
- struct tm *ptmCurrentTime = localtime(&t);
- strcpy(szStr, asctime(ptmCurrentTime));
- assert('\n' == szStr[24]);
- szStr[24] = 0;
- return szStr;
- }
-
-// Exit immediately with error message, printf-style.
-void Quit(const char szFormat[], ...)
- {
- va_list ArgList;
- char szStr[4096];
-
- va_start(ArgList, szFormat);
- vsprintf(szStr, szFormat, ArgList);
-
- fprintf(stderr, "\n*** ERROR *** %s\n", szStr);
-
- Log("\n*** FATAL ERROR *** ");
- Log("%s\n", szStr);
- Log("Stopped %s\n", GetTimeAsStr());
-
-#ifdef WIN32
- if (IsDebuggerPresent())
- {
- int iBtn = MessageBox(NULL, szStr, "muscle", MB_ICONERROR | MB_OKCANCEL);
- if (IDCANCEL == iBtn)
- Break();
- }
-#endif
- exit(EXIT_FatalError);
- }
-
-void Warning(const char szFormat[], ...)
- {
- va_list ArgList;
- char szStr[4096];
-
- va_start(ArgList, szFormat);
- vsprintf(szStr, szFormat, ArgList);
-
- fprintf(stderr, "\n*** WARNING *** %s\n", szStr);
- Log("\n*** WARNING *** %s\n", szStr);
- }
-
-// Remove leading and trailing blanks from string
-void TrimBlanks(char szStr[])
- {
- TrimLeadingBlanks(szStr);
- TrimTrailingBlanks(szStr);
- }
-
-void TrimLeadingBlanks(char szStr[])
- {
- size_t n = strlen(szStr);
- while (szStr[0] == ' ')
- {
- memmove(szStr, szStr+1, n);
- szStr[--n] = 0;
- }
- }
-
-void TrimTrailingBlanks(char szStr[])
- {
- size_t n = strlen(szStr);
- while (n > 0 && szStr[n-1] == ' ')
- szStr[--n] = 0;
- }
-
-bool Verbose()
- {
- return true;
- }
-
-SCORE StrToScore(const char *pszStr)
- {
- return (SCORE) atof(pszStr);
- }
-
-void StripWhitespace(char szStr[])
- {
- unsigned uOutPos = 0;
- unsigned uInPos = 0;
- while (char c = szStr[uInPos++])
- if (' ' != c && '\t' != c && '\n' != c && '\r' != c)
- szStr[uOutPos++] = c;
- szStr[uOutPos] = 0;
- }
-
-void StripGaps(char szStr[])
- {
- unsigned uOutPos = 0;
- unsigned uInPos = 0;
- while (char c = szStr[uInPos++])
- if ('-' != c)
- szStr[uOutPos++] = c;
- szStr[uOutPos] = 0;
- }
-
-bool IsValidSignedInteger(const char *Str)
- {
- if (0 == strlen(Str))
- return false;
- if ('+' == *Str || '-' == *Str)
- ++Str;
- while (char c = *Str++)
- if (!isdigit(c))
- return false;
- return true;
- }
-
-bool IsValidInteger(const char *Str)
- {
- if (0 == strlen(Str))
- return false;
- while (char c = *Str++)
- if (!isdigit(c))
- return false;
- return true;
- }
-
-// Is c valid as first character in an identifier?
-bool isidentf(char c)
- {
- return isalpha(c) || '_' == c;
- }
-
-// Is c valid character in an identifier?
-bool isident(char c)
- {
- return isalpha(c) || isdigit(c) || '_' == c;
- }
-
-bool IsValidIdentifier(const char *Str)
- {
- if (!isidentf(Str[0]))
- return false;
- while (char c = *Str++)
- if (!isident(c))
- return false;
- return true;
- }
-
-void SetLogFile()
- {
- const char *strFileName = ValueOpt("loga");
- if (0 != strFileName)
- g_bListFileAppend = true;
- else
- strFileName = ValueOpt("log");
- if (0 == strFileName)
- return;
- strcpy(g_strListFileName, strFileName);
- }
-
-// Get filename, stripping any extension and directory parts.
-void NameFromPath(const char szPath[], char szName[], unsigned uBytes)
- {
- if (0 == uBytes)
- return;
- const char *pstrLastSlash = strrchr(szPath, '/');
- const char *pstrLastBackslash = strrchr(szPath, '\\');
- const char *pstrLastDot = strrchr(szPath, '.');
- const char *pstrLastSep = pstrLastSlash > pstrLastBackslash ?
- pstrLastSlash : pstrLastBackslash;
- const char *pstrBegin = pstrLastSep ? pstrLastSep + 1 : szPath;
- const char *pstrEnd = pstrLastDot ? pstrLastDot - 1 : szPath + strlen(szPath);
- unsigned uNameLength = (unsigned) (pstrEnd - pstrBegin + 1);
- if (uNameLength > uBytes - 1)
- uNameLength = uBytes - 1;
- memcpy(szName, pstrBegin, uNameLength);
- szName[uNameLength] = 0;
- }
-
-char *strsave(const char *s)
- {
- char *ptrCopy = strdup(s);
- if (0 == ptrCopy)
- Quit("Out of memory");
- return ptrCopy;
- }
-
-bool IsValidFloatChar(char c)
- {
- return isdigit(c) || '.' == c || 'e' == c || 'E' == c || 'd' == c ||
- 'D' == c || '.' == c || '+' == c || '-' == c;
- }
-
-void Call_MY_ASSERT(const char *file, int line, bool b, const char *msg)
- {
- if (b)
- return;
- Quit("%s(%d): MY_ASSERT(%s)", file, line, msg);
- }
Deleted: trunk/packages/muscle/trunk/globalslinux.cpp
===================================================================
--- trunk/packages/muscle/trunk/globalslinux.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/globalslinux.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,163 +0,0 @@
-#include "muscle.h"
-
-#ifndef WIN32
-#include <sys/time.h>
-#include <sys/resource.h>
-#include <unistd.h>
-#include <errno.h>
-#include <stdio.h>
-#include <fcntl.h>
-
-const int ONE_MB = 1000000;
-const int MEM_WARNING_THRESHOLD = 20*ONE_MB;
-
-double GetNAN()
- {
- static unsigned long nan[2]={0xffffffff, 0x7fffffff};
- double dNAN = *( double* )nan;
- return dNAN;
- }
-
-double g_dNAN = GetNAN();
-
-void chkmem(const char szMsg[])
- {
- //assert(_CrtCheckMemory());
- }
-
-void Break()
- {
- //DebugBreak();
- }
-
-static char szCmdLine[4096];
-
-void *ptrStartBreak = sbrk(0);
-
-const char *GetCmdLine()
- {
- return szCmdLine;
- }
-
-double GetMemUseMB()
- {
- static char statm[64];
- static int PageSize;
- if (0 == statm[0])
- {
- PageSize = sysconf(_SC_PAGESIZE);
- pid_t pid = getpid();
- sprintf(statm, "/proc/%d/statm", (int) pid);
- }
-
- int fd = open(statm, O_RDONLY);
- if (-1 == fd)
- return -1;
- char Buffer[64];
- int n = read(fd, Buffer, sizeof(Buffer) - 1);
- close(fd);
- fd = -1;
-
- if (n <= 0)
- {
- static bool Warned = false;
- if (!Warned)
- {
- Warned = true;
- Warning("*Warning* Cannot read %s errno=%d %s",
- statm, errno, strerror(errno));
- }
- return 0;
- }
- Buffer[n] = 0;
- int Pages = atoi(Buffer);
-
- return ((double) Pages * (double) PageSize)/1e6;
- }
-
-void SaveCmdLine(int argc, char *argv[])
- {
- for (int i = 0; i < argc; ++i)
- {
- if (i > 0)
- strcat(szCmdLine, " ");
- strcat(szCmdLine, argv[i]);
- }
- }
-
-double dPeakMemUseMB = 0;
-
-double GetPeakMemUseMB()
- {
- CheckMemUse();
- return dPeakMemUseMB;
- }
-
-double GetCPUGHz()
- {
- double dGHz = 2.5;
- const char *e = getenv("CPUGHZ");
- if (0 != e)
- dGHz = atof(e);
- return dGHz;
- }
-
-void CheckMemUse()
- {
- double dMB = GetMemUseMB();
- if (dMB > dPeakMemUseMB)
- dPeakMemUseMB = dMB;
- }
-
-double GetRAMSizeMB()
- {
- const double DEFAULT_RAM = 500;
- static double RAMMB = 0;
- if (RAMMB != 0)
- return RAMMB;
-
- int fd = open("/proc/meminfo", O_RDONLY);
- if (-1 == fd)
- {
- static bool Warned = false;
- if (!Warned)
- {
- Warned = true;
- Warning("*Warning* Cannot open /proc/meminfo errno=%d %s",
- errno, strerror(errno));
- }
- return DEFAULT_RAM;
- }
- char Buffer[1024];
- int n = read(fd, Buffer, sizeof(Buffer) - 1);
- close(fd);
- fd = -1;
-
- if (n <= 0)
- {
- static bool Warned = false;
- if (!Warned)
- {
- Warned = true;
- Warning("*Warning* Cannot read /proc/meminfo errno=%d %s",
- errno, strerror(errno));
- }
- return DEFAULT_RAM;
- }
- Buffer[n] = 0;
- char *pMem = strstr(Buffer, "MemTotal: ");
- if (0 == pMem)
- {
- static bool Warned = false;
- if (!Warned)
- {
- Warned = true;
- Warning("*Warning* 'MemTotal:' not found in /proc/meminfo");
- }
- return DEFAULT_RAM;
- }
- int Bytes = atoi(pMem+9)*1000;
- return ((double) Bytes)/1e6;
- }
-
-#endif // !WIN32
Deleted: trunk/packages/muscle/trunk/globalswin32.cpp
===================================================================
--- trunk/packages/muscle/trunk/globalswin32.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/globalswin32.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,100 +0,0 @@
-#include "muscle.h"
-
-#if WIN32
-#include <windows.h>
-#include <crtdbg.h>
-#include <psapi.h>
-#include <float.h>
-#include <stdio.h>
-
-void DebugPrintf(const char *szFormat, ...)
- {
- va_list ArgList;
- char szStr[4096];
-
- va_start(ArgList, szFormat);
- vsprintf(szStr, szFormat, ArgList);
-
- OutputDebugString(szStr);
- }
-
-double GetNAN()
- {
- static unsigned long nan[2]={0xffffffff, 0x7fffffff};
- double dNAN = *( double* )nan;
- assert(_isnan(dNAN));
- return dNAN;
- }
-
-double g_dNAN = GetNAN();
-
-void chkmem(const char szMsg[])
- {
- if (!_CrtCheckMemory())
- Quit("chkmem(%s)", szMsg);
- }
-
-void Break()
- {
- if (IsDebuggerPresent())
- DebugBreak();
- }
-
-const char *GetCmdLine()
- {
- return GetCommandLine();
- }
-
-static unsigned uPeakMemUseBytes;
-
-double GetRAMSizeMB()
- {
- MEMORYSTATUS MS;
- GlobalMemoryStatus(&MS);
- return MS.dwAvailPhys/1e6;
- }
-
-double GetMemUseMB()
- {
- HANDLE hProc = GetCurrentProcess();
- PROCESS_MEMORY_COUNTERS PMC;
- BOOL bOk = GetProcessMemoryInfo(hProc, &PMC, sizeof(PMC));
- assert(bOk);
- //printf("GetMemUseMB()\n");
- //printf("%12u PageFaultCount\n", (unsigned) PMC.PageFaultCount);
- //printf("%12u PagefileUsage\n", (unsigned) PMC.PagefileUsage);
- //printf("%12u PeakPagefileUsage\n", (unsigned) PMC.PeakPagefileUsage);
- //printf("%12u WorkingSetSize\n", (unsigned) PMC.WorkingSetSize);
- //printf("%12u PeakWorkingSetSize\n", (unsigned) PMC.PeakWorkingSetSize);
- //printf("%12u QuotaPagedPoolUsage\n", (unsigned) PMC.QuotaPagedPoolUsage);
- //printf("%12u QuotaPeakPagedPoolUsage\n", (unsigned) PMC.QuotaPeakPagedPoolUsage);
- //printf("%12u QuotaNonPagedPoolUsage\n", (unsigned) PMC.QuotaNonPagedPoolUsage);
- //printf("%12u QuotaPeakNonPagedPoolUsage\n", (unsigned) PMC.QuotaPeakNonPagedPoolUsage);
- unsigned uBytes = (unsigned) PMC.WorkingSetSize;
- if (uBytes > uPeakMemUseBytes)
- uPeakMemUseBytes = uBytes;
- return (uBytes + 500000.0)/1000000.0;
- }
-
-double GetPeakMemUseMB()
- {
- return (uPeakMemUseBytes + 500000.0)/1000000.0;
- }
-
-void CheckMemUse()
- {
-// Side-effect: sets peak usage in uPeakMemUseBytes
- GetMemUseMB();
- }
-
-double GetCPUGHz()
- {
- double dGHz = 2.5;
- const char *e = getenv("CPUGHZ");
- if (0 != e)
- dGHz = atof(e);
- if (dGHz < 0.1 || dGHz > 1000.0)
- Quit("Invalid value '%s' for environment variable CPUGHZ", e);
- return dGHz;
- }
-#endif // WIN32
Deleted: trunk/packages/muscle/trunk/gonnet.cpp
===================================================================
--- trunk/packages/muscle/trunk/gonnet.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/gonnet.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,499 +0,0 @@
-#include "muscle.h"
-#include "gonnet.h"
-
-#define ROW(A, C, D, E, F, G, H, I, K, L, M, N, P, Q, R, S, T, V, W, Y) \
- { A/4.0, C/4.0, D/4.0, E/4.0, F/4.0, G/4.0, H/4.0, I/4.0, K/4.0, L/4.0, M/4.0, N/4.0, P/4.0, Q/4.0, R/4.0, S/4.0, T/4.0, V/4.0, W/4.0, Y/4.0 },
-
-static double Gonnet80[20][20] =
- {
-// A C D E F G H I K L
-// M N P Q R S T V W Y
-ROW( 1990, 1140, 930, 1070, 600, 1130, 850, 810, 940, 810,
- 980, 900, 1080, 1020, 880, 1380, 1190, 1180, 370, 590) // A
-
-ROW( 1140, 2780, 310, 300, 850, 630, 810, 700, 360, 690,
- 850, 690, 310, 480, 640, 1090, 900, 1030, 810, 920) // C
-
-ROW( 930, 310, 2200, 1550, 130, 980, 1070, 180, 1030, 150,
- 360, 1450, 820, 1150, 800, 1100, 1000, 350, 0, 550) // D
-
-ROW( 1070, 300, 1550, 2120, 220, 770, 1070, 510, 1280, 490,
- 710, 1110, 890, 1470, 1010, 1050, 970, 730, 260, 500) // E
-
-ROW( 600, 850, 130, 220, 2380, 90, 980, 1090, 350, 1310,
- 1270, 490, 310, 540, 340, 470, 620, 930, 1400, 1730) // F
-
-ROW( 1130, 630, 980, 770, 90, 2210, 710, 100, 740, 200,
- 410, 1060, 660, 800, 810, 1080, 720, 380, 430, 300) // G
-
-ROW( 850, 810, 1070, 1070, 980, 710, 2510, 600, 1120, 670,
- 860, 1330, 790, 1380, 1140, 990, 1000, 590, 810, 1450) // H
-
-ROW( 810, 700, 180, 510, 1090, 100, 600, 2100, 650, 1460,
- 1490, 530, 490, 640, 530, 620, 960, 1650, 610, 770) // I
-
-ROW( 940, 360, 1030, 1280, 350, 740, 1120, 650, 2090, 660,
- 870, 1220, 870, 1410, 1570, 1040, 1090, 700, 350, 640) // K
-
-ROW( 810, 690, 150, 490, 1310, 200, 670, 1460, 660, 2010,
- 1550, 450, 660, 850, 660, 600, 750, 1270, 800, 890) // L
-
-ROW( 980, 850, 360, 710, 1270, 410, 860, 1490, 870, 1550,
- 2410, 620, 460, 1050, 710, 830, 990, 1250, 790, 870) // M
-
-ROW( 900, 690, 1450, 1110, 490, 1060, 1330, 530, 1220, 450,
- 620, 2210, 760, 1180, 1020, 1290, 1170, 550, 380, 850) // N
-
-ROW( 1080, 310, 820, 890, 310, 660, 790, 490, 870, 660,
- 460, 760, 2380, 1000, 790, 1100, 1040, 670, 120, 480) // P
-
-ROW( 1020, 480, 1150, 1470, 540, 800, 1380, 640, 1410, 850,
- 1050, 1180, 1000, 2190, 1350, 1090, 1060, 730, 620, 710) // Q
-
-ROW( 880, 640, 800, 1010, 340, 810, 1140, 530, 1570, 660,
- 710, 1020, 790, 1350, 2210, 970, 970, 640, 830, 740) // R
-
-ROW( 1380, 1090, 1100, 1050, 470, 1080, 990, 620, 1040, 600,
- 830, 1290, 1100, 1090, 970, 2020, 1490, 810, 520, 780) // S
-
-ROW( 1190, 900, 1000, 970, 620, 720, 1000, 960, 1090, 750,
- 990, 1170, 1040, 1060, 970, 1490, 2050, 1150, 370, 660) // T
-
-ROW( 1180, 1030, 350, 730, 930, 380, 590, 1650, 700, 1270,
- 1250, 550, 670, 730, 640, 810, 1150, 2040, 440, 770) // V
-
-ROW( 370, 810, 0, 260, 1400, 430, 810, 610, 350, 800,
- 790, 380, 120, 620, 830, 520, 370, 440, 2970, 1470) // W
-
-ROW( 590, 920, 550, 500, 1730, 300, 1450, 770, 640, 890,
- 870, 850, 480, 710, 740, 780, 660, 770, 1470, 2470) // Y
- };
-
-static double Gonnet120[20][20] =
- {
-// A C D E F G H I K L
-// M N P Q R S T V W Y
-ROW( 1550, 950, 780, 870, 480, 930, 700, 690, 770, 660,
- 790, 760, 900, 840, 730, 1120, 980, 960, 280, 480) // A
-
-ROW( 950, 2400, 270, 280, 700, 510, 650, 600, 320, 570,
- 700, 550, 280, 400, 510, 890, 750, 850, 670, 760) // C
-
-ROW( 780, 270, 1780, 1310, 90, 820, 890, 160, 880, 140,
- 320, 1220, 680, 970, 690, 910, 830, 310, 0, 430) // D
-
-ROW( 870, 280, 1310, 1680, 180, 650, 900, 410, 1070, 390,
- 560, 950, 740, 1210, 860, 870, 810, 580, 180, 400) // E
-
-ROW( 480, 700, 90, 180, 1980, 40, 820, 930, 290, 1110,
- 1070, 380, 240, 430, 280, 380, 490, 790, 1230, 1510) // F
-
-ROW( 930, 510, 820, 650, 40, 1860, 590, 90, 620, 140,
- 310, 890, 550, 660, 660, 900, 610, 310, 300, 220) // G
-
-ROW( 700, 650, 890, 900, 820, 590, 2060, 480, 940, 540,
- 680, 1100, 650, 1130, 950, 820, 820, 490, 680, 1220) // H
-
-ROW( 690, 600, 160, 410, 930, 90, 480, 1680, 520, 1240,
- 1250, 410, 400, 530, 430, 520, 790, 1380, 500, 650) // I
-
-ROW( 770, 320, 880, 1070, 290, 620, 940, 520, 1650, 520,
- 690, 1010, 720, 1160, 1320, 860, 900, 570, 280, 520) // K
-
-ROW( 660, 570, 140, 390, 1110, 140, 540, 1240, 520, 1620,
- 1300, 350, 520, 660, 520, 490, 620, 1090, 670, 760) // L
-
-ROW( 790, 700, 320, 560, 1070, 310, 680, 1250, 690, 1300,
- 1910, 500, 400, 820, 580, 670, 800, 1060, 650, 740) // M
-
-ROW( 760, 550, 1220, 950, 380, 890, 1100, 410, 1010, 350,
- 500, 1760, 640, 970, 860, 1060, 960, 460, 280, 680) // N
-
-ROW( 900, 280, 680, 740, 240, 550, 650, 400, 720, 520,
- 400, 640, 2010, 820, 660, 910, 860, 540, 70, 370) // P
-
-ROW( 840, 400, 970, 1210, 430, 660, 1130, 530, 1160, 660,
- 820, 970, 820, 1700, 1120, 890, 870, 600, 470, 580) // Q
-
-ROW( 730, 510, 690, 860, 280, 660, 950, 430, 1320, 520,
- 580, 860, 660, 1120, 1790, 810, 800, 520, 660, 590) // R
-
-ROW( 1120, 890, 910, 870, 380, 900, 820, 520, 860, 490,
- 670, 1060, 910, 890, 810, 1560, 1220, 680, 390, 610) // S
-
-ROW( 980, 750, 830, 810, 490, 610, 820, 790, 900, 620,
- 800, 960, 860, 870, 800, 1220, 1600, 930, 290, 540) // T
-
-ROW( 960, 850, 310, 580, 790, 310, 490, 1380, 570, 1090,
- 1060, 460, 540, 600, 520, 680, 930, 1610, 370, 630) // V
-
-ROW( 280, 670, 0, 180, 1230, 300, 680, 500, 280, 670,
- 650, 280, 70, 470, 660, 390, 290, 370, 2620, 1290) // W
-
-ROW( 480, 760, 430, 400, 1510, 220, 1220, 650, 520, 760,
- 740, 680, 370, 580, 590, 610, 540, 630, 1290, 2070) // Y
- };
-
-static SCORE Gonnet160[20][20] =
- {
-// A C D E F G H I K L
-// M N P Q R S T V W Y
-ROW( 1240, 810, 670, 740, 400, 800, 600, 600, 660, 560,
- 660, 660, 770, 710, 620, 940, 830, 790, 230, 410) // A
-
-ROW( 810, 2130, 250, 260, 600, 440, 550, 530, 300, 490,
- 590, 470, 260, 360, 430, 760, 640, 720, 570, 650) // C
-
-ROW( 670, 250, 1480, 1120, 80, 710, 770, 160, 770, 130,
- 280, 1040, 590, 840, 620, 780, 720, 290, 0, 360) // D
-
-ROW( 740, 260, 1120, 1370, 160, 570, 770, 350, 910, 330,
- 470, 830, 640, 1010, 750, 750, 700, 480, 140, 340) // E
-
-ROW( 400, 600, 80, 160, 1690, 20, 710, 810, 250, 970,
- 920, 310, 200, 370, 250, 330, 420, 700, 1100, 1340) // F
-
-ROW( 800, 440, 710, 570, 20, 1600, 510, 80, 540, 110,
- 260, 760, 480, 570, 570, 770, 540, 260, 230, 180) // G
-
-ROW( 600, 550, 770, 770, 710, 510, 1710, 410, 800, 460,
- 570, 930, 560, 950, 810, 700, 700, 430, 590, 1050) // H
-
-ROW( 600, 530, 160, 350, 810, 80, 410, 1370, 430, 1080,
- 1070, 340, 350, 460, 370, 450, 660, 1180, 440, 580) // I
-
-ROW( 660, 300, 770, 910, 250, 540, 800, 430, 1330, 440,
- 570, 860, 620, 980, 1130, 740, 760, 480, 240, 430) // K
-
-ROW( 560, 490, 130, 330, 970, 110, 460, 1080, 440, 1350,
- 1120, 300, 430, 540, 430, 420, 540, 950, 580, 670) // L
-
-ROW( 660, 590, 280, 470, 920, 260, 570, 1070, 570, 1120,
- 1540, 420, 360, 660, 490, 550, 670, 920, 560, 650) // M
-
-ROW( 660, 470, 1040, 830, 310, 760, 930, 340, 860, 300,
- 420, 1430, 560, 830, 740, 890, 810, 400, 230, 560) // N
-
-ROW( 770, 260, 590, 640, 200, 480, 560, 350, 620, 430,
- 360, 560, 1740, 700, 570, 780, 740, 460, 40, 300) // P
-
-ROW( 710, 360, 840, 1010, 370, 570, 950, 460, 980, 540,
- 660, 830, 700, 1340, 950, 760, 740, 510, 380, 490) // Q
-
-ROW( 620, 430, 620, 750, 250, 570, 810, 370, 1130, 430,
- 490, 740, 570, 950, 1490, 690, 690, 440, 540, 490) // R
-
-ROW( 940, 760, 780, 750, 330, 770, 700, 450, 740, 420,
- 550, 890, 780, 760, 690, 1220, 1010, 580, 310, 500) // S
-
-ROW( 830, 640, 720, 700, 420, 540, 700, 660, 760, 540,
- 670, 810, 740, 740, 690, 1010, 1280, 780, 240, 460) // T
-
-ROW( 790, 720, 290, 480, 700, 260, 430, 1180, 480, 950,
- 920, 400, 460, 510, 440, 580, 780, 1310, 330, 540) // V
-
-ROW( 230, 570, 0, 140, 1100, 230, 590, 440, 240, 580,
- 560, 230, 40, 380, 540, 310, 240, 330, 2360, 1160) // W
-
-ROW( 410, 650, 360, 340, 1340, 180, 1050, 580, 430, 670,
- 650, 560, 300, 490, 490, 500, 460, 540, 1160, 1780) // Y
- };
-
-double Gonnet16[21][21] =
- {
-// A C D E F G H I K L
-// M N P Q R S T V W Y
-ROW( 124, 81, 67, 74, 40, 80, 60, 60, 66, 56,
- 66, 66, 77, 71, 62, 94, 83, 79, 23, 41) // A
-
-ROW( 81, 213, 25, 26, 60, 44, 55, 53, 30, 49,
- 59, 47, 26, 36, 43, 76, 64, 72, 57, 65) // C
-
-ROW( 67, 25, 148, 112, 8, 71, 77, 16, 77, 13,
- 28, 104, 59, 84, 62, 78, 72, 29, 0, 36) // D
-
-ROW( 74, 26, 112, 137, 16, 57, 77, 35, 91, 33,
- 47, 83, 64, 101, 75, 75, 70, 48, 14, 34) // E
-
-ROW( 40, 60, 8, 16, 169, 2, 71, 81, 25, 97,
- 92, 31, 20, 37, 25, 33, 42, 70, 110, 134) // F
-
-ROW( 80, 44, 71, 57, 2, 160, 51, 8, 54, 11,
- 26, 76, 48, 57, 57, 77, 54, 26, 23, 18) // G
-
-ROW( 60, 55, 77, 77, 71, 51, 171, 41, 80, 46,
- 57, 93, 56, 95, 81, 70, 70, 43, 59, 105) // H
-
-ROW( 60, 53, 16, 35, 81, 8, 41, 137, 43, 108,
- 107, 34, 35, 46, 37, 45, 66, 118, 44, 58) // I
-
-ROW( 66, 30, 77, 91, 25, 54, 80, 43, 133, 44,
- 57, 86, 62, 98, 113, 74, 76, 48, 24, 43) // K
-
-ROW( 56, 49, 13, 33, 97, 11, 46, 108, 44, 135,
- 112, 30, 43, 54, 43, 42, 54, 95, 58, 67) // L
-
-ROW( 66, 59, 28, 47, 92, 26, 57, 107, 57, 112,
- 154, 42, 36, 66, 49, 55, 67, 92, 56, 65) // M
-
-ROW( 66, 47, 104, 83, 31, 76, 93, 34, 86, 30,
- 42, 143, 56, 83, 74, 89, 81, 40, 23, 56) // N
-
-ROW( 77, 26, 59, 64, 20, 48, 56, 35, 62, 43,
- 36, 56, 174, 70, 57, 78, 74, 46, 4, 30) // P
-
-ROW( 71, 36, 84, 101, 37, 57, 95, 46, 98, 54,
- 66, 83, 70, 134, 95, 76, 74, 51, 38, 49) // Q
-
-ROW( 62, 43, 62, 75, 25, 57, 81, 37, 113, 43,
- 49, 74, 57, 95, 149, 69, 69, 44, 54, 49) // R
-
-ROW( 94, 76, 78, 75, 33, 77, 70, 45, 74, 42,
- 55, 89, 78, 76, 69, 122, 101, 58, 31, 50) // S
-
-ROW( 83, 64, 72, 70, 42, 54, 70, 66, 76, 54,
- 67, 81, 74, 74, 69, 101, 128, 78, 24, 46) // T
-
-ROW( 79, 72, 29, 48, 70, 26, 43, 118, 48, 95,
- 92, 40, 46, 51, 44, 58, 78, 131, 33, 54) // V
-
-ROW( 23, 57, 0, 14, 110, 23, 59, 44, 24, 58,
- 56, 23, 4, 38, 54, 31, 24, 33, 236, 116) // W
-
-ROW( 41, 65, 36, 34, 134, 18, 105, 58, 43, 67,
- 65, 56, 30, 49, 49, 50, 46, 54, 116, 178) // Y
- };
-
-static double Gonnet250[20][20] =
- {
-// A C D E F G H I K L
-// M N P Q R S T V W Y
-ROW( 760, 570, 490, 520, 290, 570, 440, 440, 480, 400,
- 450, 490, 550, 500, 460, 630, 580, 530, 160, 300) // A
-
-ROW( 570, 1670, 200, 220, 440, 320, 390, 410, 240, 370,
- 430, 340, 210, 280, 300, 530, 470, 520, 420, 470) // C
-
-ROW( 490, 200, 990, 790, 70, 530, 560, 140, 570, 120,
- 220, 740, 450, 610, 490, 570, 520, 230, 0, 240) // D
-
-ROW( 520, 220, 790, 880, 130, 440, 560, 250, 640, 240,
- 320, 610, 470, 690, 560, 540, 510, 330, 90, 250) // E
-
-ROW( 290, 440, 70, 130, 1220, 0, 510, 620, 190, 720,
- 680, 210, 140, 260, 200, 240, 300, 530, 880, 1030) // F
-
-ROW( 570, 320, 530, 440, 0, 1180, 380, 70, 410, 80,
- 170, 560, 360, 420, 420, 560, 410, 190, 120, 120) // G
-
-ROW( 440, 390, 560, 560, 510, 380, 1120, 300, 580, 330,
- 390, 640, 410, 640, 580, 500, 490, 320, 440, 740) // H
-
-ROW( 440, 410, 140, 250, 620, 70, 300, 920, 310, 800,
- 770, 240, 260, 330, 280, 340, 460, 830, 340, 450) // I
-
-ROW( 480, 240, 570, 640, 190, 410, 580, 310, 840, 310,
- 380, 600, 460, 670, 790, 530, 530, 350, 170, 310) // K
-
-ROW( 400, 370, 120, 240, 720, 80, 330, 800, 310, 920,
- 800, 220, 290, 360, 300, 310, 390, 700, 450, 520) // L
-
-ROW( 450, 430, 220, 320, 680, 170, 390, 770, 380, 800,
- 950, 300, 280, 420, 350, 380, 460, 680, 420, 500) // M
-
-ROW( 490, 340, 740, 610, 210, 560, 640, 240, 600, 220,
- 300, 900, 430, 590, 550, 610, 570, 300, 160, 380) // N
-
-ROW( 550, 210, 450, 470, 140, 360, 410, 260, 460, 290,
- 280, 430, 1280, 500, 430, 560, 530, 340, 20, 210) // P
-
-ROW( 500, 280, 610, 690, 260, 420, 640, 330, 670, 360,
- 420, 590, 500, 790, 670, 540, 520, 370, 250, 350) // Q
-
-ROW( 460, 300, 490, 560, 200, 420, 580, 280, 790, 300,
- 350, 550, 430, 670, 990, 500, 500, 320, 360, 340) // R
-
-ROW( 630, 530, 570, 540, 240, 560, 500, 340, 530, 310,
- 380, 610, 560, 540, 500, 740, 670, 420, 190, 330) // S
-
-ROW( 580, 470, 520, 510, 300, 410, 490, 460, 530, 390,
- 460, 570, 530, 520, 500, 670, 770, 520, 170, 330) // T
-
-ROW( 530, 520, 230, 330, 530, 190, 320, 830, 350, 700,
- 680, 300, 340, 370, 320, 420, 520, 860, 260, 410) // V
-
-ROW( 160, 420, 0, 90, 880, 120, 440, 340, 170, 450,
- 420, 160, 20, 250, 360, 190, 170, 260, 1940, 930) // W
-
-ROW( 300, 470, 240, 250, 1030, 120, 740, 450, 310, 520,
- 500, 380, 210, 350, 340, 330, 330, 410, 930, 1300) // Y
- };
-
-static double Gonnet350[20][20] =
- {
-// A C D E F G H I K L
-// M N P Q R S T V W Y
-ROW( 450, 390, 350, 360, 210, 400, 310, 310, 340, 280,
- 310, 350, 380, 350, 330, 410, 390, 350, 110, 210) // A
-
-ROW( 390, 1280, 160, 180, 320, 230, 270, 300, 190, 280,
- 310, 240, 170, 210, 220, 360, 330, 370, 310, 340) // C
-
-ROW( 350, 160, 640, 540, 50, 390, 400, 110, 410, 100,
- 160, 500, 330, 430, 370, 400, 370, 170, 0, 170) // D
-
-ROW( 360, 180, 540, 550, 100, 330, 390, 180, 440, 170,
- 220, 440, 350, 460, 410, 380, 360, 230, 60, 180) // E
-
-ROW( 210, 320, 50, 100, 860, 0, 360, 460, 140, 530,
- 490, 150, 100, 190, 150, 170, 220, 400, 700, 770) // F
-
-ROW( 400, 230, 390, 330, 0, 860, 280, 60, 310, 50,
- 120, 400, 280, 310, 310, 400, 300, 140, 50, 80) // G
-
-ROW( 310, 270, 400, 390, 360, 280, 680, 220, 400, 240,
- 270, 430, 300, 420, 410, 350, 340, 240, 320, 500) // H
-
-ROW( 310, 300, 110, 180, 460, 60, 220, 620, 220, 570,
- 540, 170, 190, 240, 200, 240, 320, 570, 260, 340) // I
-
-ROW( 340, 190, 410, 440, 140, 310, 400, 220, 530, 210,
- 260, 420, 330, 450, 530, 370, 370, 250, 120, 210) // K
-
-ROW( 280, 280, 100, 170, 530, 50, 240, 570, 210, 630,
- 560, 160, 200, 240, 210, 220, 280, 510, 340, 400) // L
-
-ROW( 310, 310, 160, 220, 490, 120, 270, 540, 260, 560,
- 580, 210, 210, 280, 240, 260, 310, 490, 320, 370) // M
-
-ROW( 350, 240, 500, 440, 150, 400, 430, 170, 420, 160,
- 210, 550, 320, 410, 390, 410, 390, 220, 110, 250) // N
-
-ROW( 380, 170, 330, 350, 100, 280, 300, 190, 330, 200,
- 210, 320, 910, 350, 310, 390, 370, 240, 10, 150) // P
-
-ROW( 350, 210, 430, 460, 190, 310, 420, 240, 450, 240,
- 280, 410, 350, 470, 450, 370, 360, 260, 160, 240) // Q
-
-ROW( 330, 220, 370, 410, 150, 310, 410, 200, 530, 210,
- 240, 390, 310, 450, 630, 360, 350, 230, 230, 230) // R
-
-ROW( 410, 360, 400, 380, 170, 400, 350, 240, 370, 220,
- 260, 410, 390, 370, 360, 450, 430, 290, 130, 230) // S
-
-ROW( 390, 330, 370, 360, 220, 300, 340, 320, 370, 280,
- 310, 390, 370, 360, 350, 430, 460, 350, 120, 230) // T
-
-ROW( 350, 370, 170, 230, 400, 140, 240, 570, 250, 510,
- 490, 220, 240, 260, 230, 290, 350, 560, 210, 310) // V
-
-ROW( 110, 310, 0, 60, 700, 50, 320, 260, 120, 340,
- 320, 110, 10, 160, 230, 130, 120, 210, 1590, 740) // W
-
-ROW( 210, 340, 170, 180, 770, 80, 500, 340, 210, 400,
- 370, 250, 150, 240, 230, 230, 230, 310, 740, 920) // Y
- };
-
-// Return the built-in Gonnet table selected by N.
-// Tables are served for N = 80, 120, 250 and 350 only; Gonnet16 and
-// Gonnet160 are defined in this file but are not returned from here.
-// Any other N is reported through Quit().
-const t_ROW *GetGonnetMatrix(unsigned N)
-    {
-    if (80 == N)
-        return Gonnet80;
-    if (120 == N)
-        return Gonnet120;
-    if (250 == N)
-        return Gonnet250;
-    if (350 == N)
-        return Gonnet350;
-
-    Quit("Invalid Gonnet%u", N);
-    return 0;
-    }
-
-//SCORE GetGonnetGapOpen(unsigned N)
-// {
-// switch (N)
-// {
-// case 80:
-// return -639;
-// case 120:
-// return -863;
-// case 160:
-// return -611;
-// case 250:
-// return -308;
-// case 350:
-// return -158;
-// }
-// Quit("Invalid Gonnet%u", N);
-// return 0;
-// }
-
-// Return the gap-open score paired with Gonnet table N
-// (N = 80, 120, 160, 250 or 350). Any other N is reported through Quit().
-SCORE GetGonnetGapOpen(unsigned N)
-    {
-    static const unsigned Ns[] = { 80, 120, 160, 250, 350 };
-    static const int Opens[] = { -1000, -800, -700, -200, -175 };
-    const unsigned uEntryCount = sizeof(Ns)/sizeof(Ns[0]);
-
-    for (unsigned i = 0; i < uEntryCount; ++i)
-        if (Ns[i] == N)
-            return (SCORE) Opens[i];
-
-    Quit("Invalid Gonnet%u", N);
-    return 0;
-    }
-
-// Return the gap-extend value paired with Gonnet table N
-// (N = 80, 120, 160, 250 or 350). Any other N is reported through Quit().
-// Note that, unlike the gap-open values, these are positive numbers.
-SCORE GetGonnetGapExtend(unsigned N)
-    {
-    static const unsigned Ns[] = { 80, 120, 160, 250, 350 };
-    static const int Extends[] = { 350, 200, 175, 20, 20 };
-    const unsigned uEntryCount = sizeof(Ns)/sizeof(Ns[0]);
-
-    for (unsigned i = 0; i < uEntryCount; ++i)
-        if (Ns[i] == N)
-            return (SCORE) Extends[i];
-
-    Quit("Invalid Gonnet%u", N);
-    return 0;
-    }
-
-//double GonnetLookup[400][400];
-//
-//static bool InitGonnetLookup()
-// {
-// for (unsigned i = 0; i < 400; ++i)
-// {
-// const unsigned A1 = i/20;
-// const unsigned A2 = i%20;
-// for (unsigned j = 0; j <= i; ++j)
-// {
-// const unsigned B1 = j/20;
-// const unsigned B2 = j%20;
-//
-// const double s00 = Gonnet16[A1][B1];
-// const double s01 = Gonnet16[A1][B2];
-// const double s10 = Gonnet16[A2][B1];
-// const double s11 = Gonnet16[A2][B2];
-//
-// GonnetLookup[i][j] = GonnetLookup[j][i] = (s00 + s01 + s10 + s11)/4;
-// }
-// }
-// return true;
-// }
-//
-//static bool bGonnetLookupInitialized = InitGonnetLookup();
Deleted: trunk/packages/muscle/trunk/gonnet.h
===================================================================
--- trunk/packages/muscle/trunk/gonnet.h 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/gonnet.h 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,12 +0,0 @@
-#ifndef Gonnet_h
-#define Gonnet_h
-
-// One row of a Gonnet table: 20 double-precision entries.
-typedef double t_ROW[20];
-
-// Accessors implemented in gonnet.cpp. Each one calls Quit() when N is not
-// one of the values it supports.
-// NOTE(review): SCORE is not declared in this header, so it presumably has
-// to be included after muscle.h -- confirm.
-const t_ROW *GetGonnetMatrix(unsigned N);
-SCORE GetGonnetGapOpen(unsigned N);
-SCORE GetGonnetGapExtend(unsigned N);
-
-// NOTE(review): the only definition of GonnetLookup visible in gonnet.cpp
-// is commented out -- confirm that nothing still references this symbol.
-extern double GonnetLookup[400][400];
-
-#endif // Gonnet_h
Deleted: trunk/packages/muscle/trunk/gotowt.cpp
===================================================================
Deleted: trunk/packages/muscle/trunk/henikoffweight.cpp
===================================================================
--- trunk/packages/muscle/trunk/henikoffweight.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/henikoffweight.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,84 +0,0 @@
-#include "muscle.h"
-#include "msa.h"
-
-/***
-Compute Henikoff weights.
-Steven Henikoff and Jorja G. Henikoff (1994), Position-based sequence weights.
-J. Mol. Biol., 243(4):574-578.
-
-Award each different residue an equal share of the weight, and then to divide up
-that weight equally among the sequences sharing the same residue. So if in a
-position of a multiple alignment, r different residues are represented, a residue
-represented in only one sequence contributes a score of 1/r to that sequence, whereas a
-residue represented in s sequences contributes a score of 1/rs to each of the s
-sequences. For each sequence, the contributions from each position are summed to give
-a sequence weight.
-
-See also HenikoffWeightPB.
-***/
-
-// Add the Henikoff weight contribution of column uColIndex to m_Weights.
-// Letters with a code of 20 or more are ignored. A sequence whose letter
-// occurs s times in a column containing r different letters receives
-// 1/(r*s).
-void MSA::CalcHenikoffWeightsCol(unsigned uColIndex) const
-    {
-    const unsigned uSeqCount = GetSeqCount();
-
-    // Pass 1: tally each letter and count how many different letters occur
-    unsigned uLetterCount[MAX_ALPHA];
-    memset(uLetterCount, 0, sizeof(uLetterCount));
-    unsigned uDistinct = 0;
-    for (unsigned i = 0; i < uSeqCount; ++i)
-        {
-        const unsigned uLetter = GetLetterEx(i, uColIndex);
-        if (uLetter < 20)
-            {
-            // First sighting of this letter in the column
-            if (0 == uLetterCount[uLetter]++)
-                ++uDistinct;
-            }
-        }
-
-    // Pass 2: hand each sequence its share
-    for (unsigned i = 0; i < uSeqCount; ++i)
-        {
-        const unsigned uLetter = GetLetterEx(i, uColIndex);
-        if (uLetter < 20)
-            {
-            const unsigned uDenom = uLetterCount[uLetter]*uDistinct;
-            if (0 != uDenom)
-                m_Weights[i] += (WEIGHT) (1.0/uDenom);
-            }
-        }
-    }
-
-// Fill m_Weights with normalized Henikoff weights.
-// Alignments of zero, one or two sequences are handled directly (nothing,
-// 1.0, and 0.5/0.5 respectively). Otherwise the per-column contributions
-// are summed, all-gap sequences are forced to zero, and the vector is
-// passed to Normalize().
-void MSA::SetHenikoffWeights() const
-    {
-    const unsigned uSeqCount = GetSeqCount();
-    const unsigned uColCount = GetColCount();
-
-    switch (uSeqCount)
-        {
-    case 0:
-        return;
-    case 1:
-        m_Weights[0] = (WEIGHT) 1.0;
-        return;
-    case 2:
-        m_Weights[0] = (WEIGHT) 0.5;
-        m_Weights[1] = (WEIGHT) 0.5;
-        return;
-    default:
-        break;
-        }
-
-    for (unsigned i = 0; i < uSeqCount; ++i)
-        m_Weights[i] = 0.0;
-
-    for (unsigned uCol = 0; uCol < uColCount; ++uCol)
-        CalcHenikoffWeightsCol(uCol);
-
-    // Sequences consisting only of gaps get no weight at all
-    for (unsigned i = 0; i < uSeqCount; ++i)
-        {
-        if (IsGapSeq(i))
-            m_Weights[i] = 0.0;
-        }
-
-    Normalize(m_Weights, uSeqCount);
-    }
Deleted: trunk/packages/muscle/trunk/henikoffweightpb.cpp
===================================================================
--- trunk/packages/muscle/trunk/henikoffweightpb.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/henikoffweightpb.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,124 +0,0 @@
-#include "muscle.h"
-#include "msa.h"
-
-/***
-Compute Henikoff weights.
-Steven Henikoff and Jorja G. Henikoff (1994), Position-based sequence weights.
-J. Mol. Biol., 243(4):574-578.
-
-Award each different residue an equal share of the weight, and then to divide up
-that weight equally among the sequences sharing the same residue. So if in a
-position of a multiple alignment, r different residues are represented, a residue
-represented in only one sequence contributes a score of 1/r to that sequence, whereas a
-residue represented in s sequences contributes a score of 1/rs to each of the s
-sequences. For each sequence, the contributions from each position are summed to give
-a sequence weight.
-
-Here we use the variant from PSI-BLAST, which (a) treats gaps as a 21st letter,
-and (b) ignores columns that are perfectly conserved.
-
->>> WARNING -- I SUSPECT THIS DOESN'T WORK CORRECTLY <<<
-***/
-
-// Add the contribution of column uColIndex to m_Weights, PSI-BLAST style:
-// gaps and wildcards share one extra letter slot (index MAX_ALPHA), and a
-// column in which every sequence has the same letter contributes nothing.
-// NOTE(review): each sequence receives 1/s (s = number of sequences with
-// the same letter); the count of different letters is not divided in.
-// This may be what the author's warning about this file refers to --
-// behaviour is intentionally left as it was.
-void MSA::CalcHenikoffWeightsColPB(unsigned uColIndex) const
-    {
-    const unsigned uSeqCount = GetSeqCount();
-
-    // Tally letters; slot MAX_ALPHA collects gaps and wildcards
-    unsigned uLetterCount[MAX_ALPHA+1];
-    memset(uLetterCount, 0, sizeof(uLetterCount));
-    for (unsigned i = 0; i < uSeqCount; ++i)
-        {
-        const bool bGapLike = IsGap(i, uColIndex) || IsWildcard(i, uColIndex);
-        const unsigned uLetter = bGapLike ? MAX_ALPHA : GetLetter(i, uColIndex);
-        ++(uLetterCount[uLetter]);
-        }
-
-    // Only the first letter that occurs needs inspecting: the column is
-    // perfectly conserved exactly when that letter accounts for every
-    // sequence, in which case the column is skipped.
-    for (unsigned uSlot = 0; uSlot < MAX_ALPHA+1; ++uSlot)
-        {
-        const unsigned uCount = uLetterCount[uSlot];
-        if (0 == uCount)
-            continue;
-        if (uCount == uSeqCount)
-            return;
-        break;
-        }
-
-    // Weight contributions
-    for (unsigned i = 0; i < uSeqCount; ++i)
-        {
-        const bool bGapLike = IsGap(i, uColIndex) || IsWildcard(i, uColIndex);
-        const unsigned uLetter = bGapLike ? MAX_ALPHA : GetLetter(i, uColIndex);
-        m_Weights[i] += (WEIGHT) (1.0/uLetterCount[uLetter]);
-        }
-    }
-
-// Return true when every column of sequence uSeqIndex is a gap
-// (which includes the case of an alignment with no columns).
-bool MSA::IsGapSeq(unsigned uSeqIndex) const
-    {
-    const unsigned uColCount = GetColCount();
-    unsigned uCol = 0;
-    while (uCol < uColCount && IsGap(uSeqIndex, uCol))
-        ++uCol;
-    return uCol == uColCount;
-    }
-
-// Give every sequence the same weight, 1/(number of sequences).
-// Does nothing for an alignment without sequences.
-void MSA::SetUniformWeights() const
-    {
-    const unsigned uSeqCount = GetSeqCount();
-    if (uSeqCount > 0)
-        {
-        const WEIGHT wEach = (WEIGHT) (1.0 / uSeqCount);
-        for (unsigned i = 0; i < uSeqCount; ++i)
-            m_Weights[i] = wEach;
-        }
-    }
-
-// Fill m_Weights using the PSI-BLAST flavoured column contributions.
-// Alignments of zero, one or two sequences are handled directly (nothing,
-// 1.0, and 0.5/0.5 respectively). Otherwise the per-column contributions
-// are summed, all-gap sequences are forced to zero, and the vector is
-// passed to Normalize().
-void MSA::SetHenikoffWeightsPB() const
-    {
-    const unsigned uSeqCount = GetSeqCount();
-    const unsigned uColCount = GetColCount();
-
-    switch (uSeqCount)
-        {
-    case 0:
-        return;
-    case 1:
-        m_Weights[0] = 1.0;
-        return;
-    case 2:
-        m_Weights[0] = (WEIGHT) 0.5;
-        m_Weights[1] = (WEIGHT) 0.5;
-        return;
-    default:
-        break;
-        }
-
-    for (unsigned i = 0; i < uSeqCount; ++i)
-        m_Weights[i] = 0.0;
-
-    for (unsigned uCol = 0; uCol < uColCount; ++uCol)
-        CalcHenikoffWeightsColPB(uCol);
-
-    // Sequences consisting only of gaps get no weight at all
-    for (unsigned i = 0; i < uSeqCount; ++i)
-        {
-        if (IsGapSeq(i))
-            m_Weights[i] = 0.0;
-        }
-
-    // Identical sequences make every column perfectly conserved, so every
-    // column is skipped and all weights stay zero; fall back to equal
-    // weights before normalizing.
-    if (VectorIsZero(m_Weights, uSeqCount))
-        VectorSet(m_Weights, uSeqCount, 1.0);
-
-    Normalize(m_Weights, uSeqCount);
-    }
Deleted: trunk/packages/muscle/trunk/html.cpp
===================================================================
--- trunk/packages/muscle/trunk/html.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/html.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,136 +0,0 @@
-#include "muscle.h"
-#include <stdio.h>
-#include <ctype.h>
-#include "msa.h"
-#include "textfile.h"
-
-const unsigned uCharsPerLine = 60;
-const int MIN_NAME = 10;
-const int MAX_NAME = 32;
-
-extern void AssignColors(const MSA &a, int **Colors);
-
-// Allocate a (sequence count) x (column count) matrix of ints, zero it,
-// and let AssignColors() fill it in. The caller owns the result: every row
-// and the row-pointer array itself come from new[].
-static int **MakeColors(const MSA &a)
-    {
-    const unsigned uRows = a.GetSeqCount();
-    const unsigned uCols = a.GetColCount();
-
-    int **Colors = new int *[uRows];
-    for (unsigned uRow = 0; uRow < uRows; ++uRow)
-        {
-        int *Row = new int[uCols];
-        for (unsigned uCol = 0; uCol < uCols; ++uCol)
-            Row[uCol] = 0;
-        Colors[uRow] = Row;
-        }
-
-    AssignColors(a, Colors);
-    return Colors;
-    }
-
-// Write the HTML that switches the running background colour from colour
-// index From to colour index To. Nothing is written when the two are equal
-// or when To is not one of the indexes 0..3.
-static void ChangeColor(TextFile &File, int From, int To)
-    {
-#define COLOR_WHITE "FFFFFF"
-#define COLOR_GRAY "C0C0C0"
-#define COLOR_BLACK "000000"
-#define COLOR_RED "FF0000"
-#define COLOR_GREEN "00FF00"
-#define COLOR_BLUE "5590FF"
-#define COLOR_LIGHTBLUE "77FFFF"
-
-// Closes the current SPAN and opens one with background colour c
-#define X(c) File.PutString("</SPAN><SPAN STYLE=\"background-color:#" c "\">");
-
-    const bool bSameColor = (From == To);
-    if (bSameColor)
-        return;
-
-    if (0 == To)
-        {
-        X(COLOR_WHITE)
-        }
-    else if (1 == To)
-        {
-        X(COLOR_GRAY)
-        }
-    else if (2 == To)
-        {
-        X(COLOR_BLUE)
-        }
-    else if (3 == To)
-        {
-        X(COLOR_LIGHTBLUE)
-        }
-    }
-
-#define COLOR_WINDOW "FFEEE0"
-
-// Length of a sequence label up to (not including) its first blank, or
-// the length of the whole label when it contains no blank.
-static int LabelLength(const char *ptrName)
-    {
-    const char *ptrBlank = strchr(ptrName, ' ');
-    if (0 != ptrBlank)
-        return (int) (ptrBlank - ptrName);
-    return (int) strlen(ptrName);
-    }
-
-// Write the alignment to File as a coloured HTML page, uCharsPerLine
-// columns per text line. Each row starts with the sequence label (cut at
-// its first blank and padded or truncated to a common width between
-// MIN_NAME and MAX_NAME). Letters with colour index 0 are written in lower
-// case, all others in upper case.
-//
-// Changes relative to the previous version:
-//  - an alignment with zero columns no longer wraps the unsigned line
-//    count around (which led to reads far outside the colour matrix);
-//    only the page header and footer are written in that case;
-//  - the colour matrix obtained from MakeColors() is released;
-//  - characters are converted to unsigned char before tolower()/toupper().
-//
-// NOTE(review): the SPAN tags written here are not balanced (two are
-// opened per row, one is closed at the very end). The emitted markup is
-// kept exactly as it was.
-void MSA::ToHTMLFile(TextFile &File) const
-    {
-    const unsigned uSeqCount = GetSeqCount();
-    const unsigned uColCount = GetColCount();
-
-    File.PutString("<HTML>\n");
-    File.PutString("<BODY BGCOLOR=\"#" COLOR_WINDOW "\">\n");
-    File.PutString("<PRE>");
-
-    int **Colors = MakeColors(*this);
-
-    // Common label width, clamped to [MIN_NAME, MAX_NAME]
-    int iLongestNameLength = 0;
-    for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
-        {
-        const int iLength = LabelLength(GetSeqName(uSeqIndex));
-        if (iLength > iLongestNameLength)
-            iLongestNameLength = iLength;
-        }
-    if (iLongestNameLength > MAX_NAME)
-        iLongestNameLength = MAX_NAME;
-    if (iLongestNameLength < MIN_NAME)
-        iLongestNameLength = MIN_NAME;
-
-    // Guard the subtraction: with no columns there are no lines to write
-    const unsigned uLineCount =
-      (0 == uColCount) ? 0 : (uColCount - 1)/uCharsPerLine + 1;
-
-    int CurrentColor = -1;
-    for (unsigned uLineIndex = 0; uLineIndex < uLineCount; ++uLineIndex)
-        {
-        File.PutString("\n");
-        const unsigned uStartColIndex = uLineIndex*uCharsPerLine;
-        unsigned uEndColIndex = uStartColIndex + uCharsPerLine - 1;
-        if (uEndColIndex >= uColCount)
-            uEndColIndex = uColCount - 1;
-
-        char Name[MAX_NAME+1];
-        for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
-            {
-            const char *ptrName = GetSeqName(uSeqIndex);
-            int iLength = LabelLength(ptrName);
-            if (iLength > MAX_NAME)
-                iLength = MAX_NAME;
-
-            // Blank-pad, copy the label, then cut at the common width
-            memset(Name, ' ', MAX_NAME);
-            memcpy(Name, ptrName, iLength);
-            Name[iLongestNameLength] = 0;
-
-            // Force a colour tag at the first column of every row
-            CurrentColor = -1;
-            File.PutString("<SPAN STYLE=\"background-color:#" COLOR_WINDOW "\">");
-            File.PutFormat("%s ", Name);
-            File.PutString("<SPAN STYLE=\"background-color:#FFFFFF\">");
-            for (unsigned uColIndex = uStartColIndex; uColIndex <= uEndColIndex;
-              ++uColIndex)
-                {
-                const int Color = Colors[uSeqIndex][uColIndex];
-                ChangeColor(File, CurrentColor, Color);
-                CurrentColor = Color;
-                // <ctype.h> functions require a value representable as
-                // unsigned char (or EOF)
-                const unsigned char c = (unsigned char) GetChar(uSeqIndex, uColIndex);
-                if (Color == 0)
-                    File.PutFormat("%c", tolower(c));
-                else
-                    File.PutFormat("%c", toupper(c));
-                }
-            File.PutString("\n");
-            }
-        }
-    File.PutString("</SPAN>\n");
-    File.PutString("</PRE>\n");
-    File.PutString("</BODY>\n");
-    File.PutString("</HTML>\n");
-
-    // Release the matrix allocated by MakeColors()
-    for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
-        delete[] Colors[uSeqIndex];
-    delete[] Colors;
-    }
Deleted: trunk/packages/muscle/trunk/hydro.cpp
===================================================================
--- trunk/packages/muscle/trunk/hydro.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/hydro.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,37 +0,0 @@
-#include "muscle.h"
-#include "profile.h"
-
-// Apply hydrophobicity heuristic to a profile.
-//
-// Prof points to uLength profile positions. A position counts as
-// hydrophobic when its m_fOcc exceeds 0.999 and IsHydrophobic() accepts its
-// counts. Once a run of consecutive hydrophobic positions reaches
-// g_uHydrophobicRunLength, the gap-open and gap-close scores of a window
-// ending at the current position are multiplied by g_dHydroFactor; every
-// further position of the same run is scaled as it is reached.
-// The heuristic is disabled when g_uHydrophobicRunLength is 0.
-//
-// Fix: the start of the window, uColIndex - g_uHydrophobicRunLength - 1,
-// was computed in unsigned arithmetic and wrapped around for runs starting
-// in column 0 or 1, so the loop body never ran and those runs were left
-// unscaled. The start is now clamped to column 0.
-// NOTE(review): the window still starts two columns before the run itself,
-// as in the original code -- confirm whether that is intended.
-void Hydro(ProfPos *Prof, unsigned uLength)
-    {
-    if (0 == g_uHydrophobicRunLength)
-        return;
-
-    unsigned uRunLength = 0;
-    for (unsigned uColIndex = 0; uColIndex < uLength; ++uColIndex)
-        {
-        ProfPos &PP = Prof[uColIndex];
-        const bool bHydro = (PP.m_fOcc > 0.999 && IsHydrophobic(PP.m_fcCounts));
-        if (!bHydro)
-            {
-            uRunLength = 0;
-            continue;
-            }
-
-        ++uRunLength;
-        if (uRunLength > g_uHydrophobicRunLength)
-            {
-            PP.m_scoreGapOpen *= (SCORE) g_dHydroFactor;
-            PP.m_scoreGapClose *= (SCORE) g_dHydroFactor;
-            }
-        else if (uRunLength == g_uHydrophobicRunLength)
-            {
-            // Clamp instead of letting the subtraction wrap around
-            const unsigned uBack = g_uHydrophobicRunLength + 1;
-            const unsigned uFirst = (uColIndex >= uBack) ? uColIndex - uBack : 0;
-            for (unsigned n = uFirst; n <= uColIndex; ++n)
-                {
-                ProfPos &PPn = Prof[n];
-                PPn.m_scoreGapOpen *= (SCORE) g_dHydroFactor;
-                PPn.m_scoreGapClose *= (SCORE) g_dHydroFactor;
-                }
-            }
-        }
-    }
Deleted: trunk/packages/muscle/trunk/intmath.cpp
===================================================================
--- trunk/packages/muscle/trunk/intmath.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/intmath.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,352 +0,0 @@
-#include "muscle.h"
-#include <math.h>
-
-// Convert a log2 score to a probability: 2^(Score/INTSCALE).
-// Any score at or below MINUS_INFINITY maps to probability zero.
-PROB ScoreToProb(SCORE Score)
-    {
-    const bool bIsMinusInfinity = (Score <= MINUS_INFINITY);
-    if (bIsMinusInfinity)
-        return 0.0;
-
-    const double dExponent = (double) Score/INTSCALE;
-    return (PROB) pow(2.0, dExponent);
-    }
-
-static const double log2e = log2(exp(1.0));
-
-double lnTolog2(double ln)
- {
- return ln*log2e;
- }
-
-// Base-2 logarithm of x, with log2(0) reported as MINUS_INFINITY.
-double log2(double x)
-    {
-// 1/ln(2) is computed once so that the conversion from natural log is a
-// multiplication, just in case multiplication is faster than division.
-    static const double dInvLn2 = 1.0/log(2);
-
-    if (x == 0)
-        return MINUS_INFINITY;
-
-    const double dNaturalLog = log(x);
-    return dNaturalLog*dInvLn2;
-    }
-
-SCORE ProbToScore(PROB Prob)
- {
- if (0.0 == Prob)
- return MINUS_INFINITY;
-// return (SCORE) floor(INTSCALE*log2(Prob));
- return (SCORE) log2(Prob);
- }
-
-WEIGHT DoubleToWeight(double d)
- {
- assert(d >= 0);
- return (WEIGHT) (INTSCALE*d);
- }
-
-double WeightToDouble(WEIGHT w)
- {
- return (double) w / (double) INTSCALE;
- }
-
-SCORE DoubleToScore(double d)
- {
- return (SCORE)(d*(double) INTSCALE);
- }
-
-bool ScoreEq(SCORE s1, SCORE s2)
- {
- return BTEq(s1, s2);
- }
-
-// Approximate equality of two BASETYPE values: true when they differ by
-// less than 0.0001 in absolute terms, or by less than 0.5% of the sum of
-// their magnitudes.
-static bool BTEq2(BASETYPE b1, BASETYPE b2)
-    {
-    const double dAbsDiff = fabs(b1 - b2);
-    if (dAbsDiff < 0.0001)
-        return true;
-
-    const double dMagnitude = fabs(b1) + fabs(b2);
-    const double dRelDiff = dAbsDiff/dMagnitude;
-    return dRelDiff < 0.005;
-    }
-
-bool BTEq(double b1, double b2)
- {
- return BTEq2((BASETYPE) b1, (BASETYPE) b2);
- }
-
-const double dLn2 = log(2);
-
-// pow2(x)=2^x
-double pow2(double x)
- {
- if (MINUS_INFINITY == x)
- return 0;
- return exp(x*dLn2);
- }
-
-// lp2(x) = log2(1 + 2^-x), x >= 0
-double lp2(double x)
- {
- return log2(1 + pow2(-x));
- }
-
-// SumLog(x, y) = log2(2^x + 2^y)
-SCORE SumLog(SCORE x, SCORE y)
- {
- return (SCORE) log2(pow2(x) + pow2(y));
- }
-
-// SumLog(x, y, z) = log2(2^x + 2^y + 2^z)
-SCORE SumLog(SCORE x, SCORE y, SCORE z)
- {
- return (SCORE) log2(pow2(x) + pow2(y) + pow2(z));
- }
-
-// SumLog(w, x, y, z) = log2(2^w + 2^x + 2^y + 2^z)
-SCORE SumLog(SCORE w, SCORE x, SCORE y, SCORE z)
- {
- return (SCORE) log2(pow2(w) + pow2(x) + pow2(y) + pow2(z));
- }
-
-SCORE lp2Fast(SCORE x)
- {
- assert(x >= 0);
- const int iTableSize = 1000;
- const double dRange = 20.0;
- const double dScale = dRange/iTableSize;
- static SCORE dValue[iTableSize];
- static bool bInit = false;
- if (!bInit)
- {
- for (int i = 0; i < iTableSize; ++i)
- dValue[i] = (SCORE) lp2(i*dScale);
- bInit = true;
- }
- if (x >= dRange)
- return 0.0;
- int i = (int) (x/dScale);
- assert(i >= 0 && i < iTableSize);
- SCORE dResult = dValue[i];
- assert(BTEq(dResult, lp2(x)));
- return dResult;
- }
-
-// SumLogFast(x, y) = log2(2^x + 2^y), evaluated as
-// max(x, y) + lp2Fast(|x - y|) so that only a table lookup is needed.
-// A MINUS_INFINITY operand contributes nothing, so the other operand is
-// returned unchanged (MINUS_INFINITY if both are).
-SCORE SumLogFast(SCORE x, SCORE y)
-    {
-    if (MINUS_INFINITY == x)
-        return y;
-    if (MINUS_INFINITY == y)
-        return x;
-
-    SCORE dResult;
-    if (x > y)
-        dResult = x + lp2Fast(x-y);
-    else
-        dResult = y + lp2Fast(y-x);
-// lp2Fast is a table approximation, so check against the exact SumLog with
-// the BTEq tolerance (as lp2Fast itself does) rather than demanding
-// bit-for-bit floating-point equality.
-    assert(BTEq(SumLog(x, y), dResult));
-    return dResult;
-    }
-
-// SumLogFast(x, y, z) = log2(2^x + 2^y + 2^z), built from the
-// two-operand form.
-SCORE SumLogFast(SCORE x, SCORE y, SCORE z)
-    {
-    SCORE dResult = SumLogFast(x, SumLogFast(y, z));
-    assert(BTEq(SumLog(x, y, z), dResult));
-    return dResult;
-    }
-
-// SumLogFast(w, x, y, z) = log2(2^w + 2^x + 2^y + 2^z), built from the
-// two-operand form.
-SCORE SumLogFast(SCORE w, SCORE x, SCORE y, SCORE z)
-    {
-    SCORE dResult = SumLogFast(SumLogFast(w, x), SumLogFast(y, z));
-    assert(BTEq(SumLog(w, x, y, z), dResult));
-    return dResult;
-    }
-
-// Return the sum of the first n elements of v (0.0 when n is zero).
-double VecSum(const double v[], unsigned n)
-    {
-    double dTotal = 0.0;
-    unsigned uIndex = 0;
-    while (uIndex < n)
-        {
-        dTotal += v[uIndex];
-        ++uIndex;
-        }
-    return dTotal;
-    }
-
-// Scale p[0..n-1] in place so that the values sum to 1.
-// Calls Quit() if the values sum to exactly zero.
-void Normalize(PROB p[], unsigned n)
-    {
-    PROB dTotal = 0.0;
-    for (unsigned k = 0; k < n; ++k)
-        dTotal += p[k];
-
-    if (dTotal == 0.0)
-        Quit("Normalize, sum=0");
-
-    for (unsigned k = 0; k < n; ++k)
-        p[k] /= dTotal;
-    }
-
-void NormalizeUnlessZero(PROB p[], unsigned n)
- {
- unsigned i;
- PROB dSum = 0.0;
- for (i = 0; i < n; ++i)
- dSum += p[i];
- if (0.0 == dSum)
- return;
- for (i = 0; i < n; ++i)
- p[i] /= dSum;
- }
-
-void Normalize(PROB p[], unsigned n, double dRequiredTotal)
- {
- unsigned i;
- double dSum = 0.0;
- for (i = 0; i < n; ++i)
- dSum += p[i];
- if (0.0 == dSum)
- Quit("Normalize, sum=0");
- double dFactor = dRequiredTotal / dSum;
- for (i = 0; i < n; ++i)
- p[i] *= (PROB) dFactor;
- }
-
-bool VectorIsZero(const double dValues[], unsigned n)
- {
- for (unsigned i = 0; i < n; ++i)
- if (dValues[i] != 0.0)
- return false;
- return true;
- }
-
-void VectorSet(double dValues[], unsigned n, double d)
- {
- for (unsigned i = 0; i < n; ++i)
- dValues[i] = d;
- }
-
-bool VectorIsZero(const float dValues[], unsigned n)
- {
- for (unsigned i = 0; i < n; ++i)
- if (dValues[i] != 0.0)
- return false;
- return true;
- }
-
-void VectorSet(float dValues[], unsigned n, float d)
- {
- for (unsigned i = 0; i < n; ++i)
- dValues[i] = d;
- }
-
-// Correlation coefficient of the paired samples P[i], Q[i], i < uCount:
-// sum(dP*dQ) / sqrt(sum(dP*dP) * sum(dQ*dQ)), where dP and dQ are the
-// deviations from the respective means. Returns 0 when the numerator is
-// exactly zero, which avoids dividing by a zero denominator.
-double Correl(const double P[], const double Q[], unsigned uCount)
-    {
-    double dTotalP = 0.0;
-    double dTotalQ = 0.0;
-    for (unsigned i = 0; i < uCount; ++i)
-        {
-        dTotalP += P[i];
-        dTotalQ += Q[i];
-        }
-    const double dMeanP = dTotalP/uCount;
-    const double dMeanQ = dTotalQ/uCount;
-
-    double dCross = 0.0;
-    double dSquaresP = 0.0;
-    double dSquaresQ = 0.0;
-    for (unsigned i = 0; i < uCount; ++i)
-        {
-        const double dDevP = P[i] - dMeanP;
-        const double dDevQ = Q[i] - dMeanQ;
-        dCross += dDevP*dDevQ;
-        dSquaresP += dDevP*dDevP;
-        dSquaresQ += dDevQ*dDevQ;
-        }
-
-    if (0 == dCross)
-        return 0;
-    return dCross / sqrt(dSquaresP*dSquaresQ);
-    }
-
-float Correl(const float P[], const float Q[], unsigned uCount)
- {
- float dSumP = 0.0;
- float dSumQ = 0.0;
- for (unsigned n = 0; n < uCount; ++n)
- {
- dSumP += P[n];
- dSumQ += Q[n];
- }
- const float dMeanP = dSumP/uCount;
- const float dMeanQ = dSumQ/uCount;
-
- float dSum1 = 0.0;
- float dSum2 = 0.0;
- float dSum3 = 0.0;
- for (unsigned n = 0; n < uCount; ++n)
- {
- const float dDiffP = P[n] - dMeanP;
- const float dDiffQ = Q[n] - dMeanQ;
- dSum1 += dDiffP*dDiffQ;
- dSum2 += dDiffP*dDiffP;
- dSum3 += dDiffQ*dDiffQ;
- }
- if (0 == dSum1)
- return 0;
- const float dCorrel = dSum1 / (float) sqrt(dSum2*dSum3);
- return dCorrel;
- }
-
-// Compute 1-based ranks of the values in P, allowing for ties: tied values
-// all receive the mean of the rank positions they occupy. Every value is
-// compared with every other value, so this is deliberately simple rather
-// than fast.
-void Rank(const float P[], float Ranks[], unsigned uCount)
-    {
-    for (unsigned uTarget = 0; uTarget < uCount; ++uTarget)
-        {
-        const double dTargetValue = P[uTarget];
-        unsigned uBelow = 0;
-        unsigned uTied = 0;
-        unsigned uAbove = 0;
-        for (unsigned uOther = 0; uOther < uCount; ++uOther)
-            {
-            const double dOtherValue = P[uOther];
-            if (dOtherValue < dTargetValue)
-                ++uBelow;
-            else if (dOtherValue == dTargetValue)
-                ++uTied;
-            else
-                ++uAbove;
-            }
-        // The target always ties with itself.
-        assert(uTied >= 1);
-        assert(uTied + uBelow + uAbove == uCount);
-        const double dRank = 1 + uBelow + (uTied - 1)/2.0;
-        Ranks[uTarget] = (float) dRank;
-        }
-    }
-
-void Rank(const double P[], double Ranks[], unsigned uCount)
- {
- for (unsigned n = 0; n < uCount; ++n)
- {
- unsigned uNumberGreater = 0;
- unsigned uNumberEqual = 0;
- unsigned uNumberLess = 0;
- double dValue = P[n];
- for (unsigned i = 0; i < uCount; ++i)
- {
- double v = P[i];
- if (v == dValue)
- ++uNumberEqual;
- else if (v < dValue)
- ++uNumberLess;
- else
- ++uNumberGreater;
- }
- assert(uNumberEqual >= 1);
- assert(uNumberEqual + uNumberLess + uNumberGreater == uCount);
- Ranks[n] = (double) (1 + uNumberLess + (uNumberEqual - 1)/2.0);
- }
- }
-
-// Return the sum of the first 20 entries of Counts.
-// NOTE(review): 20 is presumably the amino-acid alphabet size -- confirm.
-FCOUNT SumCounts(const FCOUNT Counts[])
-    {
-    FCOUNT Total = 0;
-    int iLetter = 0;
-    while (iLetter < 20)
-        {
-        Total += Counts[iLetter];
-        ++iLetter;
-        }
-    return Total;
-    }
Deleted: trunk/packages/muscle/trunk/intmath.h
===================================================================
--- trunk/packages/muscle/trunk/intmath.h 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/intmath.h 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,210 +0,0 @@
-// IntMath.h: Header for doing fractional math with integers for speed.
-
-#ifndef IntMath_h
-#define IntMath_h
-
-typedef float BASETYPE;
-//typedef double BASETYPE;
-
-// Scaling factor used to store certain floating point
-// values as integers to a few significant figures.
-//const int INTSCALE = 1000;
-const int INTSCALE = 1;
-
-// Type for a probability in range 0.0 to 1.0.
-typedef BASETYPE PROB;
-
-// Type for an log-odds integer score.
-// Stored as log2(PROB)*INTSCALE.
-//typedef int SCORE;
-typedef BASETYPE SCORE;
-
-// Type for a weight.
-// Stored as w*INTSCALE where w is in range 0.0 to 1.0.
-//typedef unsigned WEIGHT;
-typedef BASETYPE WEIGHT;
-
-// Type for a fractional weighted count stored as n*WEIGHT/N
-// where n=measured count (integer >= 0) and N is total for
-// the distribution (e.g., n=number of residues of a given
-// type in a column, N=number of residues in the column).
-// Hence values in an FCOUNT variable range from 0..INTSCALE
-// as an integer, representing "true" values 0.0 to 1.0.
-//typedef unsigned FCOUNT;
-typedef BASETYPE FCOUNT;
-
-// Representation of -infinity. Value should
-// be large and negative, but not so large
-// that adding a few of them overflows.
-// TODO: Multiplied by 10 to work around bug
-// when aligning Bali 1ckaA in ref4, which is
-// so long that B->Mmax got to -infinity, causing
-// traceback to fail.
-//const int MINUS_INFINITY = -10000000;
-const BASETYPE MINUS_INFINITY = (BASETYPE) -1e37;
-const BASETYPE PLUS_INFINITY = (BASETYPE) 1e37;
-
-// Probability relative to a null model
-typedef double RPROB;
-
-PROB ScoreToProb(SCORE Score);
-SCORE ProbToScore(PROB Prob);
-SCORE DoubleToScore(double d);
-WEIGHT DoubleToWeight(double d);
-double WeightToDouble(WEIGHT w);
-SCORE MulScoreWeight(SCORE Score, WEIGHT Weight);
-bool ScoreEq(SCORE s1, SCORE s2);
-bool BTEq(double b1, double b2);
-
-static double ScoreToDouble(SCORE Score)
- {
- return (double) Score / (double) INTSCALE;
- }
-
-#if 0
-// In-line assembler for Result = (x*y)/z
-// Note that imul and idiv will do 64-bit arithmetic
-// on 32-bit operands, so this shouldn't overflow
-// Can't write this efficiently in C/C++ (would
-// often overflow 32 bits).
-#define MulDivAssign(Result, x, y, z) \
- { \
- int X = (x); \
- int Y = (y); \
- int Z = (z); \
- _asm mov eax,X \
- _asm imul Y \
- _asm mov ecx,Z \
- _asm idiv ecx \
- _asm mov Result,eax \
- }
-#else
-#define MulDivAssign(Result, x, y, z) Result = (((x)*(y))/(z))
-#endif
-
-#define MulScoreWeight(r, s, w) MulDivAssign(r, s, w, INTSCALE)
-#define MulWeightWCount(r, wt, wc) MulDivAssign(r, wt, wc, INTSCALE)
-#define MulFCountScore(r, fc, sc) MulDivAssign(r, fc, sc, INTSCALE)
-
-#if _DEBUG
-
-static inline SCORE Add2(SCORE a, SCORE b)
- {
- if (MINUS_INFINITY == a)
- return MINUS_INFINITY;
- if (MINUS_INFINITY == b)
- return MINUS_INFINITY;
- SCORE sum = a + b;
- if (sum < MINUS_INFINITY)
- return MINUS_INFINITY;
-// assert(sum < OVERFLOW_WARN);
- return sum;
- }
-
-static inline SCORE Add3(SCORE a, SCORE b, SCORE c)
- {
- return Add2(Add2(a, b), c);
- }
-
-static inline SCORE Add4(SCORE a, SCORE b, SCORE c, SCORE d)
- {
- return Add2(Add2(a, b), Add2(c, d));
- }
-
-static inline SCORE Add5(SCORE a, SCORE b, SCORE c, SCORE d, SCORE e)
- {
- return Add3(Add2(a, b), Add2(c, d), e);
- }
-
-static inline SCORE Add6(SCORE a, SCORE b, SCORE c, SCORE d, SCORE e, SCORE f)
- {
- return Add3(Add2(a, b), Add2(c, d), Add2(e, f));
- }
-
-static inline SCORE Add7(SCORE a, SCORE b, SCORE c, SCORE d, SCORE e, SCORE f, SCORE g)
- {
- return Add4(Add2(a, b), Add2(c, d), Add2(e, f), g);
- }
-
-static inline SCORE Mul2(SCORE a, SCORE b)
- {
- if (MINUS_INFINITY == a)
- return MINUS_INFINITY;
- if (MINUS_INFINITY == b)
- return MINUS_INFINITY;
- //__int64 prod = (__int64) a * (__int64) b;
- //assert((SCORE) prod == prod);
- //return (SCORE) prod;
- return a*b;
- }
-
-static inline SCORE Sub2(SCORE a, SCORE b)
- {
- if (MINUS_INFINITY == a)
- return MINUS_INFINITY;
- if (MINUS_INFINITY == b)
- return MINUS_INFINITY;
- SCORE diff = a - b;
- if (diff < MINUS_INFINITY)
- return MINUS_INFINITY;
-// assert(diff < OVERFLOW_WARN);
- return diff;
- }
-
-static inline SCORE Div2(SCORE a, int b)
- {
- if (MINUS_INFINITY == a)
- return MINUS_INFINITY;
- return a/b;
- }
-
-//static inline SCORE MulScoreWeight(SCORE s, WEIGHT w)
-// {
-// SCORE Prod = s*(SCORE) w;
-// assert(Prod < OVERFLOW_WARN);
-// extern void Log(const char Format[], ...);
-// if (Prod/(SCORE) w != s)
-// Log("**WARRNING MulScoreWeight Prod=%d w=%d Prod/w=%d s=%d\n",
-// Prod,
-// w,
-// Prod/(SCORE) w,
-// s);
-// assert(Prod/ (SCORE) w == s);
-// return Prod/INTSCALE;
-// }
-//
-//static inline WCOUNT MulWeightWCount(WEIGHT wt, WCOUNT wc)
-// {
-// return (wt*wc)/INTSCALE;
-// }
-
-#else
-#define Add2(a, b) ((a) + (b))
-#define Sub2(a, b) ((MINUS_INFINITY == (a)) ? MINUS_INFINITY : ((a) - (b)))
-#define Div2(a, b) ((MINUS_INFINITY == (a)) ? MINUS_INFINITY : ((a) / (b)))
-#define Add3(a, b, c) ((a) + (b) + (c))
-#define Add4(a, b, c, d) ((a) + (b) + (c) + (d))
-#define Add5(a, b, c, d, e) ((a) + (b) + (c) + (d) + (e))
-#define Add6(a, b, c, d, e, f) ((a) + (b) + (c) + (d) + (e) + (f))
-#define Add7(a, b, c, d, e, f, g) ((a) + (b) + (c) + (d) + (e) + (f) + (g))
-//#define MulScoreWeight(s, w) (((s)*(SCORE) (w))/INTSCALE)
-#define Mul2(a, b) ((a)*(b))
-#endif
-
-//static inline SCORE MulFCountScore(FCOUNT fc, SCORE sc)
-// {
-//// Fast way to say "if (fc >= 2^15 || sc >= 2^15)":
-// if ((fc | sc) & 0xffff1000)
-// {
-// SCORE Score = ((fc+5)/10)*sc;
-// assert(Score < assert);
-// OVERFLOW_WARN(Score > MINUS_INFINITY);
-// return Score/(INTSCALE/10);
-// }
-// SCORE Score = fc*sc;
-// assert(Score < OVERFLOW_WARN);
-// assert(Score > MINUS_INFINITY);
-// return Score/INTSCALE;
-// }
-
-#endif // IntMath_h
Deleted: trunk/packages/muscle/trunk/local.cpp
===================================================================
--- trunk/packages/muscle/trunk/local.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/local.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,100 +0,0 @@
-#include "muscle.h"
-#include "textfile.h"
-#include "msa.h"
-#include "profile.h"
-#include "pwpath.h"
-#include "tree.h"
-
-#define TRACE 0
-
-static void MSAFromFileName(const char *FileName, MSA &a)
- {
- TextFile File(FileName);
- a.FromFile(File);
- }
-
-static ProfPos *ProfileFromMSALocal(MSA &msa, Tree &tree)
- {
- const unsigned uSeqCount = msa.GetSeqCount();
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
- msa.SetSeqId(uSeqIndex, uSeqIndex);
-
- TreeFromMSA(msa, tree, g_Cluster1, g_Distance1, g_Root1);
- SetMuscleTree(tree);
- return ProfileFromMSA(msa);
- }
-
-void Local()
- {
- if (0 == g_pstrFileName1 || 0 == g_pstrFileName2)
- Quit("Must specify both -in1 and -in2 for -sw");
-
- SetSeqWeightMethod(g_SeqWeight1);
-
- MSA msa1;
- MSA msa2;
-
- MSAFromFileName(g_pstrFileName1, msa1);
- MSAFromFileName(g_pstrFileName2, msa2);
-
- ALPHA Alpha = ALPHA_Undefined;
- switch (g_SeqType)
- {
- case SEQTYPE_Auto:
- Alpha = msa1.GuessAlpha();
- break;
-
- case SEQTYPE_Protein:
- Alpha = ALPHA_Amino;
- break;
-
- case SEQTYPE_DNA:
- Alpha = ALPHA_DNA;
- break;
-
- case SEQTYPE_RNA:
- Alpha = ALPHA_RNA;
- break;
-
- default:
- Quit("Invalid SeqType");
- }
- SetAlpha(Alpha);
-
- msa1.FixAlpha();
- msa2.FixAlpha();
-
- if (ALPHA_DNA == Alpha || ALPHA_RNA == Alpha)
- SetPPScore(PPSCORE_SPN);
-
- const unsigned uSeqCount1 = msa1.GetSeqCount();
- const unsigned uSeqCount2 = msa2.GetSeqCount();
- const unsigned uMaxSeqCount = (uSeqCount1 > uSeqCount2 ? uSeqCount1 : uSeqCount2);
- MSA::SetIdCount(uMaxSeqCount);
-
- unsigned uLength1 = msa1.GetColCount();
- unsigned uLength2 = msa2.GetColCount();
-
- Tree tree1;
- Tree tree2;
-
- ProfPos *Prof1 = ProfileFromMSALocal(msa1, tree1);
- ProfPos *Prof2 = ProfileFromMSALocal(msa2, tree2);
-
- PWPath Path;
- SW(Prof1, uLength1, Prof2, uLength2, Path);
-
-#if TRACE
- Path.LogMe();
-#endif
-
- MSA msaOut;
- AlignTwoMSAsGivenPathSW(Path, msa1, msa2, msaOut);
-
-#if TRACE
- msaOut.LogMe();
-#endif
-
- TextFile fileOut(g_pstrOutFileName, true);
- msaOut.ToFile(fileOut);
- }
Deleted: trunk/packages/muscle/trunk/main.cpp
===================================================================
--- trunk/packages/muscle/trunk/main.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/main.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,66 +0,0 @@
-#include "muscle.h"
-#include <stdio.h>
-#ifdef WIN32
-#include <windows.h> // for SetPriorityClass()
-#include <io.h> // for isatty()
-#else
-#include <unistd.h> // for isatty()
-#endif
-
-int g_argc;
-char **g_argv;
-
-// Program entry point: records the command line in g_argc/g_argv, parses
-// the arguments, sets parameters and the log file, handles the version and
-// usage requests, then calls Run() -- inside a catch-all handler when
-// g_bCatchExceptions is set.
-int main(int argc, char **argv)
-    {
-// Use #ifdef, matching the include guard at the top of this file; "#if WIN32"
-// fails to preprocess when WIN32 is defined with an empty value.
-#ifdef WIN32
-// Multi-tasking does not work well in CPU-bound
-// console apps running under Win32.
-// Reducing the process priority allows GUI apps
-// to run responsively in parallel.
-    SetPriorityClass(GetCurrentProcess(), BELOW_NORMAL_PRIORITY_CLASS);
-#endif
-    g_argc = argc;
-    g_argv = argv;
-
-    SetNewHandler();
-    SetStartTime();
-    // Skip argv[0] (the program name).
-    ProcessArgVect(argc - 1, argv + 1);
-    SetParams();
-    SetLogFile();
-
-    //extern void TestSubFams(const char *);
-    //TestSubFams(g_pstrInFileName);
-    //return 0;
-
-    if (g_bVersion)
-        {
-        printf(MUSCLE_LONG_VERSION "\n");
-        exit(EXIT_SUCCESS);
-        }
-
-    if (!g_bQuiet)
-        Credits();
-
-    // No command given and stdin is a terminal: show usage and exit.
-    if (MissingCommand() && isatty(0))
-        {
-        Usage();
-        exit(EXIT_SUCCESS);
-        }
-
-    if (g_bCatchExceptions)
-        {
-        try
-            {
-            Run();
-            }
-        catch (...)
-            {
-            OnException();
-            exit(EXIT_Except);
-            }
-        }
-    else
-        Run();
-
-    exit(EXIT_Success);
-    }
Deleted: trunk/packages/muscle/trunk/makerootmsa.cpp
===================================================================
--- trunk/packages/muscle/trunk/makerootmsa.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/makerootmsa.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,230 +0,0 @@
-#include "muscle.h"
-#include "tree.h"
-#include "seqvect.h"
-#include "profile.h"
-#include "msa.h"
-#include "pwpath.h"
-#include "estring.h"
-
-#define TRACE 0
-#define VALIDATE 0
-
-// Build in sOut the gapped form of sequence s implied by Path.
-// sOut is cleared and given the name of s. For each edge of Path, in order:
-//   'M' or 'I'  appends the next unused letter of s,
-//   'D'         appends a gap character ('-').
-// When bRight is true the 'I' and 'D' edge types are swapped before being
-// interpreted. Any other edge type is reported through Quit().
-static void PathSeq(const Seq &s, const PWPath &Path, bool bRight, Seq &sOut)
-    {
-    // Earlier code also called PathToEstrings() and stored the sequence
-    // length here, but those results were never read (and the e-string
-    // pointers never released), so they have been removed.
-    const unsigned uEdgeCount = Path.GetEdgeCount();
-
-    sOut.Clear();
-    sOut.SetName(s.GetName());
-    unsigned uPos = 0;
-    for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
-        {
-        const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
-        char cType = Edge.cType;
-        if (bRight)
-            {
-            if (cType == 'I')
-                cType = 'D';
-            else if (cType == 'D')
-                cType = 'I';
-            }
-        switch (cType)
-            {
-        case 'M':
-        case 'I':
-            // Both edge types consume one letter of s.
-            sOut.AppendChar(s[uPos++]);
-            break;
-        case 'D':
-            sOut.AppendChar('-');
-            break;
-        default:
-            Quit("PathSeq, invalid edge type %c", cType);
-            }
-        }
-    }
-
-#if VALIDATE
-
-static void MakeRootSeq(const Seq &s, const Tree &GuideTree, unsigned uLeafNodeIndex,
- const ProgNode Nodes[], Seq &sRoot)
- {
- sRoot.Copy(s);
- unsigned uNodeIndex = uLeafNodeIndex;
- for (;;)
- {
- unsigned uParent = GuideTree.GetParent(uNodeIndex);
- if (NULL_NEIGHBOR == uParent)
- break;
- bool bRight = (GuideTree.GetLeft(uParent) == uNodeIndex);
- uNodeIndex = uParent;
- const PWPath &Path = Nodes[uNodeIndex].m_Path;
- Seq sTmp;
- PathSeq(sRoot, Path, bRight, sTmp);
- sRoot.Copy(sTmp);
- }
- }
-
-#endif // VALIDATE
-
-static short *MakeRootSeqE(const Seq &s, const Tree &GuideTree, unsigned uLeafNodeIndex,
- const ProgNode Nodes[], Seq &sRoot, short *Estring1, short *Estring2)
- {
- short *EstringCurr = Estring1;
- short *EstringNext = Estring2;
-
- const unsigned uSeqLength = s.Length();
- EstringCurr[0] = uSeqLength;
- EstringCurr[1] = 0;
-
- unsigned uNodeIndex = uLeafNodeIndex;
- for (;;)
- {
- unsigned uParent = GuideTree.GetParent(uNodeIndex);
- if (NULL_NEIGHBOR == uParent)
- break;
- bool bRight = (GuideTree.GetLeft(uParent) == uNodeIndex);
- uNodeIndex = uParent;
- const PWPath &Path = Nodes[uNodeIndex].m_Path;
- const short *EstringNode = bRight ?
- Nodes[uNodeIndex].m_EstringL : Nodes[uNodeIndex].m_EstringR;
-
- MulEstrings(EstringCurr, EstringNode, EstringNext);
-#if TRACE
- Log("\n");
- Log("Curr=");
- LogEstring(EstringCurr);
- Log("\n");
- Log("Node=");
- LogEstring(EstringNode);
- Log("\n");
- Log("Prod=");
- LogEstring(EstringNext);
- Log("\n");
-#endif
- short *EstringTmp = EstringNext;
- EstringNext = EstringCurr;
- EstringCurr = EstringTmp;
- }
- EstringOp(EstringCurr, s, sRoot);
-
-#if TRACE
- Log("Root estring=");
- LogEstring(EstringCurr);
- Log("\n");
- Log("Root seq=");
- sRoot.LogMe();
-#endif
- return EstringCurr;
- }
-
-static unsigned GetFirstNodeIndex(const Tree &tree)
- {
- if (g_bStable)
- return 0;
- return tree.FirstDepthFirstNode();
- }
-
-// Return the leaf that follows uPrevNodeIndex, or NULL_NEIGHBOR when there
-// are no more leaves. With g_bStable set, leaves are visited in increasing
-// node-index order; otherwise in the tree's depth-first order.
-static unsigned GetNextNodeIndex(const Tree &tree, unsigned uPrevNodeIndex)
-    {
-    unsigned uNodeIndex = uPrevNodeIndex;
-    if (!g_bStable)
-        {
-        // Depth-first order: advance until a leaf or the end is reached.
-        do
-            uNodeIndex = tree.NextDepthFirstNode(uNodeIndex);
-        while (NULL_NEIGHBOR != uNodeIndex && !tree.IsLeaf(uNodeIndex));
-        return uNodeIndex;
-        }
-
-    // Stable order: scan forward through the node indexes for a leaf.
-    const unsigned uNodeCount = tree.GetNodeCount();
-    for (++uNodeIndex; uNodeIndex < uNodeCount; ++uNodeIndex)
-        {
-        if (tree.IsLeaf(uNodeIndex))
-            return uNodeIndex;
-        }
-    return NULL_NEIGHBOR;
-    }
-
-// Build the root alignment a from the input sequences v.
-// For every leaf of GuideTree (in the order given by GetFirstNodeIndex /
-// GetNextNodeIndex) the leaf's sequence is expanded to its root-level form
-// by MakeRootSeqE and stored as the next row of a, together with the
-// sequence name and id. A copy of the e-string returned by MakeRootSeqE is
-// stored in Nodes[leaf].m_EstringL. The alignment is sized from the length
-// of the first expanded sequence; later ones are asserted to match.
-void MakeRootMSA(const SeqVect &v, const Tree &GuideTree, ProgNode Nodes[],
-  MSA &a)
-    {
-#if TRACE
-    Log("MakeRootMSA Tree=");
-    GuideTree.LogMe();
-#endif
-    const unsigned uSeqCount = v.GetSeqCount();
-    unsigned uColCount = uInsane;
-    unsigned uSeqIndex = 0;
-    const unsigned uTreeNodeCount = GuideTree.GetNodeCount();
-    const unsigned uRootNodeIndex = GuideTree.GetRootNodeIndex();
-    const PWPath &RootPath = Nodes[uRootNodeIndex].m_Path;
-    const unsigned uRootColCount = RootPath.GetEdgeCount();
-    // Two scratch e-string buffers, reused for every leaf.
-    const unsigned uEstringSize = uRootColCount + 1;
-    short *Estring1 = new short[uEstringSize];
-    short *Estring2 = new short[uEstringSize];
-    SetProgressDesc("Root alignment");
-
-    unsigned uTreeNodeIndex = GetFirstNodeIndex(GuideTree);
-    do
-        {
-        Progress(uSeqIndex, uSeqCount);
-
-        unsigned uId = GuideTree.GetLeafId(uTreeNodeIndex);
-        const Seq &s = *(v[uId]);
-
-        Seq sRootE;
-        short *es = MakeRootSeqE(s, GuideTree, uTreeNodeIndex, Nodes, sRootE,
-          Estring1, Estring2);
-        // es points into a scratch buffer, so keep a private copy.
-        Nodes[uTreeNodeIndex].m_EstringL = EstringNewCopy(es);
-
-#if VALIDATE
-        Seq sRoot;
-        MakeRootSeq(s, GuideTree, uTreeNodeIndex, Nodes, sRoot);
-        if (!sRoot.Eq(sRootE))
-            {
-            Log("sRoot=");
-            sRoot.LogMe();
-            Log("sRootE=");
-            sRootE.LogMe();
-            Quit("Root seqs differ");
-            }
-#endif
-
-#if TRACE
-        // Log sRootE: sRoot exists only when VALIDATE is enabled, so
-        // referring to it here broke TRACE-only builds.
-        Log("MakeRootSeq=\n");
-        sRootE.LogMe();
-#endif
-        if (uInsane == uColCount)
-            {
-            // First leaf: its expanded length fixes the column count.
-            uColCount = sRootE.Length();
-            a.SetSize(uSeqCount, uColCount);
-            }
-        else
-            {
-            assert(uColCount == sRootE.Length());
-            }
-        a.SetSeqName(uSeqIndex, s.GetName());
-        a.SetSeqId(uSeqIndex, uId);
-        for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
-            a.SetChar(uSeqIndex, uColIndex, sRootE[uColIndex]);
-        ++uSeqIndex;
-
-        uTreeNodeIndex = GetNextNodeIndex(GuideTree, uTreeNodeIndex);
-        }
-    while (NULL_NEIGHBOR != uTreeNodeIndex);
-
-    delete[] Estring1;
-    delete[] Estring2;
-
-    ProgressStepsDone();
-    assert(uSeqIndex == uSeqCount);
-    }
Deleted: trunk/packages/muscle/trunk/makerootmsab.cpp
===================================================================
--- trunk/packages/muscle/trunk/makerootmsab.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/makerootmsab.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,62 +0,0 @@
-#include "muscle.h"
-#include "tree.h"
-#include "profile.h"
-#include "msa.h"
-#include "seqvect.h"
-#include "pwpath.h"
-
-// Align one sequence to the root profile and copy the aligned row into
-// row uSeqIndex of msaOut.
-//   s                sequence to align
-//   uSeqIndex        destination row in msaOut
-//   RootProf         root profile with uRootProfLength columns
-//   msaOut           receives the name, id and uRootProfLength characters
-//                    for row uSeqIndex
-// The sequence's own profile has gap open/close set to MINUS_INFINITY in
-// every column before it is aligned against RootProf.
-// NOTE(review): SeqProf is not released in this function -- confirm who
-// owns the array returned by ProfileFromMSA.
-static void DoSeq(Seq &s, unsigned uSeqIndex, const ProfPos *RootProf,
-  unsigned uRootProfLength, MSA &msaOut)
-    {
-    MSA msaSeq;
-    msaSeq.FromSeq(s);
-    const unsigned uSeqLength = s.Length();
-
-    // One-row placeholder alignment with the same width as the root profile.
-    MSA msaDummy;
-    msaDummy.SetSize(1, uRootProfLength);
-    msaDummy.SetSeqId(0, 0);
-    msaDummy.SetSeqName(0, "Dummy0");
-    for (unsigned uColIndex = 0; uColIndex < uRootProfLength; ++uColIndex)
-        msaDummy.SetChar(0, uColIndex, '?');
-
-    ProfPos *SeqProf = ProfileFromMSA(msaSeq);
-    for (unsigned uColIndex = 0; uColIndex < uSeqLength; ++uColIndex)
-        {
-        ProfPos &PP = SeqProf[uColIndex];
-        PP.m_scoreGapOpen = MINUS_INFINITY;
-        PP.m_scoreGapClose = MINUS_INFINITY;
-        }
-
-    ProfPos *ProfOut;
-    unsigned uLengthOut;
-    PWPath Path;
-    AlignTwoProfs(SeqProf, uSeqLength, 1.0, RootProf, uRootProfLength, 1.0,
-      Path, &ProfOut, &uLengthOut);
-    // Compare, do not assign: "uLengthOut = uRootProfLength" overwrote the
-    // result and made the check pass for any non-zero profile length.
-    assert(uLengthOut == uRootProfLength);
-    delete[] ProfOut;
-
-    MSA msaCombined;
-    AlignTwoMSAsGivenPath(Path, msaSeq, msaDummy, msaCombined);
-
-    msaCombined.LogMe();
-    msaOut.SetSeqName(uSeqIndex, s.GetName());
-    msaOut.SetSeqId(uSeqIndex, s.GetId());
-    // Row 0 of the combined alignment is the aligned input sequence.
-    for (unsigned uColIndex = 0; uColIndex < uRootProfLength; ++uColIndex)
-        msaOut.SetChar(uSeqIndex, uColIndex, msaCombined.GetChar(0, uColIndex));
-    }
-
-// Steven Brenner's O(NL^2) proposal for creating a root alignment
-// Align each sequence to the profile at the root.
-// Compare the e-string solution, which is O(NL log N).
-void MakeRootMSABrenner(SeqVect &v, const Tree &GuideTree, ProgNode Nodes[],
- MSA &a)
- {
- const unsigned uSeqCount = v.Length();
- const unsigned uRootNodeIndex = GuideTree.GetRootNodeIndex();
- const ProfPos *RootProfile = Nodes[uRootNodeIndex].m_Prof;
- const unsigned uRootColCount = Nodes[uRootNodeIndex].m_uLength;
- a.SetSize(uSeqCount, uRootColCount);
-
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
- DoSeq(*v[uSeqIndex], uSeqIndex, RootProfile, uRootColCount, a);
- }
Deleted: trunk/packages/muscle/trunk/mhack.cpp
===================================================================
--- trunk/packages/muscle/trunk/mhack.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/mhack.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,64 +0,0 @@
-#include "muscle.h"
-#include "seqvect.h"
-#include "msa.h"
-
-/***
-Methionine hack.
-Most proteins start with M.
-This results in odd-looking alignments with the terminal Ms aligned followed
-immediately by gaps.
-Hack this by treating terminal M like X.
-***/
-
-static bool *M;
-
-// First half of the methionine hack (amino-acid alphabet only): allocate
-// the flag array M with one entry per sequence in v, and for every
-// non-empty sequence whose first letter is 'M' or 'm', set the flag at
-// that sequence's id and replace the letter with 'X'.
-void MHackStart(SeqVect &v)
-    {
-    if (ALPHA_Amino != g_Alpha)
-        return;
-
-    const unsigned uSeqCount = v.Length();
-    M = new bool[uSeqCount];
-    memset(M, 0, uSeqCount*sizeof(bool));
-
-    for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
-        {
-        Seq &s = v.GetSeq(uSeqIndex);
-        if (0 != s.Length())
-            {
-            unsigned uId = s.GetId();
-            const bool bStartsWithM = (s[0] == 'M' || s[0] == 'm');
-            if (bStartsWithM)
-                {
-                M[uId] = true;
-                s[0] = 'X';
-                }
-            }
-        }
-    }
-
-// Second half of the methionine hack (amino-acid alphabet only): for every
-// row of msa whose sequence id is flagged in M, overwrite the first
-// non-gap character with 'M'. The flag array is then released and reset.
-// Does nothing if no flag array is allocated.
-void MHackEnd(MSA &msa)
-    {
-    if (ALPHA_Amino != g_Alpha || 0 == M)
-        return;
-
-    const unsigned uSeqCount = msa.GetSeqCount();
-    const unsigned uColCount = msa.GetColCount();
-    for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
-        {
-        unsigned uId = msa.GetSeqId(uSeqIndex);
-        if (!M[uId])
-            continue;
-
-        // Skip leading gaps to find the first letter of the row.
-        unsigned uColIndex = 0;
-        while (uColIndex < uColCount && msa.IsGap(uSeqIndex, uColIndex))
-            ++uColIndex;
-        if (uColIndex < uColCount)
-            msa.SetChar(uSeqIndex, uColIndex, 'M');
-        }
-
-    delete[] M;
-    M = 0;
-    }
Deleted: trunk/packages/muscle/trunk/mk
===================================================================
--- trunk/packages/muscle/trunk/mk 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/mk 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,10 +0,0 @@
-ofiles=`echo *.o`
-
-# find -name "*.o" -exec "rm" "{}" ";"
-
-make -f Makefile 2> make.err
-
-# rm *.o
-
-cat make.err
-ls -l muscle
Deleted: trunk/packages/muscle/trunk/mpam200.cpp
===================================================================
--- trunk/packages/muscle/trunk/mpam200.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/mpam200.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,107 +0,0 @@
-#include "muscle.h"
-
-const float PAM_200_CENTER = (float) 20.0;
-
-// v(x): table entry x shifted by PAM_200_CENTER, as a float. The argument
-// is parenthesized so the cast applies to the whole argument expression.
-// ROW(...): one 20-entry initializer row, including its trailing comma.
-#define v(x) ((float) (x) + PAM_200_CENTER)
-#define ROW(A, C, D, E, F, G, H, I, K, L, M, N, P, Q, R, S, T, V, W, Y) \
- { v(A), v(C), v(D), v(E), v(F), v(G), v(H), v(I), v(K), v(L), \
- v(M), v(N), v(P), v(Q), v(R), v(S), v(T), v(V), v(W), v(Y) },
-
-float PAM200[32][32] =
- {
-// A C D E F G H I K L
-// M N P Q R S T V W Y
-ROW( 388, -0, 34, 32, -202, 159, -88, 89, -55, -67,
- 19, 86, 186, -34, -32, 237, 273, 171, -326, -239) // A
-ROW( -0, 1170, -248, -315, 74, -14, 43, -151, -204, -196,
- -132, -49, -142, -215, 29, 165, -7, -69, 179, 313) // C
-ROW( 34, -248, 625, 496, -419, 148, 78, -245, 55, -361,
- -255, 332, -169, 122, -64, 45, -13, -167, -438, -148) // D
-ROW( 32, -315, 496, 610, -480, 125, 25, -245, 175, -327,
- -242, 166, -141, 279, 34, -30, -56, -150, -386, -305) // E
-ROW( -202, 74, -419, -480, 888, -407, 62, 80, -443, 320,
- 67, -236, -180, -294, -327, -51, -173, 31, -1, 584) // F
-ROW( 159, -14, 148, 125, -407, 662, -114, -216, -34, -324,
- -246, 79, -77, -68, 97, 155, 21, -93, -58, -349) // G
-ROW( -88, 43, 78, 25, 62, -114, 766, -205, 144, -92,
- -152, 238, 66, 368, 257, 35, -35, -217, -201, 468) // H
-ROW( 89, -151, -245, -245, 80, -216, -205, 554, -224, 288,
- 391, -114, -115, -222, -208, -19, 162, 469, -274, -153) // I
-ROW( -55, -204, 55, 175, -443, -34, 144, -224, 632, -249,
- -118, 186, -86, 315, 466, 2, 19, -227, -216, -264) // K
-ROW( -67, -196, -361, -327, 320, -324, -92, 288, -249, 591,
- 369, -223, 53, -86, -170, -69, -41, 239, -66, -29) // L
-ROW( 19, -132, -255, -242, 67, -246, -152, 391, -118, 369,
- 756, -131, -98, -124, -129, -49, 129, 331, -229, -182) // M
-ROW( 86, -49, 332, 166, -236, 79, 238, -114, 186, -223,
- -131, 516, -21, 88, 73, 240, 168, -118, -379, -8) // N
-ROW( 186, -142, -169, -141, -180, -77, 66, -115, -86, 53,
- -98, -21, 736, 122, 5, 221, 139, -75, -373, -226) // P
-ROW( -34, -215, 122, 279, -294, -68, 368, -222, 315, -86,
- -124, 88, 122, 635, 301, -13, -35, -195, -243, -73) // Q
-ROW( -32, 29, -64, 34, -327, 97, 257, -208, 466, -170,
- -129, 73, 5, 301, 606, 28, -4, -201, 104, -133) // R
-ROW( 237, 165, 45, -30, -51, 155, 35, -19, 2, -69,
- -49, 240, 221, -13, 28, 353, 259, 8, -213, -55) // S
-ROW( 273, -7, -13, -56, -173, 21, -35, 162, 19, -41,
- 129, 168, 139, -35, -4, 259, 422, 143, -343, -190) // T
-ROW( 171, -69, -167, -150, 31, -93, -217, 469, -227, 239,
- 331, -118, -75, -195, -201, 8, 143, 505, -245, -197) // V
-ROW( -326, 179, -438, -386, -1, -58, -201, -274, -216, -66,
- -229, -379, -373, -243, 104, -213, -343, -245, 1475, 63) // W
-ROW( -239, 313, -148, -305, 584, -349, 468, -153, -264, -29,
- -182, -8, -226, -73, -133, -55, -190, -197, 63, 979) // Y
- };
-
-// Redefine v(x) without the PAM_200_CENTER shift for the uncentered table.
-// The argument is parenthesized so the cast applies to the whole argument.
-// RNC(...): one 20-entry initializer row, including its trailing comma.
-#undef v
-#define v(x) ((float) (x))
-#define RNC(A, C, D, E, F, G, H, I, K, L, M, N, P, Q, R, S, T, V, W, Y) \
- { v(A), v(C), v(D), v(E), v(F), v(G), v(H), v(I), v(K), v(L), \
- v(M), v(N), v(P), v(Q), v(R), v(S), v(T), v(V), v(W), v(Y) },
-
-float PAM200NoCenter[32][32] =
-
- {
-// A C D E F G H I K L
-// M N P Q R S T V W Y
-RNC( 388, -0, 34, 32, -202, 159, -88, 89, -55, -67,
- 19, 86, 186, -34, -32, 237, 273, 171, -326, -239) // A
-RNC( -0, 1170, -248, -315, 74, -14, 43, -151, -204, -196,
- -132, -49, -142, -215, 29, 165, -7, -69, 179, 313) // C
-RNC( 34, -248, 625, 496, -419, 148, 78, -245, 55, -361,
- -255, 332, -169, 122, -64, 45, -13, -167, -438, -148) // D
-RNC( 32, -315, 496, 610, -480, 125, 25, -245, 175, -327,
- -242, 166, -141, 279, 34, -30, -56, -150, -386, -305) // E
-RNC( -202, 74, -419, -480, 888, -407, 62, 80, -443, 320,
- 67, -236, -180, -294, -327, -51, -173, 31, -1, 584) // F
-RNC( 159, -14, 148, 125, -407, 662, -114, -216, -34, -324,
- -246, 79, -77, -68, 97, 155, 21, -93, -58, -349) // G
-RNC( -88, 43, 78, 25, 62, -114, 766, -205, 144, -92,
- -152, 238, 66, 368, 257, 35, -35, -217, -201, 468) // H
-RNC( 89, -151, -245, -245, 80, -216, -205, 554, -224, 288,
- 391, -114, -115, -222, -208, -19, 162, 469, -274, -153) // I
-RNC( -55, -204, 55, 175, -443, -34, 144, -224, 632, -249,
- -118, 186, -86, 315, 466, 2, 19, -227, -216, -264) // K
-RNC( -67, -196, -361, -327, 320, -324, -92, 288, -249, 591,
- 369, -223, 53, -86, -170, -69, -41, 239, -66, -29) // L
-RNC( 19, -132, -255, -242, 67, -246, -152, 391, -118, 369,
- 756, -131, -98, -124, -129, -49, 129, 331, -229, -182) // M
-RNC( 86, -49, 332, 166, -236, 79, 238, -114, 186, -223,
- -131, 516, -21, 88, 73, 240, 168, -118, -379, -8) // N
-RNC( 186, -142, -169, -141, -180, -77, 66, -115, -86, 53,
- -98, -21, 736, 122, 5, 221, 139, -75, -373, -226) // P
-RNC( -34, -215, 122, 279, -294, -68, 368, -222, 315, -86,
- -124, 88, 122, 635, 301, -13, -35, -195, -243, -73) // Q
-RNC( -32, 29, -64, 34, -327, 97, 257, -208, 466, -170,
- -129, 73, 5, 301, 606, 28, -4, -201, 104, -133) // R
-RNC( 237, 165, 45, -30, -51, 155, 35, -19, 2, -69,
- -49, 240, 221, -13, 28, 353, 259, 8, -213, -55) // S
-RNC( 273, -7, -13, -56, -173, 21, -35, 162, 19, -41,
- 129, 168, 139, -35, -4, 259, 422, 143, -343, -190) // T
-RNC( 171, -69, -167, -150, 31, -93, -217, 469, -227, 239,
- 331, -118, -75, -195, -201, 8, 143, 505, -245, -197) // V
-RNC( -326, 179, -438, -386, -1, -58, -201, -274, -216, -66,
- -229, -379, -373, -243, 104, -213, -343, -245, 1475, 63) // W
-RNC( -239, 313, -148, -305, 584, -349, 468, -153, -264, -29,
- -182, -8, -226, -73, -133, -55, -190, -197, 63, 979) // Y
- };
Deleted: trunk/packages/muscle/trunk/msa.cpp
===================================================================
--- trunk/packages/muscle/trunk/msa.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/msa.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,851 +0,0 @@
-#include "muscle.h"
-#include "msa.h"
-#include "textfile.h"
-#include "seq.h"
-#include <math.h>
-
-const unsigned DEFAULT_SEQ_LENGTH = 500;
-
-unsigned MSA::m_uIdCount = 0;
-
-MSA::MSA()
- {
- m_uSeqCount = 0;
- m_uColCount = 0;
-
- m_szSeqs = 0;
- m_szNames = 0;
- m_Weights = 0;
-
- m_IdToSeqIndex = 0;
- m_SeqIndexToId = 0;
-
- m_uCacheSeqCount = 0;
- m_uCacheSeqLength = 0;
- }
-
-MSA::~MSA()
- {
- Free();
- }
-
-void MSA::Free()
- {
- for (unsigned n = 0; n < m_uSeqCount; ++n)
- {
- delete[] m_szSeqs[n];
- delete[] m_szNames[n];
- }
-
- delete[] m_szSeqs;
- delete[] m_szNames;
- delete[] m_Weights;
- delete[] m_IdToSeqIndex;
- delete[] m_SeqIndexToId;
-
- m_uSeqCount = 0;
- m_uColCount = 0;
-
- m_szSeqs = 0;
- m_szNames = 0;
- m_Weights = 0;
-
- m_IdToSeqIndex = 0;
- m_SeqIndexToId = 0;
- }
-
-void MSA::SetSize(unsigned uSeqCount, unsigned uColCount)
- {
- Free();
-
- m_uSeqCount = uSeqCount;
- m_uCacheSeqLength = uColCount;
- m_uColCount = 0;
-
- if (0 == uSeqCount && 0 == uColCount)
- return;
-
- m_szSeqs = new char *[uSeqCount];
- m_szNames = new char *[uSeqCount];
- m_Weights = new WEIGHT[uSeqCount];
-
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
- {
- m_szSeqs[uSeqIndex] = new char[uColCount+1];
- m_szNames[uSeqIndex] = 0;
-#if DEBUG
- m_Weights[uSeqIndex] = BTInsane;
- memset(m_szSeqs[uSeqIndex], '?', uColCount);
-#endif
- m_szSeqs[uSeqIndex][uColCount] = 0;
- }
-
- if (m_uIdCount > 0)
- {
- m_IdToSeqIndex = new unsigned[m_uIdCount];
- m_SeqIndexToId = new unsigned[m_uSeqCount];
-#if DEBUG
- memset(m_IdToSeqIndex, 0xff, m_uIdCount*sizeof(unsigned));
- memset(m_SeqIndexToId, 0xff, m_uSeqCount*sizeof(unsigned));
-#endif
- }
- }
-
-void MSA::LogMe() const
- {
- if (0 == GetColCount())
- {
- Log("MSA empty\n");
- return;
- }
-
- const unsigned uColsPerLine = 50;
- unsigned uLinesPerSeq = (GetColCount() - 1)/uColsPerLine + 1;
- for (unsigned n = 0; n < uLinesPerSeq; ++n)
- {
- unsigned i;
- unsigned iStart = n*uColsPerLine;
- unsigned iEnd = GetColCount();
- if (iEnd - iStart + 1 > uColsPerLine)
- iEnd = iStart + uColsPerLine;
- Log(" ");
- for (i = iStart; i < iEnd; ++i)
- Log("%u", i%10);
- Log("\n");
- Log(" ");
- for (i = iStart; i + 9 < iEnd; i += 10)
- Log("%-10u", i);
- if (n == uLinesPerSeq - 1)
- Log(" %-10u", GetColCount());
- Log("\n");
- for (unsigned uSeqIndex = 0; uSeqIndex < m_uSeqCount; ++uSeqIndex)
- {
- Log("%12.12s", m_szNames[uSeqIndex]);
- if (m_Weights[uSeqIndex] != BTInsane)
- Log(" (%5.3f)", m_Weights[uSeqIndex]);
- else
- Log(" ");
- Log(" ");
- for (i = iStart; i < iEnd; ++i)
- Log("%c", GetChar(uSeqIndex, i));
- if (0 != m_SeqIndexToId)
- Log(" [%5u]", m_SeqIndexToId[uSeqIndex]);
- Log("\n");
- }
- Log("\n\n");
- }
- }
-
-char MSA::GetChar(unsigned uSeqIndex, unsigned uIndex) const
- {
-// TODO: Performance cost?
- if (uSeqIndex >= m_uSeqCount || uIndex >= m_uColCount)
- Quit("MSA::GetChar(%u/%u,%u/%u)",
- uSeqIndex, m_uSeqCount, uIndex, m_uColCount);
-
- char c = m_szSeqs[uSeqIndex][uIndex];
-// assert(IsLegalChar(c));
- return c;
- }
-
-unsigned MSA::GetLetter(unsigned uSeqIndex, unsigned uIndex) const
- {
-// TODO: Performance cost?
- char c = GetChar(uSeqIndex, uIndex);
- unsigned uLetter = CharToLetter(c);
- if (uLetter >= 20)
- {
- char c = ' ';
- if (uSeqIndex < m_uSeqCount && uIndex < m_uColCount)
- c = m_szSeqs[uSeqIndex][uIndex];
- Quit("MSA::GetLetter(%u/%u, %u/%u)='%c'/%u",
- uSeqIndex, m_uSeqCount, uIndex, m_uColCount, c, uLetter);
- }
- return uLetter;
- }
-
-unsigned MSA::GetLetterEx(unsigned uSeqIndex, unsigned uIndex) const
- {
-// TODO: Performance cost?
- char c = GetChar(uSeqIndex, uIndex);
- unsigned uLetter = CharToLetterEx(c);
- return uLetter;
- }
-
-void MSA::SetSeqName(unsigned uSeqIndex, const char szName[])
- {
- if (uSeqIndex >= m_uSeqCount)
- Quit("MSA::SetSeqName(%u, %s), count=%u", uSeqIndex, m_uSeqCount);
- delete[] m_szNames[uSeqIndex];
- int n = (int) strlen(szName) + 1;
- m_szNames[uSeqIndex] = new char[n];
- memcpy(m_szNames[uSeqIndex], szName, n);
- }
-
-const char *MSA::GetSeqName(unsigned uSeqIndex) const
- {
- if (uSeqIndex >= m_uSeqCount)
- Quit("MSA::GetSeqName(%u), count=%u", uSeqIndex, m_uSeqCount);
- return m_szNames[uSeqIndex];
- }
-
-bool MSA::IsGap(unsigned uSeqIndex, unsigned uIndex) const
- {
- char c = GetChar(uSeqIndex, uIndex);
- return IsGapChar(c);
- }
-
-bool MSA::IsWildcard(unsigned uSeqIndex, unsigned uIndex) const
- {
- char c = GetChar(uSeqIndex, uIndex);
- return IsWildcardChar(c);
- }
-
-void MSA::SetChar(unsigned uSeqIndex, unsigned uIndex, char c)
- {
- if (uSeqIndex >= m_uSeqCount || uIndex > m_uCacheSeqLength)
- Quit("MSA::SetChar(%u,%u)", uSeqIndex, uIndex);
-
- if (uIndex == m_uCacheSeqLength)
- {
- const unsigned uNewCacheSeqLength = m_uCacheSeqLength + DEFAULT_SEQ_LENGTH;
- for (unsigned n = 0; n < m_uSeqCount; ++n)
- {
- char *ptrNewSeq = new char[uNewCacheSeqLength+1];
- memcpy(ptrNewSeq, m_szSeqs[n], m_uCacheSeqLength);
- memset(ptrNewSeq + m_uCacheSeqLength, '?', DEFAULT_SEQ_LENGTH);
- ptrNewSeq[uNewCacheSeqLength] = 0;
- delete[] m_szSeqs[n];
- m_szSeqs[n] = ptrNewSeq;
- }
-
- m_uColCount = uIndex;
- m_uCacheSeqLength = uNewCacheSeqLength;
- }
-
- if (uIndex >= m_uColCount)
- m_uColCount = uIndex + 1;
- m_szSeqs[uSeqIndex][uIndex] = c;
- }
-
-void MSA::GetSeq(unsigned uSeqIndex, Seq &seq) const
- {
- assert(uSeqIndex < m_uSeqCount);
-
- seq.Clear();
-
- for (unsigned n = 0; n < m_uColCount; ++n)
- if (!IsGap(uSeqIndex, n))
- {
- char c = GetChar(uSeqIndex, n);
- if (!isalpha(c))
- Quit("Invalid character '%c' in sequence", c);
- c = toupper(c);
- seq.push_back(c);
- }
- const char *ptrName = GetSeqName(uSeqIndex);
- seq.SetName(ptrName);
- }
-
-bool MSA::HasGap() const
- {
- for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
- for (unsigned n = 0; n < GetColCount(); ++n)
- if (IsGap(uSeqIndex, n))
- return true;
- return false;
- }
-
-bool MSA::IsLegalLetter(unsigned uLetter) const
- {
- return uLetter < 20;
- }
-
-void MSA::SetSeqCount(unsigned uSeqCount)
- {
- Free();
- SetSize(uSeqCount, DEFAULT_SEQ_LENGTH);
- }
-
-void MSA::CopyCol(unsigned uFromCol, unsigned uToCol)
- {
- assert(uFromCol < GetColCount());
- assert(uToCol < GetColCount());
- if (uFromCol == uToCol)
- return;
-
- for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
- {
- const char c = GetChar(uSeqIndex, uFromCol);
- SetChar(uSeqIndex, uToCol, c);
- }
- }
-
-void MSA::Copy(const MSA &msa)
- {
- Free();
- const unsigned uSeqCount = msa.GetSeqCount();
- const unsigned uColCount = msa.GetColCount();
- SetSize(uSeqCount, uColCount);
-
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
- {
- SetSeqName(uSeqIndex, msa.GetSeqName(uSeqIndex));
- const unsigned uId = msa.GetSeqId(uSeqIndex);
- SetSeqId(uSeqIndex, uId);
- for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
- {
- const char c = msa.GetChar(uSeqIndex, uColIndex);
- SetChar(uSeqIndex, uColIndex, c);
- }
- }
- }
-
-bool MSA::IsGapColumn(unsigned uColIndex) const
- {
- assert(GetSeqCount() > 0);
- for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
- if (!IsGap(uSeqIndex, uColIndex))
- return false;
- return true;
- }
-
-bool MSA::GetSeqIndex(const char *ptrSeqName, unsigned *ptruSeqIndex) const
- {
- for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
- if (0 == stricmp(ptrSeqName, GetSeqName(uSeqIndex)))
- {
- *ptruSeqIndex = uSeqIndex;
- return true;
- }
- return false;
- }
-
-void MSA::DeleteCol(unsigned uColIndex)
- {
- assert(uColIndex < m_uColCount);
- size_t n = m_uColCount - uColIndex;
- if (n > 0)
- {
- for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
- {
- char *ptrSeq = m_szSeqs[uSeqIndex];
- memmove(ptrSeq + uColIndex, ptrSeq + uColIndex + 1, n);
- }
- }
- --m_uColCount;
- }
-
-void MSA::DeleteColumns(unsigned uColIndex, unsigned uColCount)
- {
- for (unsigned n = 0; n < uColCount; ++n)
- DeleteCol(uColIndex);
- }
-
-void MSA::FromFile(TextFile &File)
- {
- FromFASTAFile(File);
- }
-
-// Weights sum to 1, WCounts sum to NIC
-WEIGHT MSA::GetSeqWeight(unsigned uSeqIndex) const
- {
- assert(uSeqIndex < m_uSeqCount);
- WEIGHT w = m_Weights[uSeqIndex];
- if (w == wInsane)
- Quit("Seq weight not set");
- return w;
- }
-
-void MSA::SetSeqWeight(unsigned uSeqIndex, WEIGHT w) const
- {
- assert(uSeqIndex < m_uSeqCount);
- m_Weights[uSeqIndex] = w;
- }
-
-void MSA::NormalizeWeights(WEIGHT wDesiredTotal) const
- {
- WEIGHT wTotal = 0;
- for (unsigned uSeqIndex = 0; uSeqIndex < m_uSeqCount; ++uSeqIndex)
- wTotal += m_Weights[uSeqIndex];
-
- if (0 == wTotal)
- return;
-
- const WEIGHT f = wDesiredTotal/wTotal;
- for (unsigned uSeqIndex = 0; uSeqIndex < m_uSeqCount; ++uSeqIndex)
- m_Weights[uSeqIndex] *= f;
- }
-
-void MSA::CalcWeights() const
- {
- Quit("Calc weights not implemented");
- }
-
-static void FmtChar(char c, unsigned uWidth)
- {
- Log("%c", c);
- for (unsigned n = 0; n < uWidth - 1; ++n)
- Log(" ");
- }
-
-static void FmtInt(unsigned u, unsigned uWidth)
- {
- static char szStr[1024];
- assert(uWidth < sizeof(szStr));
- if (u > 0)
- sprintf(szStr, "%u", u);
- else
- strcpy(szStr, ".");
- Log(szStr);
- unsigned n = (unsigned) strlen(szStr);
- if (n < uWidth)
- for (unsigned i = 0; i < uWidth - n; ++i)
- Log(" ");
- }
-
-static void FmtInt0(unsigned u, unsigned uWidth)
- {
- static char szStr[1024];
- assert(uWidth < sizeof(szStr));
- sprintf(szStr, "%u", u);
- Log(szStr);
- unsigned n = (unsigned) strlen(szStr);
- if (n < uWidth)
- for (unsigned i = 0; i < uWidth - n; ++i)
- Log(" ");
- }
-
-static void FmtPad(unsigned n)
- {
- for (unsigned i = 0; i < n; ++i)
- Log(" ");
- }
-
-void MSA::FromSeq(const Seq &s)
- {
- unsigned uSeqLength = s.Length();
- SetSize(1, uSeqLength);
- SetSeqName(0, s.GetName());
- if (0 != m_SeqIndexToId)
- SetSeqId(0, s.GetId());
- for (unsigned n = 0; n < uSeqLength; ++n)
- SetChar(0, n, s[n]);
- }
-
-unsigned MSA::GetCharCount(unsigned uSeqIndex, unsigned uColIndex) const
- {
- assert(uSeqIndex < GetSeqCount());
- assert(uColIndex < GetColCount());
-
- unsigned uCol = 0;
- for (unsigned n = 0; n <= uColIndex; ++n)
- if (!IsGap(uSeqIndex, n))
- ++uCol;
- return uCol;
- }
-
-void MSA::CopySeq(unsigned uToSeqIndex, const MSA &msaFrom, unsigned uFromSeqIndex)
- {
- assert(uToSeqIndex < m_uSeqCount);
- const unsigned uColCount = msaFrom.GetColCount();
- assert(m_uColCount == uColCount ||
- (0 == m_uColCount && uColCount <= m_uCacheSeqLength));
-
- memcpy(m_szSeqs[uToSeqIndex], msaFrom.GetSeqBuffer(uFromSeqIndex), uColCount);
- SetSeqName(uToSeqIndex, msaFrom.GetSeqName(uFromSeqIndex));
- if (0 == m_uColCount)
- m_uColCount = uColCount;
- }
-
-const char *MSA::GetSeqBuffer(unsigned uSeqIndex) const
- {
- assert(uSeqIndex < m_uSeqCount);
- return m_szSeqs[uSeqIndex];
- }
-
-void MSA::DeleteSeq(unsigned uSeqIndex)
- {
- assert(uSeqIndex < m_uSeqCount);
-
- delete m_szSeqs[uSeqIndex];
- delete m_szNames[uSeqIndex];
-
- const unsigned uBytesToMove = (m_uSeqCount - uSeqIndex)*sizeof(char *);
- if (uBytesToMove > 0)
- {
- memmove(m_szSeqs + uSeqIndex, m_szSeqs + uSeqIndex + 1, uBytesToMove);
- memmove(m_szNames + uSeqIndex, m_szNames + uSeqIndex + 1, uBytesToMove);
- }
-
- --m_uSeqCount;
-
- delete[] m_Weights;
- m_Weights = 0;
- }
-
-bool MSA::IsEmptyCol(unsigned uColIndex) const
- {
- const unsigned uSeqCount = GetSeqCount();
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
- if (!IsGap(uSeqIndex, uColIndex))
- return false;
- return true;
- }
-
-//void MSA::DeleteEmptyCols(bool bProgress)
-// {
-// unsigned uColCount = GetColCount();
-// for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
-// {
-// if (IsEmptyCol(uColIndex))
-// {
-// if (bProgress)
-// {
-// Log("Deleting col %u of %u\n", uColIndex, uColCount);
-// printf("Deleting col %u of %u\n", uColIndex, uColCount);
-// }
-// DeleteCol(uColIndex);
-// --uColCount;
-// }
-// }
-// }
-
-unsigned MSA::AlignedColIndexToColIndex(unsigned uAlignedColIndex) const
- {
- Quit("MSA::AlignedColIndexToColIndex not implemented");
- return 0;
- }
-
-WEIGHT MSA::GetTotalSeqWeight() const
- {
- WEIGHT wTotal = 0;
- const unsigned uSeqCount = GetSeqCount();
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
- wTotal += m_Weights[uSeqIndex];
- return wTotal;
- }
-
-bool MSA::SeqsEq(const MSA &a1, unsigned uSeqIndex1, const MSA &a2,
- unsigned uSeqIndex2)
- {
- Seq s1;
- Seq s2;
-
- a1.GetSeq(uSeqIndex1, s1);
- a2.GetSeq(uSeqIndex2, s2);
-
- s1.StripGaps();
- s2.StripGaps();
-
- return s1.EqIgnoreCase(s2);
- }
-
-unsigned MSA::GetSeqLength(unsigned uSeqIndex) const
- {
- assert(uSeqIndex < GetSeqCount());
-
- const unsigned uColCount = GetColCount();
- unsigned uLength = 0;
- for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
- if (!IsGap(uSeqIndex, uColIndex))
- ++uLength;
- return uLength;
- }
-
-void MSA::GetPWID(unsigned uSeqIndex1, unsigned uSeqIndex2, double *ptrPWID,
- unsigned *ptruPosCount) const
- {
- assert(uSeqIndex1 < GetSeqCount());
- assert(uSeqIndex2 < GetSeqCount());
-
- unsigned uSameCount = 0;
- unsigned uPosCount = 0;
- const unsigned uColCount = GetColCount();
- for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
- {
- char c1 = GetChar(uSeqIndex1, uColIndex);
- if (IsGapChar(c1))
- continue;
- char c2 = GetChar(uSeqIndex2, uColIndex);
- if (IsGapChar(c2))
- continue;
- ++uPosCount;
- if (c1 == c2)
- ++uSameCount;
- }
- *ptruPosCount = uPosCount;
- if (uPosCount > 0)
- *ptrPWID = 100.0 * (double) uSameCount / (double) uPosCount;
- else
- *ptrPWID = 0;
- }
-
-void MSA::UnWeight()
- {
- for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
- m_Weights[uSeqIndex] = BTInsane;
- }
-
-unsigned MSA::UniqueResidueTypes(unsigned uColIndex) const
- {
- assert(uColIndex < GetColCount());
-
- unsigned Counts[MAX_ALPHA];
- memset(Counts, 0, sizeof(Counts));
- const unsigned uSeqCount = GetSeqCount();
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
- {
- if (IsGap(uSeqIndex, uColIndex) || IsWildcard(uSeqIndex, uColIndex))
- continue;
- const unsigned uLetter = GetLetter(uSeqIndex, uColIndex);
- ++(Counts[uLetter]);
- }
- unsigned uUniqueCount = 0;
- for (unsigned uLetter = 0; uLetter < g_AlphaSize; ++uLetter)
- if (Counts[uLetter] > 0)
- ++uUniqueCount;
- return uUniqueCount;
- }
-
-double MSA::GetOcc(unsigned uColIndex) const
- {
- unsigned uGapCount = 0;
- for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
- if (IsGap(uSeqIndex, uColIndex))
- ++uGapCount;
- unsigned uSeqCount = GetSeqCount();
- return (double) (uSeqCount - uGapCount) / (double) uSeqCount;
- }
-
-void MSA::ToFile(TextFile &File) const
- {
- if (g_bMSF)
- ToMSFFile(File);
- else if (g_bAln)
- ToAlnFile(File);
- else if (g_bHTML)
- ToHTMLFile(File);
- else if (g_bPHYS)
- ToPhySequentialFile(File);
- else if (g_bPHYI)
- ToPhyInterleavedFile(File);
- else
- ToFASTAFile(File);
- if (0 != g_pstrScoreFileName)
- WriteScoreFile(*this);
- }
-
-bool MSA::ColumnHasGap(unsigned uColIndex) const
- {
- const unsigned uSeqCount = GetSeqCount();
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
- if (IsGap(uSeqIndex, uColIndex))
- return true;
- return false;
- }
-
-void MSA::SetIdCount(unsigned uIdCount)
- {
- //if (m_uIdCount != 0)
- // Quit("MSA::SetIdCount: may only be called once");
-
- if (m_uIdCount > 0)
- {
- if (uIdCount > m_uIdCount)
- Quit("MSA::SetIdCount: cannot increase count");
- return;
- }
- m_uIdCount = uIdCount;
- }
-
-void MSA::SetSeqId(unsigned uSeqIndex, unsigned uId)
- {
- assert(uSeqIndex < m_uSeqCount);
- assert(uId < m_uIdCount);
- if (0 == m_SeqIndexToId)
- {
- if (0 == m_uIdCount)
- Quit("MSA::SetSeqId, SetIdCount has not been called");
- m_IdToSeqIndex = new unsigned[m_uIdCount];
- m_SeqIndexToId = new unsigned[m_uSeqCount];
-
- memset(m_IdToSeqIndex, 0xff, m_uIdCount*sizeof(unsigned));
- memset(m_SeqIndexToId, 0xff, m_uSeqCount*sizeof(unsigned));
- }
- m_SeqIndexToId[uSeqIndex] = uId;
- m_IdToSeqIndex[uId] = uSeqIndex;
- }
-
-unsigned MSA::GetSeqIndex(unsigned uId) const
- {
- assert(uId < m_uIdCount);
- assert(0 != m_IdToSeqIndex);
- unsigned uSeqIndex = m_IdToSeqIndex[uId];
- assert(uSeqIndex < m_uSeqCount);
- return uSeqIndex;
- }
-
-bool MSA::GetSeqIndex(unsigned uId, unsigned *ptruIndex) const
- {
- for (unsigned uSeqIndex = 0; uSeqIndex < m_uSeqCount; ++uSeqIndex)
- {
- if (uId == m_SeqIndexToId[uSeqIndex])
- {
- *ptruIndex = uSeqIndex;
- return true;
- }
- }
- return false;
- }
-
-unsigned MSA::GetSeqId(unsigned uSeqIndex) const
- {
- assert(uSeqIndex < m_uSeqCount);
- unsigned uId = m_SeqIndexToId[uSeqIndex];
- assert(uId < m_uIdCount);
- return uId;
- }
-
-bool MSA::WeightsSet() const
- {
- return BTInsane != m_Weights[0];
- }
-
-void MSASubsetByIds(const MSA &msaIn, const unsigned Ids[], unsigned uIdCount,
- MSA &msaOut)
- {
- const unsigned uColCount = msaIn.GetColCount();
- msaOut.SetSize(uIdCount, uColCount);
- for (unsigned uSeqIndexOut = 0; uSeqIndexOut < uIdCount; ++uSeqIndexOut)
- {
- const unsigned uId = Ids[uSeqIndexOut];
-
- const unsigned uSeqIndexIn = msaIn.GetSeqIndex(uId);
- const char *ptrName = msaIn.GetSeqName(uSeqIndexIn);
-
- msaOut.SetSeqId(uSeqIndexOut, uId);
- msaOut.SetSeqName(uSeqIndexOut, ptrName);
-
- for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
- {
- const char c = msaIn.GetChar(uSeqIndexIn, uColIndex);
- msaOut.SetChar(uSeqIndexOut, uColIndex, c);
- }
- }
- }
-
-// Caller must allocate ptrSeq and ptrLabel as new char[n].
-void MSA::AppendSeq(char *ptrSeq, unsigned uSeqLength, char *ptrLabel)
- {
- if (m_uSeqCount > m_uCacheSeqCount)
- Quit("Internal error MSA::AppendSeq");
- if (m_uSeqCount == m_uCacheSeqCount)
- ExpandCache(m_uSeqCount + 4, uSeqLength);
- m_szSeqs[m_uSeqCount] = ptrSeq;
- m_szNames[m_uSeqCount] = ptrLabel;
- ++m_uSeqCount;
- }
-
-void MSA::ExpandCache(unsigned uSeqCount, unsigned uColCount)
- {
- if (m_IdToSeqIndex != 0 || m_SeqIndexToId != 0 || uSeqCount < m_uSeqCount)
- Quit("Internal error MSA::ExpandCache");
-
- if (m_uSeqCount > 0 && uColCount != m_uColCount)
- Quit("Internal error MSA::ExpandCache, ColCount changed");
-
- char **NewSeqs = new char *[uSeqCount];
- char **NewNames = new char *[uSeqCount];
- WEIGHT *NewWeights = new WEIGHT[uSeqCount];
-
- for (unsigned uSeqIndex = 0; uSeqIndex < m_uSeqCount; ++uSeqIndex)
- {
- NewSeqs[uSeqIndex] = m_szSeqs[uSeqIndex];
- NewNames[uSeqIndex] = m_szNames[uSeqIndex];
- NewWeights[uSeqIndex] = m_Weights[uSeqIndex];
- }
-
- for (unsigned uSeqIndex = m_uSeqCount; uSeqIndex < uSeqCount; ++uSeqIndex)
- {
- char *Seq = new char[uColCount];
- NewSeqs[uSeqIndex] = Seq;
-#if DEBUG
- memset(Seq, '?', uColCount);
-#endif
- }
-
- delete[] m_szSeqs;
- delete[] m_szNames;
- delete[] m_Weights;
-
- m_szSeqs = NewSeqs;
- m_szNames = NewNames;
- m_Weights = NewWeights;
-
- m_uCacheSeqCount = uSeqCount;
- m_uCacheSeqLength = uColCount;
- m_uColCount = uColCount;
- }
-
-void MSA::FixAlpha()
- {
- ClearInvalidLetterWarning();
- for (unsigned uSeqIndex = 0; uSeqIndex < m_uSeqCount; ++uSeqIndex)
- {
- for (unsigned uColIndex = 0; uColIndex < m_uColCount; ++uColIndex)
- {
- char c = GetChar(uSeqIndex, uColIndex);
- if (!IsResidueChar(c) && !IsGapChar(c))
- {
- char w = GetWildcardChar();
- // Warning("Invalid letter '%c', replaced by '%c'", c, w);
- InvalidLetterWarning(c, w);
- SetChar(uSeqIndex, uColIndex, w);
- }
- }
- }
- ReportInvalidLetters();
- }
-
-ALPHA MSA::GuessAlpha() const
- {
-// If at least MIN_NUCLEO_PCT of the first CHAR_COUNT non-gap
-// letters belong to the nucleotide alphabet, guess nucleo.
-// Otherwise amino.
- const unsigned CHAR_COUNT = 100;
- const unsigned MIN_NUCLEO_PCT = 95;
-
- const unsigned uSeqCount = GetSeqCount();
- const unsigned uColCount = GetColCount();
- if (0 == uSeqCount)
- return ALPHA_Amino;
-
- unsigned uDNACount = 0;
- unsigned uRNACount = 0;
- unsigned uTotal = 0;
- unsigned i = 0;
- for (;;)
- {
- unsigned uSeqIndex = i/uColCount;
- if (uSeqIndex >= uSeqCount)
- break;
- unsigned uColIndex = i%uColCount;
- ++i;
- char c = GetChar(uSeqIndex, uColIndex);
- if (IsGapChar(c))
- continue;
- if (IsDNA(c))
- ++uDNACount;
- if (IsRNA(c))
- ++uRNACount;
- ++uTotal;
- if (uTotal >= CHAR_COUNT)
- break;
- }
- if (uTotal != 0 && ((uRNACount*100)/uTotal) >= MIN_NUCLEO_PCT)
- return ALPHA_RNA;
- if (uTotal != 0 && ((uDNACount*100)/uTotal) >= MIN_NUCLEO_PCT)
- return ALPHA_DNA;
- return ALPHA_Amino;
- }
Deleted: trunk/packages/muscle/trunk/msa.h
===================================================================
--- trunk/packages/muscle/trunk/msa.h 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/msa.h 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,179 +0,0 @@
-#ifndef MSA_h
-#define MSA_h
-
-const int MAX_SEQ_NAME = 63;
-struct PathEdge;
-class TextFile;
-class Seq;
-class ClusterNode;
-class NodeCounts;
-class DataBuffer;
-
-class MSA
- {
-public:
- MSA();
- virtual ~MSA();
-
-public:
-// Ways to create an MSA
- void FromFile(TextFile &File);
- void FromFASTAFile(TextFile &File);
- void FromSeq(const Seq &s);
-
- void ToFile(TextFile &File) const;
- void ToFASTAFile(TextFile &File) const;
- void ToMSFFile(TextFile &File, const char *ptrComment = 0) const;
- void ToAlnFile(TextFile &File) const;
- void ToHTMLFile(TextFile &File) const;
- void ToPhySequentialFile(TextFile &File) const;
- void ToPhyInterleavedFile(TextFile &File) const;
-
- void SetSize(unsigned uSeqCount, unsigned uColCount);
- void SetSeqCount(unsigned uSeqCount);
- char GetChar(unsigned uSeqIndex, unsigned uIndex) const;
- unsigned GetLetter(unsigned uSeqIndex, unsigned uIndex) const;
- unsigned GetLetterEx(unsigned uSeqIndex, unsigned uIndex) const;
- const char *GetSeqName(unsigned uSeqIndex) const;
- unsigned GetSeqId(unsigned uSeqIndex) const;
- unsigned GetSeqIndex(unsigned uId) const;
- bool GetSeqIndex(unsigned uId, unsigned *ptruIndex) const;
- double GetOcc(unsigned uColIndex) const;
- void GetFractionalWeightedCounts(unsigned uColIndex, bool bNormalize,
- FCOUNT fcCounts[], FCOUNT *ptrfcGapStart, FCOUNT *ptrfcGapEnd,
- FCOUNT *fcGapExtend, FCOUNT *ptrfOcc,
- FCOUNT *fcLL, FCOUNT *fcLG, FCOUNT *fcGL, FCOUNT *fcGG) const;
- bool IsGap(unsigned uSeqIndex, unsigned uColIndex) const;
- bool IsWildcard(unsigned uSeqIndex, unsigned uColIndex) const;
- bool IsGapColumn(unsigned uColIndex) const;
- bool ColumnHasGap(unsigned uColIndex) const;
- bool IsGapSeq(unsigned uSeqIndex) const;
-
- void SetChar(unsigned uSeqIndex, unsigned uColIndex, char c);
- void SetSeqName(unsigned uSeqIndex, const char szName[]);
- void SetSeqId(unsigned uSeqIndex, unsigned uId);
- bool HasGap() const;
- bool IsLegalLetter(unsigned uLetter) const;
- void GetSeq(unsigned uSeqIndex, Seq &seq) const;
- void Copy(const MSA &msa);
- double GetCons(unsigned uColIndex) const;
- double GetAvgCons() const;
- double GetPctIdentityPair(unsigned uSeqIndex1, unsigned uSeqIndex2) const;
- bool GetSeqIndex(const char *ptrSeqName, unsigned *ptruSeqIndex) const;
- void DeleteCol(unsigned uColIndex);
- void DeleteColumns(unsigned uColIndex, unsigned uColCount);
- void CopySeq(unsigned uToSeqIndex, const MSA &msaFrom, unsigned uFromSeqIndex);
- void DeleteSeq(unsigned uSeqIndex);
-// void DeleteEmptyCols(bool bProgress = false);
- bool IsEmptyCol(unsigned uColIndex) const;
-
- WEIGHT GetSeqWeight(unsigned uSeqIndex) const;
- WEIGHT GetTotalSeqWeight() const;
- void SetSeqWeight(unsigned uSeqIndex, WEIGHT w) const;
- void NormalizeWeights(WEIGHT wTotal) const;
- bool WeightsSet() const;
-
- unsigned GetGCGCheckSum(unsigned uSeqIndex) const;
-
- ALPHA GuessAlpha() const;
- void FixAlpha();
-
- unsigned UniqueResidueTypes(unsigned uColIndex) const;
-
- void UnWeight();
-
- void GetNodeCounts(unsigned uAlignedColIndex, NodeCounts &Counts) const;
- void ValidateBreakMatrices() const;
- unsigned GetCharCount(unsigned uSeqIndex, unsigned uColIndex) const;
- const char *GetSeqBuffer(unsigned uSeqIndex) const;
- unsigned AlignedColIndexToColIndex(unsigned uAlignedColIndex) const;
- unsigned GetSeqLength(unsigned uSeqIndex) const;
- void GetPWID(unsigned uSeqIndex1, unsigned uSeqIndex2, double *ptrdPWID,
- unsigned *ptruPosCount) const;
-
- void GetPairMap(unsigned uSeqIndex1, unsigned uSeqIndex2, int iMap1[],
- int iMap2[]) const;
-
- void LogMe() const;
- void ListWeights() const;
-
- void GapInfoToDataBuffer(DataBuffer &Buffer) const;
- void GapInfoFromDataBuffer(const DataBuffer &Buffer);
- double GetPctGroupIdentityPair(unsigned uSeqIndex1, unsigned uSeqIndex2) const;
-
- void Clear()
- {
- Free();
- }
- unsigned GetSeqCount() const
- {
- return m_uSeqCount;
- }
- unsigned GetColCount() const
- {
- return m_uColCount;
- }
-
- static bool SeqsEq(const MSA &a1, unsigned uSeqIndex1, const MSA &a2,
- unsigned uSeqIndex2);
-
- static void SetIdCount(unsigned uIdCount);
-
-private:
- friend void SetMSAWeightsMuscle(MSA &msa);
- friend void SetThreeWayWeightsMuscle(MSA &msa);
- void SetHenikoffWeightsPB() const;
- void SetHenikoffWeights() const;
- void SetGSCWeights() const;
- void SetUniformWeights() const;
- void SetClustalWWeights(const Tree &tree);
-
- void Free();
- void AppendSeq(char *ptrSeq, unsigned uSeqLength, char *ptrLabel);
- void ExpandCache(unsigned uSeqCount, unsigned uColCount);
- void CalcWeights() const;
- void GetNameFromFASTAAnnotationLine(const char szLine[],
- char szName[], unsigned uBytes);
- void CopyCol(unsigned uFromCol, unsigned uToCol);
- unsigned CalcBLOSUMWeights(ClusterTree &BlosumCluster) const;
- void SetBLOSUMSubtreeWeight(const ClusterNode *ptrNode, double dWeight) const;
- unsigned SetBLOSUMNodeWeight(const ClusterNode *ptrNode, double dMinDist) const;
- void SetSubtreeWeight2(const ClusterNode *ptrNode) const;
- void SetSubtreeGSCWeight(ClusterNode *ptrNode) const;
-
- void CalcHenikoffWeightsColPB(unsigned uColIndex) const;
- void CalcHenikoffWeightsCol(unsigned uColIndex) const;
-
-private:
- unsigned m_uSeqCount;
- unsigned m_uColCount;
- unsigned m_uCacheSeqLength;
- unsigned m_uCacheSeqCount;
- char **m_szSeqs;
- char **m_szNames;
-
- static unsigned m_uIdCount;
-
- unsigned *m_IdToSeqIndex;
- unsigned *m_SeqIndexToId;
-
- WEIGHT *m_Weights;
- };
-
-void SeqVectFromMSA(const MSA &msa, SeqVect &v);
-void DeleteGappedCols(MSA &msa);
-void MSAFromColRange(const MSA &msaIn, unsigned uFromColIndex, unsigned uColCount,
- MSA &msaOut);
-void MSACat(const MSA &msa1, const MSA &msa2, MSA &msaCat);
-void MSAAppend(MSA &msa1, const MSA &msa2);
-void MSAFromSeqSubset(const MSA &msaIn, const unsigned uSeqIndexes[], unsigned uSeqCount,
- MSA &msaOut);
-void AssertMSAEq(const MSA &msa1, const MSA &msa2);
-void AssertMSAEqIgnoreCaseAndGaps(const MSA &msa1, const MSA &msa2);
-void MSASubsetByIds(const MSA &msaIn, const unsigned Ids[], unsigned uIdCount,
- MSA &msaOut);
-void SetMSAWeightsMuscle(MSA &msa);
-void SetClustalWWeightsMuscle(MSA &msa);
-void SetThreeWayWeightsMuscle(MSA &msa);
-
-#endif // MSA_h
Deleted: trunk/packages/muscle/trunk/msa2.cpp
===================================================================
--- trunk/packages/muscle/trunk/msa2.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/msa2.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,531 +0,0 @@
-#include "muscle.h"
-#include "msa.h"
-#include "seqvect.h"
-#include "profile.h"
-#include "tree.h"
-
-// These global variables are a hack to allow the tree
-// dependent iteration code to communicate the edge
-// used to divide the tree. The three-way weighting
-// scheme needs to know this edge in order to compute
-// sequence weights.
-static const Tree *g_ptrMuscleTree = 0;
-unsigned g_uTreeSplitNode1 = NULL_NEIGHBOR;
-unsigned g_uTreeSplitNode2 = NULL_NEIGHBOR;
-
-// Compute the weighted statistics of alignment column uColIndex.
-//
-// fcCounts[] (g_AlphaSize entries, zeroed on entry) receives, per letter,
-// the summed weight of the sequences showing that letter. A wildcard
-// spreads its sequence weight over the letters it may stand for:
-//   amino:      B -> D,N (half each); Z -> E,Q (half each); anything else
-//               -> 1/20 to each of the first 20 letters
-//   nucleotide: R -> G,A (half each); Y -> C,T (half each); anything else
-//               -> 1/4 to each of the 4 letters
-// If bNormalize is set and the summed weight of the non-wildcard letters
-// is positive, every count is divided by that sum (Quit if it is > 1.001).
-//
-// Outputs:
-//   *ptrfOcc         1 - (summed weight of sequences gapped in this column)
-//   *ptrfcGapStart   weight of gaps here that are not preceded by a gap
-//                    (a gap in column 0 always counts)
-//   *ptrfcGapEnd     weight of gaps here that are not followed by a gap
-//                    (a gap in the last column always counts)
-//   *ptrfcGapExtend  weight of gaps here with a gap on both sides
-//                    (interior columns only)
-//   *ptrfcLL, LG, GL, GG
-//                    weight split by (previous column, this column)
-//                    letter/gap state; the position before column 0 is
-//                    treated as a letter
-void MSA::GetFractionalWeightedCounts(unsigned uColIndex, bool bNormalize,
-  FCOUNT fcCounts[], FCOUNT *ptrfcGapStart, FCOUNT *ptrfcGapEnd,
-  FCOUNT *ptrfcGapExtend, FCOUNT *ptrfOcc,
-  FCOUNT *ptrfcLL, FCOUNT *ptrfcLG, FCOUNT *ptrfcGL, FCOUNT *ptrfcGG) const
-{
-    const unsigned uSeqCount = GetSeqCount();
-    const unsigned uColCount = GetColCount();
-
-    memset(fcCounts, 0, g_AlphaSize*sizeof(FCOUNT));
-    WEIGHT wTotal = 0;
-    FCOUNT fGap = 0;
-    for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
-    {
-        const WEIGHT w = GetSeqWeight(uSeqIndex);
-        if (IsGap(uSeqIndex, uColIndex))
-        {
-            fGap += w;
-            continue;
-        }
-        else if (IsWildcard(uSeqIndex, uColIndex))
-        {
-            // Wildcards contribute to fcCounts but not to wTotal.
-            const unsigned uLetter = GetLetterEx(uSeqIndex, uColIndex);
-            switch (g_Alpha)
-            {
-            case ALPHA_Amino:
-                switch (uLetter)
-                {
-                case AX_B:      // D or N
-                    fcCounts[AX_D] += w/2;
-                    fcCounts[AX_N] += w/2;
-                    break;
-                case AX_Z:      // E or Q
-                    fcCounts[AX_E] += w/2;
-                    fcCounts[AX_Q] += w/2;
-                    break;
-                default:        // any
-                    {
-                    const FCOUNT f = w/20;
-                    for (unsigned i = 0; i < 20; ++i)
-                        fcCounts[i] += f;
-                    break;
-                    }
-                }
-                break;
-
-            case ALPHA_DNA:
-            case ALPHA_RNA:
-                switch (uLetter)
-                {
-                case AX_R:      // G or A
-                    fcCounts[NX_G] += w/2;
-                    fcCounts[NX_A] += w/2;
-                    break;
-                case AX_Y:      // C or T/U
-                    fcCounts[NX_C] += w/2;
-                    fcCounts[NX_T] += w/2;
-                    break;
-                default:        // any
-                    {
-                    // Four possible letters, so each gets a quarter of the
-                    // weight (the divisor used to be 20, which is the amino
-                    // alphabet size and lost 80% of the weight).
-                    const FCOUNT f = w/4;
-                    for (unsigned i = 0; i < 4; ++i)
-                        fcCounts[i] += f;
-                    break;
-                    }
-                }
-                break;
-
-            default:
-                Quit("Alphabet %d not supported", g_Alpha);
-            }
-            continue;
-        }
-        unsigned uLetter = GetLetter(uSeqIndex, uColIndex);
-        fcCounts[uLetter] += w;
-        wTotal += w;
-    }
-    *ptrfOcc = (float) (1.0 - fGap);
-
-    if (bNormalize && wTotal > 0)
-    {
-        if (wTotal > 1.001)
-            Quit("wTotal=%g\n", wTotal);
-        for (unsigned uLetter = 0; uLetter < g_AlphaSize; ++uLetter)
-            fcCounts[uLetter] /= wTotal;
-//      AssertNormalized(fcCounts);
-    }
-
-    // Gap opens: a gap here with a letter (or the alignment edge) before it.
-    FCOUNT fcStartCount = 0;
-    if (uColIndex == 0)
-    {
-        for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
-            if (IsGap(uSeqIndex, uColIndex))
-                fcStartCount += GetSeqWeight(uSeqIndex);
-    }
-    else
-    {
-        for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
-            if (IsGap(uSeqIndex, uColIndex) && !IsGap(uSeqIndex, uColIndex - 1))
-                fcStartCount += GetSeqWeight(uSeqIndex);
-    }
-
-    // Gap closes: a gap here with a letter (or the alignment edge) after it.
-    FCOUNT fcEndCount = 0;
-    if (uColCount - 1 == uColIndex)
-    {
-        for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
-            if (IsGap(uSeqIndex, uColIndex))
-                fcEndCount += GetSeqWeight(uSeqIndex);
-    }
-    else
-    {
-        for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
-            if (IsGap(uSeqIndex, uColIndex) && !IsGap(uSeqIndex, uColIndex + 1))
-                fcEndCount += GetSeqWeight(uSeqIndex);
-    }
-
-    // Transition weights between the previous column and this one.
-    FCOUNT LL = 0;
-    FCOUNT LG = 0;
-    FCOUNT GL = 0;
-    FCOUNT GG = 0;
-    for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
-    {
-        WEIGHT w = GetSeqWeight(uSeqIndex);
-        bool bLetterHere = !IsGap(uSeqIndex, uColIndex);
-        bool bLetterPrev = (uColIndex == 0 || !IsGap(uSeqIndex, uColIndex - 1));
-        if (bLetterHere)
-        {
-            if (bLetterPrev)
-                LL += w;
-            else
-                GL += w;
-        }
-        else
-        {
-            if (bLetterPrev)
-                LG += w;
-            else
-                GG += w;
-        }
-    }
-
-    // Gap extensions are only counted for interior columns.
-    FCOUNT fcExtendCount = 0;
-    if (uColIndex > 0 && uColIndex < GetColCount() - 1)
-        for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
-            if (IsGap(uSeqIndex, uColIndex) && IsGap(uSeqIndex, uColIndex - 1) &&
-              IsGap(uSeqIndex, uColIndex + 1))
-                fcExtendCount += GetSeqWeight(uSeqIndex);
-
-    *ptrfcLL = LL;
-    *ptrfcLG = LG;
-    *ptrfcGL = GL;
-    *ptrfcGG = GG;
-    *ptrfcGapStart = fcStartCount;
-    *ptrfcGapEnd = fcEndCount;
-    *ptrfcGapExtend = fcExtendCount;
-}
-
-// Return true if the given column has no gaps and all
-// its residues are in the same biochemical group (as given by the
-// external ResidueGroup[] table). Quits if the alignment has no sequences.
-bool MSAColIsConservative(const MSA &msa, unsigned uColIndex)
-{
-    extern unsigned ResidueGroup[];
-
-    // The loop below walks down one column, i.e. over sequences, so the
-    // bound has to be the sequence count (it used to be the column count).
-    const unsigned uSeqCount = msa.GetSeqCount();
-    if (0 == uSeqCount)
-        Quit("MSAColIsConservative: empty alignment");
-
-    if (msa.IsGap(0, uColIndex))
-        return false;
-
-    // The group of the first sequence's residue is the reference group.
-    unsigned uLetter = msa.GetLetterEx(0, uColIndex);
-    const unsigned uGroup = ResidueGroup[uLetter];
-
-    for (unsigned uSeqIndex = 1; uSeqIndex < uSeqCount; ++uSeqIndex)
-    {
-        if (msa.IsGap(uSeqIndex, uColIndex))
-            return false;
-        uLetter = msa.GetLetter(uSeqIndex, uColIndex);
-        if (ResidueGroup[uLetter] != uGroup)
-            return false;
-    }
-    return true;
-}
-
-// Build msaOut from uSeqCount consecutive rows of msaIn, starting at row
-// uFromSeqIndex. msaOut is resized to uSeqCount rows by the column count
-// of msaIn; sequence names and characters are copied row by row.
-void MSAFromSeqRange(const MSA &msaIn, unsigned uFromSeqIndex, unsigned uSeqCount,
-  MSA &msaOut)
-{
-    const unsigned uCols = msaIn.GetColCount();
-    msaOut.SetSize(uSeqCount, uCols);
-
-    for (unsigned uOutRow = 0; uOutRow < uSeqCount; ++uOutRow)
-    {
-        const unsigned uInRow = uFromSeqIndex + uOutRow;
-        msaOut.SetSeqName(uOutRow, msaIn.GetSeqName(uInRow));
-
-        for (unsigned uCol = 0; uCol < uCols; ++uCol)
-            msaOut.SetChar(uOutRow, uCol, msaIn.GetChar(uInRow, uCol));
-    }
-}
-
-// Build msaOut from uColCount consecutive columns of msaIn, starting at
-// column uFromColIndex. All sequences are kept, with their names and ids.
-// Quits if the requested range does not lie inside msaIn.
-void MSAFromColRange(const MSA &msaIn, unsigned uFromColIndex, unsigned uColCount,
-  MSA &msaOut)
-{
-    const unsigned uSeqCount = msaIn.GetSeqCount();
-    const unsigned uInColCount = msaIn.GetColCount();
-
-    // The last column read is uFromColIndex + uColCount - 1, which must be
-    // < uInColCount. Phrased without additions so that an empty range or a
-    // very large argument cannot wrap around.
-    if (uFromColIndex > uInColCount || uColCount > uInColCount - uFromColIndex)
-        Quit("MSAFromColRange, out of bounds");
-
-    msaOut.SetSize(uSeqCount, uColCount);
-
-    for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
-    {
-        const char *ptrName = msaIn.GetSeqName(uSeqIndex);
-        unsigned uId = msaIn.GetSeqId(uSeqIndex);
-        msaOut.SetSeqName(uSeqIndex, ptrName);
-        msaOut.SetSeqId(uSeqIndex, uId);
-
-        for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
-        {
-            const char c = msaIn.GetChar(uSeqIndex, uFromColIndex + uColIndex);
-            msaOut.SetChar(uSeqIndex, uColIndex, c);
-        }
-    }
-}
-
-// Fill v with the sequences of msa, gaps removed. v is cleared first.
-// Every row is appended, including rows that are empty once their gaps
-// have been stripped.
-void SeqVectFromMSA(const MSA &msa, SeqVect &v)
-{
-    v.Clear();
-
-    const unsigned uRows = msa.GetSeqCount();
-    for (unsigned uRow = 0; uRow < uRows; ++uRow)
-    {
-        Seq seqRow;
-        msa.GetSeq(uRow, seqRow);
-        seqRow.StripGaps();
-        seqRow.SetName(msa.GetSeqName(uRow));
-        v.AppendSeq(seqRow);
-    }
-}
-
-// Remove every column of msa for which IsGapColumn() is true.
-// After a deletion the same index is examined again, because the
-// following column has moved into it.
-void DeleteGappedCols(MSA &msa)
-{
-    unsigned uCol = 0;
-    while (uCol < msa.GetColCount())
-    {
-        if (!msa.IsGapColumn(uCol))
-        {
-            ++uCol;
-            continue;
-        }
-        msa.DeleteCol(uCol);
-    }
-}
-
-// Build msaOut from the rows of msaIn listed in uSeqIndexes[0..uSeqCount-1],
-// in that order. All columns are kept; names and ids travel with the rows.
-void MSAFromSeqSubset(const MSA &msaIn, const unsigned uSeqIndexes[], unsigned uSeqCount,
-  MSA &msaOut)
-{
-    const unsigned uCols = msaIn.GetColCount();
-    msaOut.SetSize(uSeqCount, uCols);
-
-    for (unsigned uOutRow = 0; uOutRow < uSeqCount; ++uOutRow)
-    {
-        const unsigned uInRow = uSeqIndexes[uOutRow];
-
-        const char *szName = msaIn.GetSeqName(uInRow);
-        const unsigned uSeqId = msaIn.GetSeqId(uInRow);
-        msaOut.SetSeqName(uOutRow, szName);
-        msaOut.SetSeqId(uOutRow, uSeqId);
-
-        for (unsigned uCol = 0; uCol < uCols; ++uCol)
-            msaOut.SetChar(uOutRow, uCol, msaIn.GetChar(uInRow, uCol));
-    }
-}
-
-// Check that msa1 and msa2 hold the same sequences when case and gaps are
-// ignored. Rows are paired by sequence id, not by position. On the first
-// mismatch both sequences are logged and the program quits; a differing
-// sequence count also quits.
-void AssertMSAEqIgnoreCaseAndGaps(const MSA &msa1, const MSA &msa2)
-{
-    const unsigned uRows1 = msa1.GetSeqCount();
-    const unsigned uRows2 = msa2.GetSeqCount();
-    if (uRows1 != uRows2)
-        Quit("Seq count differs");
-
-    for (unsigned uRow1 = 0; uRow1 < uRows1; ++uRow1)
-    {
-        Seq seqA;
-        msa1.GetSeq(uRow1, seqA);
-
-        // Locate the row of msa2 that carries the same id.
-        const unsigned uRow2 = msa2.GetSeqIndex(msa1.GetSeqId(uRow1));
-
-        Seq seqB;
-        msa2.GetSeq(uRow2, seqB);
-
-        if (seqA.EqIgnoreCaseAndGaps(seqB))
-            continue;
-
-        Log("Input:\n");
-        seqA.LogMe();
-        Log("Output:\n");
-        seqB.LogMe();
-        Quit("Seq %s differ ", msa1.GetSeqName(uRow1));
-    }
-}
-
-// Check that msa1 and msa2 hold equal sequences (Seq::Eq). Rows are paired
-// by sequence id, not by position. On the first mismatch both sequences
-// are logged and the program quits; a differing sequence count also quits.
-void AssertMSAEq(const MSA &msa1, const MSA &msa2)
-{
-    const unsigned uRows1 = msa1.GetSeqCount();
-    const unsigned uRows2 = msa2.GetSeqCount();
-    if (uRows1 != uRows2)
-        Quit("Seq count differs");
-
-    for (unsigned uRow1 = 0; uRow1 < uRows1; ++uRow1)
-    {
-        Seq seqA;
-        msa1.GetSeq(uRow1, seqA);
-
-        // Locate the row of msa2 that carries the same id.
-        const unsigned uRow2 = msa2.GetSeqIndex(msa1.GetSeqId(uRow1));
-
-        Seq seqB;
-        msa2.GetSeq(uRow2, seqB);
-
-        if (seqA.Eq(seqB))
-            continue;
-
-        Log("Input:\n");
-        seqA.LogMe();
-        Log("Output:\n");
-        seqB.LogMe();
-        Quit("Seq %s differ ", msa1.GetSeqName(uRow1));
-    }
-}
-
-// Assign sequence weights to msa using the scheme returned by
-// GetSeqWeightMethod(). Quits on a method value not handled here.
-void SetMSAWeightsMuscle(MSA &msa)
-{
-    const SEQWEIGHT Method = GetSeqWeightMethod();
-    switch (Method)
-    {
-    case SEQWEIGHT_None:
-        msa.SetUniformWeights();
-        break;
-
-    case SEQWEIGHT_Henikoff:
-        msa.SetHenikoffWeights();
-        break;
-
-    case SEQWEIGHT_HenikoffPB:
-        msa.SetHenikoffWeightsPB();
-        break;
-
-    case SEQWEIGHT_GSC:
-        msa.SetGSCWeights();
-        break;
-
-    case SEQWEIGHT_ClustalW:
-        SetClustalWWeightsMuscle(msa);
-        break;
-
-    case SEQWEIGHT_ThreeWay:
-        SetThreeWayWeightsMuscle(msa);
-        break;
-
-    default:
-        Quit("SetMSAWeightsMuscle, Invalid method=%d", Method);
-        break;
-    }
-}
-
-static WEIGHT *g_MuscleWeights;
-static unsigned g_uMuscleIdCount;
-
-// Return the cached weight stored in g_MuscleWeights for sequence id uId.
-// Quits if the cache has not been allocated or uId is not below
-// g_uMuscleIdCount.
-WEIGHT GetMuscleSeqWeightById(unsigned uId)
-{
-    if (!g_MuscleWeights)
-        Quit("g_MuscleWeights = 0");
-
-    const bool bIdInRange = (uId < g_uMuscleIdCount);
-    if (!bIdInRange)
-        Quit("GetMuscleSeqWeightById(%u): count=%u",
-          uId, g_uMuscleIdCount);
-
-    return g_MuscleWeights[uId];
-}
-
-// Remember tree as the current guide tree (by address; the caller keeps
-// ownership). When the active weighting scheme is ClustalW, the cached
-// per-leaf weights in g_MuscleWeights are also rebuilt from this tree.
-void SetMuscleTree(const Tree &tree)
-{
-    g_ptrMuscleTree = &tree;
-
-    const bool bNeedClustalW = (SEQWEIGHT_ClustalW == GetSeqWeightMethod());
-    if (bNeedClustalW)
-    {
-        // Drop the previous cache before allocating one weight per leaf.
-        delete[] g_MuscleWeights;
-
-        const unsigned uLeaves = tree.GetLeafCount();
-        g_uMuscleIdCount = uLeaves;
-        g_MuscleWeights = new WEIGHT[uLeaves];
-        CalcClustalWWeights(tree, g_MuscleWeights);
-    }
-}
-
-// Copy the cached weights in g_MuscleWeights (indexed by sequence id) onto
-// the rows of msa, then normalize them via NormalizeWeights(1.0).
-// Quits if the cache is missing or a row's id is outside it.
-void SetClustalWWeightsMuscle(MSA &msa)
-{
-    if (!g_MuscleWeights)
-        Quit("g_MuscleWeights = 0");
-
-    const unsigned uRows = msa.GetSeqCount();
-    for (unsigned uRow = 0; uRow < uRows; ++uRow)
-    {
-        const unsigned uSeqId = msa.GetSeqId(uRow);
-        if (!(uSeqId < g_uMuscleIdCount))
-            Quit("SetClustalWWeightsMuscle: id out of range");
-        const WEIGHT wSeq = g_MuscleWeights[uSeqId];
-        msa.SetSeqWeight(uRow, wSeq);
-    }
-
-    msa.NormalizeWeights((WEIGHT) 1.0);
-}
-
-#define LOCAL_VERBOSE 0
-
-// Assign three-way weights to the rows of msa.
-//
-// The weights are computed from the current guide tree (g_ptrMuscleTree)
-// and the edge given by g_uTreeSplitNode1 / g_uTreeSplitNode2, then looked
-// up by sequence id and normalized via NormalizeWeights(1.0).
-// If no split edge has been set, HenikoffPB weights are used instead.
-// Quits if the tree has not been set or a row's id is outside the tree's
-// leaf count.
-void SetThreeWayWeightsMuscle(MSA &msa)
-{
-    if (NULL_NEIGHBOR == g_uTreeSplitNode1 || NULL_NEIGHBOR == g_uTreeSplitNode2)
-    {
-        msa.SetHenikoffWeightsPB();
-        return;
-    }
-
-    // Same style of guard as the g_MuscleWeights checks: fail with a
-    // message rather than dereference a null tree pointer.
-    if (0 == g_ptrMuscleTree)
-        Quit("SetThreeWayWeightsMuscle: tree not set");
-
-    const unsigned uMuscleSeqCount = g_ptrMuscleTree->GetLeafCount();
-    WEIGHT *Weights = new WEIGHT[uMuscleSeqCount];
-
-    CalcThreeWayWeights(*g_ptrMuscleTree, g_uTreeSplitNode1, g_uTreeSplitNode2,
-      Weights);
-
-    const unsigned uMSASeqCount = msa.GetSeqCount();
-    for (unsigned uSeqIndex = 0; uSeqIndex < uMSASeqCount; ++uSeqIndex)
-    {
-        const unsigned uId = msa.GetSeqId(uSeqIndex);
-        if (uId >= uMuscleSeqCount)
-            Quit("SetThreeWayWeightsMuscle: id out of range");
-        msa.SetSeqWeight(uSeqIndex, Weights[uId]);
-    }
-#if LOCAL_VERBOSE
-    {
-    Log("SetThreeWayWeightsMuscle\n");
-    for (unsigned n = 0; n < uMSASeqCount; ++n)
-    {
-        const unsigned uId = msa.GetSeqId(n);
-        Log("%20.20s %6.3f\n", msa.GetSeqName(n), Weights[uId]);
-    }
-    }
-#endif
-    msa.NormalizeWeights((WEIGHT) 1.0);
-
-    delete[] Weights;
-}
-
-// Append msa2 at the end of msa1.
-// For every row of msa1, the row of msa2 with the same sequence id is
-// located and its characters are written into msa1 starting at msa1's
-// original column count.
-// NOTE(review): msa1 is not resized here; this presumably relies on
-// SetChar() accepting columns past the current end - confirm in msa.cpp.
-void MSAAppend(MSA &msa1, const MSA &msa2)
-{
-    const unsigned uSeqCount = msa1.GetSeqCount();
-
-    const unsigned uColCount1 = msa1.GetColCount();
-    const unsigned uColCount2 = msa2.GetColCount();
-
-    for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
-    {
-        // Rows are matched by id, so msa2 may list them in another order.
-        unsigned uId = msa1.GetSeqId(uSeqIndex);
-        unsigned uSeqIndex2 = msa2.GetSeqIndex(uId);
-        for (unsigned uColIndex = 0; uColIndex < uColCount2; ++uColIndex)
-        {
-            const char c = msa2.GetChar(uSeqIndex2, uColIndex);
-            msa1.SetChar(uSeqIndex, uColCount1 + uColIndex, c);
-        }
-    }
-}
-
-// "Catenate" two MSAs (by bad analogy with the UNIX cat command).
-// Rows are paired by sequence name, so msa2 may list its sequences in a
-// different order than msa1.
-// msaCat is the combined alignment produced by appending each sequence of
-// msa2 to the sequence of msa1 with the same name. A sequence of msa1 whose
-// name is not found in msa2 is padded with '-' over msa2's columns.
-void MSACat(const MSA &msa1, const MSA &msa2, MSA &msaCat)
-{
-    const unsigned uRows = msa1.GetSeqCount();
-    const unsigned uCols1 = msa1.GetColCount();
-    const unsigned uCols2 = msa2.GetColCount();
-
-    msaCat.SetSize(uRows, uCols1 + uCols2);
-
-    for (unsigned uRow = 0; uRow < uRows; ++uRow)
-    {
-        // Left part: the row of msa1 unchanged.
-        for (unsigned uCol = 0; uCol < uCols1; ++uCol)
-            msaCat.SetChar(uRow, uCol, msa1.GetChar(uRow, uCol));
-
-        const char *szName = msa1.GetSeqName(uRow);
-        msaCat.SetSeqName(uRow, szName);
-
-        // Right part: the same-named row of msa2, or gaps if there is none.
-        unsigned uRow2;
-        const bool bHasPartner = msa2.GetSeqIndex(szName, &uRow2);
-        for (unsigned uCol = 0; uCol < uCols2; ++uCol)
-        {
-            if (bHasPartner)
-                msaCat.SetChar(uRow, uCols1 + uCol, msa2.GetChar(uRow2, uCol));
-            else
-                msaCat.SetChar(uRow, uCols1 + uCol, '-');
-        }
-    }
-}
Deleted: trunk/packages/muscle/trunk/msadist.h
===================================================================
--- trunk/packages/muscle/trunk/msadist.h 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/msadist.h 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,34 +0,0 @@
-#ifndef MSADist_h
-#define MSADist_h
-
-#include <math.h>
-
-// Pairwise distance between two rows of an alignment, derived from their
-// fractional identity using the measure selected at construction.
-class MSADist
-{
-public:
-    MSADist(DISTANCE Distance) : m_Distance(Distance)
-    {
-    }
-
-    // Distance between rows uSeqIndex1 and uSeqIndex2 of msa.
-    //   DISTANCE_PctIdKimura: KimuraDist(identity)
-    //   DISTANCE_PctIdLog:    -log(identity), identity floored at 0.05
-    // Any other measure quits.
-    double ComputeDist(const MSA &msa, unsigned uSeqIndex1, unsigned uSeqIndex2)
-    {
-        const double dIdentity = msa.GetPctIdentityPair(uSeqIndex1, uSeqIndex2);
-
-        if (DISTANCE_PctIdKimura == m_Distance)
-            return KimuraDist(dIdentity);
-
-        if (DISTANCE_PctIdLog == m_Distance)
-        {
-            const double dFloored = (dIdentity < 0.05) ? 0.05 : dIdentity;
-            return -log(dFloored);
-        }
-
-        Quit("MSADist::ComputeDist, invalid DISTANCE_%u", m_Distance);
-        return 0;
-    }
-
-private:
-    DISTANCE m_Distance;
-};
-
-#endif // MSADist_h
Deleted: trunk/packages/muscle/trunk/msadistkimura.cpp
===================================================================
--- trunk/packages/muscle/trunk/msadistkimura.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/msadistkimura.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,88 +0,0 @@
-#include "muscle.h"
-#include "msa.h"
-#include <math.h>
-
-// "Standard" NJ distance: the Kimura measure.
-// This is defined to be:
-//
-// -log_e(1 - p - p*p/5)
-//
-// where p is the fraction of residues that differ, i.e.:
-//
-// p = (1 - fractional_conservation)
-//
-// This measure is infinite for p = 0.8541 and is considered
-// unreliable for p >= 0.75 (according to the ClustalW docs).
-// ClustalW uses a table lookup for values > 0.75.
-// The following table was copied from the ClustalW file dayhoff.h.
-
-// Estimated PAM distance for observed differences from 75.0% (first entry)
-// to 93.0% (last entry), one entry per 0.1% step. KimuraDist() divides the
-// looked-up value by 100.
-static int dayhoff_pams[]={
- 195, /* 75.0% observed d; 195 PAMs estimated = 195% estimated d */
- 196, /* 75.1% observed d; 196 PAMs estimated */
- 197, 198, 199, 200, 200, 201, 202, 203,
- 204, 205, 206, 207, 208, 209, 209, 210, 211, 212,
- 213, 214, 215, 216, 217, 218, 219, 220, 221, 222,
- 223, 224, 226, 227, 228, 229, 230, 231, 232, 233,
- 234, 236, 237, 238, 239, 240, 241, 243, 244, 245,
- 246, 248, 249, 250, /* 250 PAMs = 80.3% observed d */
- 252, 253, 254, 255, 257, 258,
- 260, 261, 262, 264, 265, 267, 268, 270, 271, 273,
- 274, 276, 277, 279, 281, 282, 284, 285, 287, 289,
- 291, 292, 294, 296, 298, 299, 301, 303, 305, 307,
- 309, 311, 313, 315, 317, 319, 321, 323, 325, 328,
- 330, 332, 335, 337, 339, 342, 344, 347, 349, 352,
- 354, 357, 360, 362, 365, 368, 371, 374, 377, 380,
- 383, 386, 389, 393, 396, 399, 403, 407, 410, 414,
- 418, 422, 426, 430, 434, 438, 442, 447, 451, 456,
- 461, 466, 471, 476, 482, 487, 493, 498, 504, 511,
- 517, 524, 531, 538, 545, 553, 560, 569, 577, 586,
- 595, 605, 615, 626, 637, 649, 661, 675, 688, 703,
- 719, 736, 754, 775, 796, 819, 845, 874, 907, 945,
- /* 92.9% observed; 945 PAMs */
- 988 /* 93.0% observed; 988 PAMs */
-};
-// Number of entries in dayhoff_pams[]; used to range-check the lookup index.
-static int iTableEntries = sizeof(dayhoff_pams)/sizeof(dayhoff_pams[0]);
-
-// Kimura distance for a pair with fractional identity dPctId.
-// With p = 1 - dPctId:
-//   p <  0.75          -log(1 - p - p*p/5)   (Kimura's empirical formula)
-//   p >  0.93          10.0                  (per ClustalW)
-//   0.75 <= p <= 0.93  dayhoff_pams lookup, divided by 100
-double KimuraDist(double dPctId)
-{
-    const double dDiff = 1 - dPctId;
-
-    if (dDiff < 0.75)
-        return -log(1 - dDiff - (dDiff*dDiff)/5);
-
-    if (dDiff > 0.93)
-        return 10.0;
-
-    assert(dDiff <= 1 && dDiff >= 0.75);
-
-    // One table entry per 0.1% step above 75%; the +0.5 rounds to nearest.
-    // Thanks to Michael Hoel for pointing out a bug
-    // in the table index calculation in versions <= 3.52.
-    const int iSlot = (int) ((dDiff - 0.75)*1000 + 0.5);
-    const bool bSlotValid = (iSlot >= 0 && iSlot < iTableEntries);
-    if (!bSlotValid)
-        Quit("Internal error in MSADistKimura::ComputeDist");
-
-    return dayhoff_pams[iSlot] / 100.0;
-}
-
-//double MSADistKimura::ComputeDist(const MSA &msa, unsigned uSeqIndex1,
-// unsigned uSeqIndex2)
-// {
-// double dPctId = msa.GetPctIdentityPair(uSeqIndex1, uSeqIndex2);
-// return KimuraDist(dPctId);
-// }
-
-// Invert Kimura's formula: given a distance d, solve
-//   0.2*p*p + p - (1 - exp(-d)) = 0
-// for the positive root p and return the fractional identity 1 - p.
-double KimuraDistToPctId(double dKimuraDist)
-{
-    const double dQuad = 0.2;
-    const double dLin = 1;
-    const double dRhs = 1.0 - exp(-dKimuraDist);
-
-    const double dRoot = sqrt(dLin*dLin + 4*dQuad*dRhs);
-    const double dDiff = (-dLin + dRoot)/(2*dQuad);
-    return 1 - dDiff;
-}
-
-// Height for a given fractional identity: simply the Kimura distance.
-double PctIdToHeightKimura(double dPctId)
-{
-    const double dHeight = KimuraDist(dPctId);
-    return dHeight;
-}
Deleted: trunk/packages/muscle/trunk/msf.cpp
===================================================================
--- trunk/packages/muscle/trunk/msf.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/msf.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,121 +0,0 @@
-#include "muscle.h"
-#include <stdio.h>
-#include <ctype.h>
-#include "msa.h"
-#include "textfile.h"
-
-const int MAX_NAME = 63;
-
-const unsigned uCharsPerLine = 50;
-const unsigned uCharsPerBlock = 10;
-
-// Truncate at first white space or MAX_NAME, whichever comes
-// first, then pad with blanks up to PadLength.
-// PadLength is clamped to 0..MAX_NAME. The result lives in a static
-// buffer that is overwritten by the next call.
-static const char *GetPaddedName(const char *Name, int PadLength)
-{
-    static char PaddedName[MAX_NAME+1];
-    memset(PaddedName, ' ', MAX_NAME);
-
-    // Copy at most MAX_NAME characters: a longer name without white space
-    // would otherwise run past the end of the buffer.
-    size_t n = strcspn(Name, " \t");
-    if (n > (size_t) MAX_NAME)
-        n = (size_t) MAX_NAME;
-    memcpy(PaddedName, Name, n);
-
-    // Keep the terminator inside the buffer.
-    if (PadLength < 0)
-        PadLength = 0;
-    if (PadLength > MAX_NAME)
-        PadLength = MAX_NAME;
-    PaddedName[PadLength] = 0;
-    return PaddedName;
-}
-
-// Skip the leading run of characters of s that are not in t and return a
-// pointer to where that run ends. Returns 0 when the run is empty, i.e.
-// when s is empty or its first character is in t.
-// NOTE(review): when no character of t occurs in s, the result points at
-// the terminating NUL rather than being 0 - confirm this is intended.
-static const char *strfind(const char *s, const char *t)
-{
-    const size_t uSkip = strcspn(s, t);
-    return (0 == uSkip) ? 0 : s + uSkip;
-}
-
-// GCG checksum code kindly provided by Eric Martel.
-// Each character of row uSeqIndex is multiplied by a position factor that
-// cycles through 1..57, and the running sum is kept modulo 10000.
-unsigned MSA::GetGCGCheckSum(unsigned uSeqIndex) const
-{
-    const unsigned uCols = GetColCount();
-    unsigned uSum = 0;
-    for (unsigned uCol = 0; uCol < uCols; ++uCol)
-    {
-        const unsigned uCh = (unsigned) GetChar(uSeqIndex, uCol);
-        const unsigned uFactor = uCol%57 + 1;
-        uSum = (uSum + uCh*uFactor) % 10000;
-    }
-    return uSum;
-}
-
-// Rewrite every gap position of a as '.'.
-static void MSFFixGaps(MSA &a)
-{
-    const int nRows = a.GetSeqCount();
-    const int nCols = a.GetColCount();
-    for (int iRow = 0; iRow < nRows; ++iRow)
-    {
-        for (int iCol = 0; iCol < nCols; ++iCol)
-        {
-            if (!a.IsGap(iRow, iCol))
-                continue;
-            a.SetChar(iRow, iCol, '.');
-        }
-    }
-}
-
-// Write this alignment to File in MSF ("PileUp") format.
-//
-// Despite being const, this first assigns sequence weights
-// (SetMSAWeightsMuscle) and rewrites gaps as '.' (MSFFixGaps) on the
-// object itself. Output is: a header with an optional comment line, one
-// "Name:" line per sequence (length, GCG checksum, weight), the "//"
-// separator, then the sequences in lines of uCharsPerLine columns, with a
-// blank before every block of uCharsPerBlock columns.
-void MSA::ToMSFFile(TextFile &File, const char *ptrComment) const
-{
-// Cast away const, yuck
-    MSA &Self = (MSA &) *this;
-    SetMSAWeightsMuscle(Self);
-    MSFFixGaps(Self);
-
-    const unsigned uRows = GetSeqCount();
-    const unsigned uCols = GetColCount();
-
-    File.PutString("PileUp\n");
-
-    if (0 == ptrComment)
-        File.PutString("\n");
-    else
-        File.PutFormat("Comment: %s\n", ptrComment);
-
-    const bool bNucleo = (g_Alpha == ALPHA_DNA || g_Alpha == ALPHA_RNA);
-    char seqtype = bNucleo ? 'N' : 'A';
-    File.PutFormat(" MSF: %u Type: %c Check: 0000 ..\n\n",
-      uCols, seqtype);
-
-    // Width of the name column: the longest name after truncation at the
-    // first blank or at MAX_NAME.
-    int iNameWidth = 0;
-    for (unsigned uRow = 0; uRow < uRows; ++uRow)
-    {
-        const char *Padded = GetPaddedName(GetSeqName(uRow), MAX_NAME);
-        const int iLen = (int) strcspn(Padded, " \t");
-        if (iLen > iNameWidth)
-            iNameWidth = iLen;
-    }
-
-    for (unsigned uRow = 0; uRow < uRows; ++uRow)
-    {
-        const char *Padded = GetPaddedName(GetSeqName(uRow), iNameWidth);
-        File.PutFormat(" Name: %s", Padded);
-        File.PutFormat(" Len: %u Check: %5u Weight: %g\n",
-          uCols, GetGCGCheckSum(uRow), GetSeqWeight(uRow));
-    }
-    File.PutString("\n//\n");
-    if (0 == uCols)
-        return;
-
-    const unsigned uLines = (uCols - 1)/uCharsPerLine + 1;
-    for (unsigned uLine = 0; uLine < uLines; ++uLine)
-    {
-        File.PutString("\n");
-
-        // Half-open column range [uFirst, uStop) shown on this line.
-        const unsigned uFirst = uLine*uCharsPerLine;
-        unsigned uStop = uFirst + uCharsPerLine;
-        if (uStop > uCols)
-            uStop = uCols;
-
-        for (unsigned uRow = 0; uRow < uRows; ++uRow)
-        {
-            const char *Padded = GetPaddedName(GetSeqName(uRow), iNameWidth);
-            File.PutFormat("%s ", Padded);
-            for (unsigned uCol = uFirst; uCol < uStop; ++uCol)
-            {
-                if (0 == uCol%uCharsPerBlock)
-                    File.PutString(" ");
-                char c = GetChar(uRow, uCol);
-                File.PutFormat("%c", c);
-            }
-            File.PutString("\n");
-        }
-    }
-}
Deleted: trunk/packages/muscle/trunk/muscle.cpp
===================================================================
--- trunk/packages/muscle/trunk/muscle.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/muscle.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,130 +0,0 @@
-#include "muscle.h"
-#include "msa.h"
-#include "seqvect.h"
-#include "msa.h"
-#include "tree.h"
-#include "profile.h"
-
-// Top-level driver: align the sequences in v and store the result in msaOut.
-//
-// Steps, as far as this function shows them:
-//   1. Pick the alphabet from g_SeqType (guessing it for SEQTYPE_Auto);
-//      nucleotide input switches to the SPN profile score and the
-//      Kmer4_6 distance.
-//   2. Record sequence statistics and the id count.
-//   3. Iteration 1: build a guide tree and align progressively.
-//   4. Iteration 2 (skipped if g_uMaxIters is 1 or there are only two
-//      sequences): refine the tree.
-//   5. Remaining iterations: refine the alignment, vertically if
-//      g_bAnchors is set, otherwise horizontally.
-// Quits if v is empty or g_SeqType is not a known value.
-void MUSCLE(SeqVect &v, MSA &msaOut)
-{
-    const unsigned uSeqCount = v.Length();
-
-    if (0 == uSeqCount)
-        Quit("No sequences in input file");
-
-    ALPHA Alpha = ALPHA_Undefined;
-    switch (g_SeqType)
-    {
-    case SEQTYPE_Auto:
-        Alpha = v.GuessAlpha();
-        break;
-
-    case SEQTYPE_Protein:
-        Alpha = ALPHA_Amino;
-        break;
-
-    case SEQTYPE_RNA:
-        Alpha = ALPHA_RNA;
-        break;
-
-    case SEQTYPE_DNA:
-        Alpha = ALPHA_DNA;
-        break;
-
-    default:
-        Quit("Invalid seq type");
-    }
-    SetAlpha(Alpha);
-    v.FixAlpha();
-
-    if (ALPHA_DNA == Alpha || ALPHA_RNA == Alpha)
-    {
-        SetPPScore(PPSCORE_SPN);
-        g_Distance1 = DISTANCE_Kmer4_6;
-    }
-
-    // Longest and total sequence length, for the progress statistics.
-    unsigned uMaxL = 0;
-    unsigned uTotL = 0;
-    for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
-    {
-        unsigned L = v.GetSeq(uSeqIndex).Length();
-        uTotL += L;
-        if (L > uMaxL)
-            uMaxL = L;
-    }
-
-    SetIter(1);
-    g_bDiags = g_bDiags1;
-    SetSeqStats(uSeqCount, uMaxL, uTotL/uSeqCount);
-
-    MSA::SetIdCount(uSeqCount);
-
-//// Initialize sequence ids.
-//// From this point on, ids must somehow propagate from here.
-//  for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
-//      v.SetSeqId(uSeqIndex, uSeqIndex);
-
-    if (uSeqCount > 1)
-        MHackStart(v);
-
-    if (0 == uSeqCount)
-    {
-        msaOut.Clear();
-        return;
-    }
-
-    if (1 == uSeqCount && ALPHA_Amino == Alpha)
-    {
-        const Seq &s = v.GetSeq(0);
-        msaOut.FromSeq(s);
-        return;
-    }
-
-// First iteration
-    Tree GuideTree;
-    TreeFromSeqVect(v, GuideTree, g_Cluster1, g_Distance1, g_Root1);
-
-    SetMuscleTree(GuideTree);
-
-    // Only the g_bLow path returns progressive-alignment nodes; on the
-    // other path ProgNodes stays null.
-    ProgNode *ProgNodes = 0;
-    if (g_bLow)
-        ProgNodes = ProgressiveAlignE(v, GuideTree, msaOut);
-    else
-        ProgressiveAlign(v, GuideTree, msaOut);
-    SetCurrentAlignment(msaOut);
-
-    // NOTE(review): on this early return ProgNodes (if any) is not
-    // released - confirm whether that is intended.
-    if (1 == g_uMaxIters || 2 == uSeqCount)
-    {
-        MHackEnd(msaOut);
-        return;
-    }
-
-    g_bDiags = g_bDiags2;
-    SetIter(2);
-
-    if (g_bLow)
-    {
-        if (0 != g_uMaxTreeRefineIters)
-            RefineTreeE(msaOut, v, GuideTree, ProgNodes);
-    }
-    else
-        RefineTree(msaOut, GuideTree);
-
-    // Release the per-node data. ProgNodes is null unless g_bLow was set,
-    // and a null array must not be indexed.
-    if (0 != ProgNodes)
-    {
-        extern void DeleteProgNode(ProgNode &Node);
-        const unsigned uNodeCount = GuideTree.GetNodeCount();
-        for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
-            DeleteProgNode(ProgNodes[uNodeIndex]);
-
-        delete[] ProgNodes;
-        ProgNodes = 0;
-    }
-
-    SetSeqWeightMethod(g_SeqWeight2);
-    SetMuscleTree(GuideTree);
-
-    if (g_bAnchors)
-        RefineVert(msaOut, GuideTree, g_uMaxIters - 2);
-    else
-        RefineHoriz(msaOut, GuideTree, g_uMaxIters - 2, false, false);
-
-    MHackEnd(msaOut);
-}
Deleted: trunk/packages/muscle/trunk/muscle.h
===================================================================
--- trunk/packages/muscle/trunk/muscle.h 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/muscle.h 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,328 +0,0 @@
-#if DEBUG && !_DEBUG
-#define _DEBUG 1
-#endif
-
-#if _DEBUG && !DEBUG
-#define DEBUG 1
-#endif
-
-#if _MSC_VER
-#define TIMING 0
-#endif
-
-#define VER_3_52 0
-
-#ifdef _MSC_VER // Microsoft compiler
-#pragma warning(disable : 4800) // disable int-bool conversion warning
-#endif
-
-#define MUSCLE_LONG_VERSION "MUSCLE v3.6 by Robert C. Edgar"
-#define MUSCLE_MAJOR_VERSION "3"
-#define MUSCLE_MINOR_VERSION "6"
-
-#include <stdlib.h>
-#include <string.h>
-#include <ctype.h>
-#include <stdarg.h>
-#include <stdio.h>
-
-#define DOUBLE_AFFINE 0
-#define SINGLE_AFFINE 1
-#define PAF 0
-#define HYDRO 1
-
-#include "types.h"
-#include "intmath.h"
-#include "alpha.h"
-#include "params.h"
-
-#ifndef _WIN32
-#define stricmp strcasecmp
-#define strnicmp strncasecmp
-#define _snprintf snprintf
-#define _fsopen(name, mode, share) fopen((name), (mode))
-#endif
-
-#if DEBUG
-#undef assert
-#define assert(b) Call_MY_ASSERT(__FILE__, __LINE__, b, #b)
-void Call_MY_ASSERT(const char *file, int line, bool b, const char *msg);
-#else
-#define assert(exp) ((void)0)
-#endif
-
-extern int g_argc;
-extern char **g_argv;
-
-#define Rotate(a, b, c) { SCORE *tmp = a; a = b; b = c; c = tmp; }
-
-const double VERY_LARGE_DOUBLE = 1e20;
-
-extern unsigned g_uTreeSplitNode1;
-extern unsigned g_uTreeSplitNode2;
-
-// Number of elements in array a[]
-#define countof(a) (sizeof(a)/sizeof(a[0]))
-
-// Maximum of two of any type
-#define Max2(a, b) ((a) > (b) ? (a) : (b))
-
-// Maximum of three of any type
-#define Max3(a, b, c) Max2(Max2(a, b), c)
-
-// Minimum of two of any type
-#define Min2(a, b) ((a) < (b) ? (a) : (b))
-
-// Maximum of four of any type
-#define Max4(a, b, c, d) Max2(Max2(a, b), Max2(c, d))
-
-const double VERY_NEGATIVE_DOUBLE = -9e29;
-const float VERY_NEGATIVE_FLOAT = (float) -9e29;
-
-const double BLOSUM_DIST = 0.62; // todo settable
-
-// insane value for uninitialized variables
-const unsigned uInsane = 8888888;
-const int iInsane = 8888888;
-const SCORE scoreInsane = 8888888;
-const char cInsane = (char) 0xcd; // int 3 instruction, used e.g. for unint. memory
-const double dInsane = VERY_NEGATIVE_DOUBLE;
-const float fInsane = VERY_NEGATIVE_FLOAT;
-const char INVALID_STATE = '*';
-const BASETYPE BTInsane = (BASETYPE) dInsane;
-const WEIGHT wInsane = BTInsane;
-
-extern double g_dNAN;
-
-extern unsigned long g_tStart;
-
-void Quit(const char szFormat[], ...);
-void Warning(const char szFormat[], ...);
-void TrimBlanks(char szStr[]);
-void TrimLeadingBlanks(char szStr[]);
-void TrimTrailingBlanks(char szStr[]);
-void Log(const char szFormat[], ...);
-bool Verbose();
-const char *ScoreToStr(SCORE Score);
-const char *ScoreToStrL(SCORE Score);
-SCORE StrToScore(const char *pszStr);
-void Break();
-
-double VecSum(const double v[], unsigned n);
-bool IsValidInteger(const char *Str);
-bool IsValidSignedInteger(const char *Str);
-bool IsValidIdentifier(const char *Str);
-bool IsValidFloatChar(char c);
-bool isident(char c);
-bool isidentf(char c);
-
-void TreeFromSeqVect(const SeqVect &c, Tree &tree, CLUSTER Cluster,
- DISTANCE Distance, ROOT Root);
-void TreeFromMSA(const MSA &msa, Tree &tree, CLUSTER Cluster,
- DISTANCE Distance, ROOT Root);
-
-void StripGaps(char szStr[]);
-void StripWhitespace(char szStr[]);
-const char *GetTimeAsStr();
-unsigned CalcBLOSUMWeights(MSA &Aln, ClusterTree &BlosumCluster);
-void CalcGSCWeights(MSA &Aln, const ClusterTree &BlosumCluster);
-void AssertNormalized(const PROB p[]);
-void AssertNormalizedOrZero(const PROB p[]);
-void AssertNormalized(const double p[]);
-bool VectorIsZero(const double dValues[], unsigned n);
-void VectorSet(double dValues[], unsigned n, double d);
-bool VectorIsZero(const float dValues[], unsigned n);
-void VectorSet(float dValues[], unsigned n, float d);
-
-#if _WIN32
-double log2(double x); // Defined in <math.h> on Linux
-#endif
-
-double pow2(double x);
-double lnTolog2(double ln);
-
-double lp2(double x);
-SCORE SumLog(SCORE x, SCORE y);
-SCORE SumLog(SCORE x, SCORE y, SCORE z);
-SCORE SumLog(SCORE w, SCORE x, SCORE y, SCORE z);
-
-double lp2Fast(double x);
-double SumLogFast(double x, double y);
-double SumLogFast(double x, double y, double z);
-double SumLogFast(double w, double x, double y, double z);
-
-void chkmem(const char szMsg[] = "");
-
-void Normalize(PROB p[], unsigned n);
-void Normalize(PROB p[], unsigned n, double dRequiredTotal);
-void NormalizeUnlessZero(PROB p[], unsigned n);
-
-void DebugPrintf(const char szFormat[], ...);
-void SetListFileName(const char *ptrListFileName, bool bAppend);
-void ModelFromAlign(const char *strInputFileName, const char *strModelFileName,
- double dMaxNIC);
-double GetMemUseMB();
-double GetRAMSizeMB();
-double GetPeakMemUseMB();
-void CheckMemUse();
-const char *ElapsedTimeAsString();
-char *SecsToHHMMSS(long lSecs, char szStr[]);
-double GetCPUGHz();
-SCORE GetBlosum62(unsigned uLetterA, unsigned uLetterB);
-SCORE GetBlosum62d(unsigned uLetterA, unsigned uLetterB);
-SCORE GetBlosum50(unsigned uLetterA, unsigned uLetterB);
-void AssertNormalizedDist(const PROB p[], unsigned N);
-void CmdLineError(const char *Format, ...);
-void Fatal(const char *Format, ...);
-void InitCmd();
-void ExecCommandLine(int argc, char *argv[]);
-void DoCmd();
-void SetLogFile();
-void NameFromPath(const char szPath[], char szName[], unsigned uBytes);
-char *strsave(const char *s);
-void DistKmer20_3(const SeqVect &v, DistFunc &DF);
-void DistKbit20_3(const SeqVect &v, DistFunc &DF);
-void DistKmer6_6(const SeqVect &v, DistFunc &DF);
-void DistKmer4_6(const SeqVect &v, DistFunc &DF);
-void DistPWKimura(const SeqVect &v, DistFunc &DF);
-void FastDistKmer(const SeqVect &v, DistFunc &DF);
-void DistUnaligned(const SeqVect &v, DISTANCE DistMethod, DistFunc &DF);
-double PctIdToMAFFTDist(double dPctId);
-double KimuraDist(double dPctId);
-void SetFastParams();
-void AssertProfsEq(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
- unsigned uLengthB);
-void ValidateMuscleIds(const MSA &msa);
-void ValidateMuscleIds(const Tree &tree);
-void TraceBackToPath(int **TraceBack, unsigned uLengthA,
- unsigned uLengthB, PWPath &Path);
-void BitTraceBack(char **TraceBack, unsigned uLengthA, unsigned uLengthB,
- char LastEdge, PWPath &Path);
-SCORE AlignTwoMSAs(const MSA &msa1, const MSA &msa2, MSA &msaOut, PWPath &Path,
- bool bLockLeft = false, bool bLockRight = false);
-SCORE AlignTwoProfs(
- const ProfPos *PA, unsigned uLengthA, WEIGHT wA,
- const ProfPos *PB, unsigned uLengthB, WEIGHT wB,
- PWPath &Path, ProfPos **ptrPout, unsigned *ptruLengthOut);
-void AlignTwoProfsGivenPath(const PWPath &Path,
- const ProfPos *PA, unsigned uLengthA, WEIGHT wA,
- const ProfPos *PB, unsigned uLengthB, WEIGHT wB,
- ProfPos **ptrPOut, unsigned *ptruLengthOut);
-void AlignTwoMSAsGivenPathSW(const PWPath &Path, const MSA &msaA, const MSA &msaB,
- MSA &msaCombined);
-void AlignTwoMSAsGivenPath(const PWPath &Path, const MSA &msaA, const MSA &msaB,
- MSA &msaCombined);
-SCORE FastScorePath2(const ProfPos *PA, unsigned uLengthA,
- const ProfPos *PB, unsigned uLengthB, const PWPath &Path);
-SCORE GlobalAlignDiags(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
- unsigned uLengthB, PWPath &Path);
-SCORE GlobalAlignSimple(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
- unsigned uLengthB, PWPath &Path);
-SCORE GlobalAlignSP(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
- unsigned uLengthB, PWPath &Path);
-SCORE GlobalAlignSPN(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
- unsigned uLengthB, PWPath &Path);
-SCORE GlobalAlignLE(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
- unsigned uLengthB, PWPath &Path);
-void CalcThreeWayWeights(const Tree &tree, unsigned uNode1, unsigned uNode2,
- WEIGHT *Weights);
-SCORE GlobalAlignSS(const Seq &seqA, const Seq &seqB, PWPath &Path);
-bool RefineHoriz(MSA &msaIn, const Tree &tree, unsigned uIters, bool bLockLeft, bool bLockRight);
-bool RefineVert(MSA &msaIn, const Tree &tree, unsigned uIters);
-SCORE GlobalAlignNoDiags(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
- unsigned uLengthB, PWPath &Path);
-
-void SetInputFileName(const char *pstrFileName);
-void SetIter(unsigned uIter);
-void IncIter();
-void SetMaxIters(unsigned uMaxIters);
-void Progress(unsigned uStep, unsigned uTotalSteps);
-void Progress(const char *szFormat, ...);
-void SetStartTime();
-void ProgressStepsDone();
-void SetProgressDesc(const char szDesc[]);
-void SetSeqStats(unsigned uSeqCount, unsigned uMaxL, unsigned uAvgL);
-
-void SetNewHandler();
-void SaveCurrentAlignment();
-void SetCurrentAlignment(MSA &msa);
-void SetOutputFileName(const char *out);
-
-#if DEBUG
-void SetMuscleSeqVect(SeqVect &v);
-void SetMuscleInputMSA(MSA &msa);
-void ValidateMuscleIds(const MSA &msa);
-void ValidateMuscleIds(const Tree &tree);
-#else
-#define SetMuscleSeqVect(x) /* empty */
-#define SetMuscleInputMSA(x) /* empty */
-#define ValidateMuscleIds(x) /* empty */
-#endif
-
-void ProcessArgVect(int argc, char *argv[]);
-void ProcessArgStr(const char *Str);
-void Usage();
-void SetParams();
-
-void SortCounts(const FCOUNT fcCounts[], unsigned SortOrder[]);
-unsigned ResidueGroupFromFCounts(const FCOUNT fcCounts[]);
-FCOUNT SumCounts(const FCOUNT Counts[]);
-
-bool FlagOpt(const char *Name);
-const char *ValueOpt(const char *Name);
-void DoMuscle();
-void ProfDB();
-void DoSP();
-void ProgAlignSubFams();
-void Run();
-void ListParams();
-void OnException();
-void SetSeqWeightMethod(SEQWEIGHT Method);
-SEQWEIGHT GetSeqWeightMethod();
-WEIGHT GetMuscleSeqWeightById(unsigned uId);
-void ListDiagSavings();
-void CheckMaxTime();
-const char *MaxSecsToStr();
-unsigned long GetStartTime();
-
-void ProgressiveAlign(const SeqVect &v, const Tree &GuideTree, MSA &a);
-ProgNode *ProgressiveAlignE(const SeqVect &v, const Tree &GuideTree, MSA &a);
-
-void CalcDistRangeKmer6_6(const MSA &msa, unsigned uRow, float Dist[]);
-void CalcDistRangeKmer20_3(const MSA &msa, unsigned uRow, float Dist[]);
-void CalcDistRangeKmer20_4(const MSA &msa, unsigned uRow, float Dist[]);
-void CalcDistRangePctIdKimura(const MSA &msa, unsigned uRow, float Dist[]);
-void CalcDistRangePctIdLog(const MSA &msa, unsigned uRow, float Dist[]);
-
-void MakeRootMSA(const SeqVect &v, const Tree &GuideTree, ProgNode Nodes[], MSA &a);
-void MakeRootMSABrenner(SeqVect &v, const Tree &GuideTree, ProgNode Nodes[], MSA &a);
-
-void Refine();
-void Local();
-void Profile();
-void PPScore();
-void UPGMA2(const DistCalc &DC, Tree &tree, LINKAGE Linkage);
-
-char *GetFastaSeq(FILE *f, unsigned *ptrSeqLength, char **ptrLabel,
- bool DeleteGaps = true);
-SCORE SW(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
- unsigned uLengthB, PWPath &Path);
-void TraceBackSW(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
- unsigned uLengthB, const SCORE *DPM_, const SCORE *DPD_, const SCORE *DPI_,
- unsigned uPrefixLengthAMax, unsigned uPrefixLengthBMax, PWPath &Path);
-void DiffPaths(const PWPath &p1, const PWPath &p2, unsigned Edges1[],
- unsigned *ptruDiffCount1, unsigned Edges2[], unsigned *ptruDiffCount2);
-void SetPPScore(bool bRespectFlagOpts = true);
-void SetPPScore(PPSCORE p);
-SCORE GlobalAlignDimer(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
- unsigned uLengthB, PWPath &Path);
-bool MissingCommand();
-void Credits();
-void ProfileProfile(MSA &msa1, MSA &msa2, MSA &msaOut);
-void MHackStart(SeqVect &v);
-void MHackEnd(MSA &msa);
-void WriteScoreFile(const MSA &msa);
-char ConsensusChar(const ProfPos &PP);
-void Stabilize(const MSA &msa, MSA &msaStable);
-void MuscleOutput(MSA &msa);
-PTR_SCOREMATRIX ReadMx(TextFile &File);
Deleted: trunk/packages/muscle/trunk/muscle.html
===================================================================
--- trunk/packages/muscle/trunk/muscle.html 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/muscle.html 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,2042 +0,0 @@
-<html>
-
-<head>
-<meta http-equiv=Content-Type content="text/html; charset=windows-1252">
-<meta name=Generator content="Microsoft Word 10 (filtered)">
-<title>MUSCLE User Guide</title>
-
-<style>
-<!--
- /* Font Definitions */
- @font-face
- {font-family:"MS Mincho";
- panose-1:2 2 6 9 4 2 5 8 3 4;}
-@font-face
- {font-family:Sendnya;
- panose-1:0 0 4 0 0 0 0 0 0 0;}
-@font-face
- {font-family:"Arial Black";
- panose-1:2 11 10 4 2 1 2 2 2 4;}
-@font-face
- {font-family:"\@MS Mincho";
- panose-1:2 2 6 9 4 2 5 8 3 4;}
- /* Style Definitions */
- p.MsoNormal, li.MsoNormal, div.MsoNormal
- {margin:0in;
- margin-bottom:.0001pt;
- font-size:10.0pt;
- font-family:"Times New Roman";}
-h1
- {margin-top:12.0pt;
- margin-right:0in;
- margin-bottom:3.0pt;
- margin-left:0in;
- text-indent:0in;
- page-break-after:avoid;
- font-size:16.0pt;
- font-family:Arial;}
-h2
- {margin-top:12.0pt;
- margin-right:0in;
- margin-bottom:3.0pt;
- margin-left:0in;
- text-indent:0in;
- page-break-after:avoid;
- font-size:12.0pt;
- font-family:Arial;
- font-weight:normal;}
-h3
- {margin-top:12.0pt;
- margin-right:0in;
- margin-bottom:3.0pt;
- margin-left:0in;
- text-indent:0in;
- page-break-after:avoid;
- font-size:10.0pt;
- font-family:"Times New Roman";}
-p.MsoToc1, li.MsoToc1, div.MsoToc1
- {margin:0in;
- margin-bottom:.0001pt;
- font-size:10.0pt;
- font-family:"Times New Roman";}
-p.MsoToc2, li.MsoToc2, div.MsoToc2
- {margin-top:0in;
- margin-right:0in;
- margin-bottom:0in;
- margin-left:10.0pt;
- margin-bottom:.0001pt;
- font-size:10.0pt;
- font-family:"Times New Roman";}
-p.MsoToc3, li.MsoToc3, div.MsoToc3
- {margin-top:0in;
- margin-right:0in;
- margin-bottom:0in;
- margin-left:20.0pt;
- margin-bottom:.0001pt;
- font-size:10.0pt;
- font-family:"Times New Roman";}
-p.MsoHeader, li.MsoHeader, div.MsoHeader
- {margin:0in;
- margin-bottom:.0001pt;
- font-size:10.0pt;
- font-family:"Times New Roman";}
-p.MsoFooter, li.MsoFooter, div.MsoFooter
- {margin:0in;
- margin-bottom:.0001pt;
- font-size:10.0pt;
- font-family:"Times New Roman";}
-p.MsoDate, li.MsoDate, div.MsoDate
- {margin:0in;
- margin-bottom:.0001pt;
- font-size:10.0pt;
- font-family:"Times New Roman";}
-a:link, span.MsoHyperlink
- {color:blue;
- text-decoration:underline;}
-a:visited, span.MsoHyperlinkFollowed
- {color:purple;
- text-decoration:underline;}
-p.EquationNumber, li.EquationNumber, div.EquationNumber
- {margin-top:0in;
- margin-right:0in;
- margin-bottom:0in;
- margin-left:.5in;
- margin-bottom:.0001pt;
- text-align:right;
- text-indent:-.25in;
- font-size:10.0pt;
- font-family:"Times New Roman";}
-p.Code, li.Code, div.Code
- {margin-top:0in;
- margin-right:0in;
- margin-bottom:0in;
- margin-left:.25in;
- margin-bottom:.0001pt;
- font-size:9.0pt;
- font-family:"Courier New";}
-@page Section1
- {size:8.5in 11.0in;
- margin:1.0in 1.25in 1.0in 1.25in;}
-div.Section1
- {page:Section1;}
- /* List Definitions */
- ol
- {margin-bottom:0in;}
-ul
- {margin-bottom:0in;}
--->
-</style>
-
-</head>
-
-<body lang=EN-US link=blue vlink=purple>
-
-<div class=Section1>
-
-<p class=MsoNormal> </p>
-
-<p class=MsoNormal> </p>
-
-<p class=MsoNormal> </p>
-
-<p class=MsoNormal> </p>
-
-<p class=MsoNormal align=right style='text-align:right'><span style='font-size:
-24.0pt;font-family:"Arial Black"'>MUSCLE User Guide</span></p>
-
-<p class=MsoNormal align=right style='text-align:right'><u> </u></p>
-
-<p class=MsoNormal align=right style='text-align:right'><b><span
-style='font-family:Arial'> </span></b></p>
-
-<p class=MsoNormal><span style='font-family:Arial'> </span></p>
-
-<p class=MsoNormal><span style='font-family:Arial'> </span></p>
-
-<p class=MsoNormal><span style='font-family:Arial'> </span></p>
-
-<p class=MsoNormal><span style='font-family:Arial'> </span></p>
-
-<p class=MsoNormal><span style='font-family:Arial'> </span></p>
-
-<p class=MsoNormal><span style='font-family:Arial'>Multiple sequence comparison
-by log-expectation</span></p>
-
-<p class=MsoNormal><span style='font-family:Arial'>by Robert C. Edgar</span></p>
-
-<p class=MsoNormal><span style='font-family:Arial'> </span></p>
-
-<p class=MsoNormal><span style='font-family:Arial'>Version 3.5</span></p>
-
-<p class=MsoNormal><span style='font-family:Arial'>August 2004</span></p>
-
-<p class=MsoNormal><span style='font-family:Arial'> </span></p>
-
-<p class=MsoNormal><span style='font-family:Arial'> </span></p>
-
-<p class=MsoNormal><span style='font-family:Arial'><a
-href="http://www.drive5.com/muscle">http://www.drive5.com/muscle</a></span></p>
-
-<p class=MsoNormal><span style='font-family:Arial'>email: muscle (at) drive5.com</span></p>
-
-<p class=MsoNormal><span style='font-family:Arial'> </span></p>
-
-<p class=MsoNormal><span style='font-family:Arial'>MUSCLE is updated regularly.
-Send me an e-mail if you would like to be notified of new releases.</span></p>
-
-<p class=MsoNormal><span style='font-family:Arial'> </span></p>
-
-<p class=MsoNormal><span style='font-family:Arial'> </span></p>
-
-<p class=MsoNormal><i><span style='font-family:Arial'>Citation:</span></i></p>
-
-<p class=MsoNormal><span style='font-family:Arial'> </span></p>
-
-<p class=MsoNormal><span style='font-family:Arial'><a
-href="http://nar.oupjournals.org/cgi/content/full/32/5/1792?ijkey=48Nmt1tta0fMg&keytype=ref">Edgar,
-Robert C. (2004), MUSCLE: multiple sequence alignment with high accuracy and
-high throughput, <i>Nucleic Acids Research</i> <b>32</b>(5), 1792-97</a>.</span></p>
-
-<span style='font-size:10.0pt;font-family:Arial'><br clear=all
-style='page-break-before:always'>
-</span>
-
-<p class=MsoNormal><b><span style='font-size:16.0pt;font-family:Arial'>Table of
-Contents</span></b></p>
-
-<p class=MsoToc1><span
-class=MsoHyperlink><a href="#_Toc81224823">1 Introduction<span
-style='color:windowtext;display:none;text-decoration:none'>. </span><span
-style='color:windowtext;display:none;text-decoration:none'>3</span></a></span></p>
-
-<p class=MsoToc1><span class=MsoHyperlink><a href="#_Toc81224824">2 Quick Start<span
-style='color:windowtext;display:none;text-decoration:none'> </span><span
-style='color:windowtext;display:none;text-decoration:none'>3</span></a></span></p>
-
-<p class=MsoToc2><span class=MsoHyperlink><a href="#_Toc81224825">2.1
-Installation<span style='color:windowtext;display:none;text-decoration:none'>. </span><span
-style='color:windowtext;display:none;text-decoration:none'>3</span></a></span></p>
-
-<p class=MsoToc2><span class=MsoHyperlink><a href="#_Toc81224826">2.2 Making an
-alignment<span style='color:windowtext;display:none;text-decoration:none'> </span><span
-style='color:windowtext;display:none;text-decoration:none'>3</span></a></span></p>
-
-<p class=MsoToc2><span class=MsoHyperlink><a href="#_Toc81224827">2.3 Large
-alignments<span style='color:windowtext;display:none;text-decoration:none'>. </span><span
-style='color:windowtext;display:none;text-decoration:none'>3</span></a></span></p>
-
-<p class=MsoToc2><span class=MsoHyperlink><a href="#_Toc81224828">2.4 Fastest
-speed<span style='color:windowtext;display:none;text-decoration:none'>. </span><span
-style='color:windowtext;display:none;text-decoration:none'>3</span></a></span></p>
-
-<p class=MsoToc2><span class=MsoHyperlink><a href="#_Toc81224829">2.5 Huge
-alignments<span style='color:windowtext;display:none;text-decoration:none'>. </span><span
-style='color:windowtext;display:none;text-decoration:none'>4</span></a></span></p>
-
-<p class=MsoToc2><span class=MsoHyperlink><a href="#_Toc81224830">2.6 Accuracy:
-caveat emptor<span style='color:windowtext;display:none;text-decoration:none'> </span><span
-style='color:windowtext;display:none;text-decoration:none'>4</span></a></span></p>
-
-<p class=MsoToc2><span class=MsoHyperlink><a href="#_Toc81224831">2.7
-Pipelining<span style='color:windowtext;display:none;text-decoration:none'>. </span><span
-style='color:windowtext;display:none;text-decoration:none'>4</span></a></span></p>
-
-<p class=MsoToc2><span class=MsoHyperlink><a href="#_Toc81224832">2.8 Refining
-an existing alignment<span style='color:windowtext;display:none;text-decoration:
-none'> </span><span
-style='color:windowtext;display:none;text-decoration:none'>4</span></a></span></p>
-
-<p class=MsoToc2><span class=MsoHyperlink><a href="#_Toc81224833">2.9
-Profile-profile alignment<span style='color:windowtext;display:none;text-decoration:
-none'> </span><span
-style='color:windowtext;display:none;text-decoration:none'>4</span></a></span></p>
-
-<p class=MsoToc2><span class=MsoHyperlink><a href="#_Toc81224834">2.10 Sequence
-clustering<span style='color:windowtext;display:none;text-decoration:none'>. </span><span
-style='color:windowtext;display:none;text-decoration:none'>5</span></a></span></p>
-
-<p class=MsoToc1><span class=MsoHyperlink><a href="#_Toc81224835">3 File
-Formats<span style='color:windowtext;display:none;text-decoration:none'>. </span><span
-style='color:windowtext;display:none;text-decoration:none'>5</span></a></span></p>
-
-<p class=MsoToc2><span class=MsoHyperlink><a href="#_Toc81224836">3.1 Input
-files<span style='color:windowtext;display:none;text-decoration:none'>. </span><span
-style='color:windowtext;display:none;text-decoration:none'>5</span></a></span></p>
-
-<p class=MsoToc3><span class=MsoHyperlink><a href="#_Toc81224837">3.1.1 Amino
-acid sequences<span style='color:windowtext;display:none;text-decoration:none'>. </span><span
-style='color:windowtext;display:none;text-decoration:none'>5</span></a></span></p>
-
-<p class=MsoToc3><span class=MsoHyperlink><a href="#_Toc81224838">3.1.2
-Nucleotide sequences<span style='color:windowtext;display:none;text-decoration:
-none'>. </span><span
-style='color:windowtext;display:none;text-decoration:none'>5</span></a></span></p>
-
-<p class=MsoToc3><span class=MsoHyperlink><a href="#_Toc81224839">3.1.3
-Determining sequence type<span style='color:windowtext;display:none;text-decoration:
-none'>. </span><span
-style='color:windowtext;display:none;text-decoration:none'>5</span></a></span></p>
-
-<p class=MsoToc2><span class=MsoHyperlink><a href="#_Toc81224840">3.2 Output
-files<span style='color:windowtext;display:none;text-decoration:none'>. </span><span
-style='color:windowtext;display:none;text-decoration:none'>6</span></a></span></p>
-
-<p class=MsoToc3><span class=MsoHyperlink><a href="#_Toc81224841">3.2.1
-Sequence grouping<span style='color:windowtext;display:none;text-decoration:
-none'>. </span><span
-style='color:windowtext;display:none;text-decoration:none'>6</span></a></span></p>
-
-<p class=MsoToc2><span class=MsoHyperlink><a href="#_Toc81224842">3.3 CLUSTALW
-format<span style='color:windowtext;display:none;text-decoration:none'> </span><span
-style='color:windowtext;display:none;text-decoration:none'>6</span></a></span></p>
-
-<p class=MsoToc2><span class=MsoHyperlink><a href="#_Toc81224843">3.4 MSF format<span
-style='color:windowtext;display:none;text-decoration:none'> </span><span
-style='color:windowtext;display:none;text-decoration:none'>6</span></a></span></p>
-
-<p class=MsoToc2><span class=MsoHyperlink><a href="#_Toc81224844">3.5 HTML
-format<span style='color:windowtext;display:none;text-decoration:none'> </span><span
-style='color:windowtext;display:none;text-decoration:none'>6</span></a></span></p>
-
-<p class=MsoToc1><span class=MsoHyperlink><a href="#_Toc81224845">4 Using
-MUSCLE<span style='color:windowtext;display:none;text-decoration:none'>. </span><span
-style='color:windowtext;display:none;text-decoration:none'>6</span></a></span></p>
-
-<p class=MsoToc2><span class=MsoHyperlink><a href="#_Toc81224846">4.1 How the
-algorithm works<span style='color:windowtext;display:none;text-decoration:none'>. </span><span
-style='color:windowtext;display:none;text-decoration:none'>6</span></a></span></p>
-
-<p class=MsoToc2><span class=MsoHyperlink><a href="#_Toc81224847">4.2
-Command-line options<span style='color:windowtext;display:none;text-decoration:
-none'>. </span><span
-style='color:windowtext;display:none;text-decoration:none'>7</span></a></span></p>
-
-<p class=MsoToc2><span class=MsoHyperlink><a href="#_Toc81224848">4.3 The
-maxiters option<span style='color:windowtext;display:none;text-decoration:none'>. </span><span
-style='color:windowtext;display:none;text-decoration:none'>7</span></a></span></p>
-
-<p class=MsoToc2><span class=MsoHyperlink><a href="#_Toc81224849">4.4 The
-maxtrees option<span style='color:windowtext;display:none;text-decoration:none'>. </span><span
-style='color:windowtext;display:none;text-decoration:none'>8</span></a></span></p>
-
-<p class=MsoToc2><span class=MsoHyperlink><a href="#_Toc81224850">4.5 The
-maxhours option<span style='color:windowtext;display:none;text-decoration:none'>. </span><span
-style='color:windowtext;display:none;text-decoration:none'>8</span></a></span></p>
-
-<p class=MsoToc2><span class=MsoHyperlink><a href="#_Toc81224851">4.6 The
-profile scoring function<span style='color:windowtext;display:none;text-decoration:
-none'>. </span><span
-style='color:windowtext;display:none;text-decoration:none'>8</span></a></span></p>
-
-<p class=MsoToc2><span class=MsoHyperlink><a href="#_Toc81224852">4.7 Diagonal
-optimization<span style='color:windowtext;display:none;text-decoration:none'>. </span><span
-style='color:windowtext;display:none;text-decoration:none'>8</span></a></span></p>
-
-<p class=MsoToc2><span class=MsoHyperlink><a href="#_Toc81224853">4.8 Anchor
-optimization<span style='color:windowtext;display:none;text-decoration:none'>. </span><span
-style='color:windowtext;display:none;text-decoration:none'>8</span></a></span></p>
-
-<p class=MsoToc2><span class=MsoHyperlink><a href="#_Toc81224854">4.9 Log file<span
-style='color:windowtext;display:none;text-decoration:none'>. </span><span
-style='color:windowtext;display:none;text-decoration:none'>8</span></a></span></p>
-
-<p class=MsoToc2><span class=MsoHyperlink><a href="#_Toc81224855">4.10 Progress
-messages<span style='color:windowtext;display:none;text-decoration:none'>. </span><span
-style='color:windowtext;display:none;text-decoration:none'>9</span></a></span></p>
-
-<p class=MsoToc2><span class=MsoHyperlink><a href="#_Toc81224856">4.11 Running
-out of memory<span style='color:windowtext;display:none;text-decoration:none'>. </span><span
-style='color:windowtext;display:none;text-decoration:none'>9</span></a></span></p>
-
-<p class=MsoToc2><span class=MsoHyperlink><a href="#_Toc81224857">4.12
-Troubleshooting<span style='color:windowtext;display:none;text-decoration:none'>. </span><span
-style='color:windowtext;display:none;text-decoration:none'>9</span></a></span></p>
-
-<p class=MsoToc2><span class=MsoHyperlink><a href="#_Toc81224858">4.13
-Technical support<span style='color:windowtext;display:none;text-decoration:
-none'> </span><span
-style='color:windowtext;display:none;text-decoration:none'>10</span></a></span></p>
-
-<p class=MsoToc1><span class=MsoHyperlink><a href="#_Toc81224859">5 Command
-Line Reference<span style='color:windowtext;display:none;text-decoration:none'>. </span><span
-style='color:windowtext;display:none;text-decoration:none'>10</span></a></span></p>
-
-<p class=MsoNormal><span style='font-family:Arial'> </span></p>
-
-<b><span style='font-size:16.0pt;font-family:Arial'><br clear=all
-style='page-break-before:always'>
-</span></b>
-
-<h1 style='margin-left:0in;text-indent:0in'><a name="_Toc81224823">1 Introduction</a></h1>
-
-<p class=MsoNormal>MUSCLE is a program for creating multiple alignments of
-amino acid or nucleotide sequences. A range of options is provided that give
-you the choice of optimizing accuracy, speed, or some compromise between the
-two. Default parameters are those that give the best average accuracy in our
-tests. Using versions current at the time of writing, my tests show that MUSCLE
-can achieve both better average accuracy and better speed than CLUSTALW or T‑Coffee,
-depending on the chosen options.</p>
-
-<h1 style='margin-left:0in;text-indent:0in'><a name="_Toc81224824">2 Quick
-Start</a></h1>
-
-<p class=MsoNormal>The MUSCLE algorithm is delivered as a command-line program
-called <i>muscle</i>. If you are running under Linux or Unix you will be
-working at a shell prompt. If you are running under Windows, you should be in a
-command window (nostalgically known to us older people as a DOS prompt). If you
-don't know how to use command-line programs, you should get help from a local
-guru.</p>
-
-<h2 style='margin-left:0in;text-indent:0in'><a name="_Toc81224825">2.1 Installation</a></h2>
-
-<p class=MsoNormal>Copy the <i>muscle</i> binary file to a directory that is
-accessible from your computer. That's it&mdash;there are no configuration files,
-libraries, environment variables or other settings to worry about. If you are
-using Windows, then the binary file is named <i>muscle.exe</i>. From now on <i>muscle</i>
-should be understood to mean "<i>muscle</i> if you are using Linux or Unix,
-<i>muscle.exe</i> if you are using Windows".</p>
-
-<h2 style='margin-left:0in;text-indent:0in'><a name="_Toc81224826">2.2 Making an
-alignment</a></h2>
-
-<p class=MsoNormal>Make a FASTA file containing some sequences. (If you are not
-familiar with FASTA format, it is described in detail later in this Guide.) For
-now, just to make things fast, limit the number of sequences in the file to no
-more than 50 and the sequence length to be no more than 500. Call the input
-file <i>seqs.fa</i>. (An example file named <i>seqs.fa</i> is distributed with
-the standard MUSCLE package). Make sure the directory containing the <i>muscle</i>
-binary is in your path. (If it isn't, you can run it by typing the full path
-name, and the following example command lines must be changed accordingly). Now
-type:</p>
-
-<p class=MsoNormal> </p>
-
-<p class=Code>muscle -in seqs.fa -out seqs.afa</p>
-
-<p class=MsoNormal> </p>
-
-<p class=MsoNormal>You should see some progress messages. If <i>muscle</i>
-completes successfully, it will create a file <i>seqs.afa</i> containing the
-alignment. By default, output is created in "aligned FASTA" format
-(hence the <i>.afa</i> extension). This is just like regular FASTA except that
-gaps are added in order to align the sequences. This is a nice format for
-computers but not very readable for people, so to look at the alignment you
-will want an alignment viewer such as Belvu, or a script that converts FASTA to
-a more readable format. You can also use the <i>msf</i> command-line option to
-request output in MSF format, which is easier to understand for people. If <i>muscle</i>
-gives an error message and you don't know how to fix it, please read the
-Troubleshooting section.</p>
-
-<p class=MsoNormal> </p>
-
-<p class=MsoNormal>The default settings are designed to give the best accuracy,
-so this may be all you need to know.</p>
-
-<h2 style='margin-left:0in;text-indent:0in'><a name="_Toc81224827">2.3 Large
-alignments</a></h2>
-
-<p class=MsoNormal>If you have a large number of sequences (a few thousand), or
-they are very long, then the default settings may be too slow for practical
-use. A good compromise between speed and accuracy is to run just the first two
-iterations of the algorithm. On average, this gives accuracy equal to T-Coffee
-and speeds much faster than CLUSTALW. This is done by the option <i>maxiters 2</i>,
-as in the following example.</p>
-
-<p class=MsoNormal> </p>
-
-<p class=Code>muscle -in seqs.fa -out seqs.afa -maxiters 2</p>
-
-<h2 style='margin-left:0in;text-indent:0in'><a name="_Toc81224828">2.4 Fastest
-speed</a></h2>
-
-<p class=MsoNormal>If you want the fastest possible speed, then the following
-example shows the applicable options for proteins.</p>
-
-<p class=MsoNormal> </p>
-
-<p class=Code>muscle -in seqs.fa -out seqs.afa -maxiters 1 -diags1 -sv -distance1
-kbit20_3</p>
-
-<p class=Code> </p>
-
-<p class=MsoNormal>For nucleotides, use:</p>
-
-<p class=MsoNormal> </p>
-
-<p class=Code>muscle -in seqs.fa -out seqs.afa -maxiters 1 -diags1</p>
-
-<p class=MsoNormal> </p>
-
-<p class=MsoNormal>At the time of writing, <i>muscle</i> with these options is faster
-than any other multiple sequence alignment program that I have tested. The
-alignments are not bad, especially when the sequences are closely related.
-However, as you might expect, this blazing speed comes at the cost of the
-lowest average accuracy of the options that <i>muscle</i> provides.</p>
-
-<h2 style='margin-left:0in;text-indent:0in'><a name="_Toc81224829">2.5 Huge
-alignments</a></h2>
-
-<p class=MsoNormal>If you have a <i>very</i> large number of sequences (several
-thousand), or they are very long, then the <i>kbit20_3</i> option may cause
-problems because it needs a relatively large amount of memory. Better is to use
-the default distance measure, which is roughly 2× or 3× slower but needs less
-memory, like this:</p>
-
-<p class=MsoNormal> </p>
-
-<p class=Code>muscle -in seqs.fa -out seqs.afa -maxiters 1 -diags1 -sv</p>
-
-<h2 style='margin-left:0in;text-indent:0in'><a name="_Toc81224830">2.6 Accuracy:
-caveat emptor</a></h2>
-
-<p class=MsoNormal>Why do I keep using the clumsy phrase "average
-accuracy" instead of just saying "accuracy"? That's because the
-quality of alignments produced by MUSCLE varies, as do those produced by other programs
-such as CLUSTALW and T-Coffee. The state of the art leaves plenty of room for
-improvement. Sometimes the fastest speed options to <i>muscle</i> give
-alignments that are better than T-Coffee, though the reverse will more often be
-the case. With challenging sets of sequences, it is a good idea to make several
-different alignments using different <i>muscle</i> options and to try other programs
-too. Regions where different alignments agree are more believable than regions
-where they disagree. </p>
-
-<h2 style='margin-left:0in;text-indent:0in'><a name="_Toc81224831">2.7 Pipelining</a></h2>
-
-<p class=MsoNormal>Input can be taken from standard input, and output can be
-written to standard output. This is the default, so our first example would
-also work like this:</p>
-
-<p class=MsoNormal> </p>
-
-<p class=Code>muscle < seqs.fa > seqs.afa</p>
-
-<h2 style='margin-left:0in;text-indent:0in'><a name="_Toc81224832">2.8 Refining
-an existing alignment</a></h2>
-
-<p class=MsoNormal>You can ask <i>muscle</i> to try to improve an existing alignment
-by using the <i>refine</i> option. The input file must then be a FASTA file
-containing an alignment. All sequences must be of equal length, gaps can be
-specified using dots "." or dashes "-". For example:</p>
-
-<p class=MsoNormal> </p>
-
-<p class=Code>muscle -in seqs.afa -out refined.afa -refine</p>
-
-<h2 style='margin-left:0in;text-indent:0in'><a name="_Toc81224833">2.9 Profile-profile
-alignment</a></h2>
-
-<p class=MsoNormal>A fundamental step in the MUSCLE algorithm is aligning two
-multiple sequence alignments, each of which contains some of the input
-sequences. This operation is sometimes called "profile-profile
-alignment". If you have two existing alignments of related sequences you
-can use the <i>profile</i> option of MUSCLE to align those two alignments.
-Typical usage is:</p>
-
-<p class=MsoNormal> </p>
-
-<p class=Code>muscle -profile -in1 one.afa -in2 two.afa -out both.afa</p>
-
-<p class=MsoNormal> </p>
-
-<p class=MsoNormal>The alignments in <i>one.afa</i> and <i>two.afa</i>, which
-must be in aligned FASTA format, are aligned to each other, keeping input
-columns intact and inserting columns of gaps where needed. Output is stored in <i>both.afa</i>.</p>
-
-<p class=MsoNormal> </p>
-
-<p class=MsoNormal>MUSCLE does not compute a similarity measure or measure of
-statistical significance (such as an E-value), so this option is not useful for
-discriminating homologs from unrelated sequences. For this task, I recommend Sadreyev
-& Grishin's COMPASS program.</p>
-
-<h2 style='margin-left:0in;text-indent:0in'><a name="_Toc81224834">2.10 Sequence
-clustering</a></h2>
-
-<p class=MsoNormal>The first stage in MUSCLE is a fast clustering algorithm.
-This may be of use in other applications. Typical usage is:</p>
-
-<p class=MsoNormal> </p>
-
-<p class=Code>muscle -cluster -in seqs.fa -tree1 tree.phy</p>
-
-<p class=MsoNormal> </p>
-
-<p class=MsoNormal>The sequences will be clustered, and a tree written to <i>tree.phy</i>.
-Options <i>weight1</i>, <i>distance1</i>, <i>cluster1</i> and <i>root1</i> can
-be applied if desired. Note that by default, UPGMA clustering is used. You can
-use</p>
-
-<p class=MsoNormal> <i>neighborjoining</i> if you prefer, but note that this
-is substantially slower than UPGMA for large numbers of sequences.</p>
-
-<h1 style='margin-left:0in;text-indent:0in'><a name="_Toc81224835">3 File
-Formats</a></h1>
-
-<p class=MsoNormal>MUSCLE uses FASTA format for both input and output. For
-output only, it also offers CLUSTALW, MSF and HTML formats using the <i>clw</i>,
-<i>msf</i> and <i>html</i> command-line options.</p>
-
-<h2 style='margin-left:0in;text-indent:0in'><a name="_Toc81224836">3.1 Input
-files</a></h2>
-
-<p class=MsoNormal>Input files must be in FASTA format. These are plain text
-files (word processing files such as Word documents are not understood!). Unix,
-Windows and DOS text files are supported (end-of-line may be NL or CR NL). There
-is no explicit limit on the length of a sequence, however if you are running a
-32-bit version of <i>muscle</i> then the maximum will be very roughly 10,000 letters
-due to the maximum addressable size of tables required in memory. Each sequence starts
-with an annotation line, which is recognized by having a greater-than symbol
-">" as its first character. There is no limit on the length of an
-annotation line (this is new as of version 3.5), and there is no requirement
-that the annotation be unique. The sequence itself follows on one or more
-subsequent lines, and is terminated either by the next annotation line or by
-the end of the file. </p>
-
-<h3 style='margin-left:0in;text-indent:0in'><a name="_Toc81224837">3.1.1 Amino
-acid sequences</a></h3>
-
-<p class=MsoNormal>The standard single-letter amino acid alphabet is used. Upper
-and lower case is allowed, the case is not significant. The special characters
-X, B, Z and U are understood. X means "unknown amino acid", B is D or
-N, Z is E or Q. U is understood to be the 21st amino acid Selenocysteine. White
-space (spaces, tabs and the end-of-line characters CR and NL) is allowed inside
-sequence data. Dots "." and dashes "-" in sequences are
-allowed and are discarded unless the input is expected to be aligned (e.g. for
-the <i>refine</i> option). </p>
-
-<h3 style='margin-left:0in;text-indent:0in'><a name="_Toc81224838">3.1.2 Nucleotide
-sequences</a></h3>
-
-<p class=MsoNormal>The usual letters A, G, C, T and U stand for nucleotides.
-The letters T and U are equivalent as far as MUSCLE is concerned. N is the
-wildcard meaning "unknown nucleotide". R means A or G, Y means C or
-T/U. Other wildcards, such as those used by RFAM, are not understood in this
-version and will be replaced by Ns. If you would like support for other DNA /
-RNA alphabets, please let me know.</p>
-
-<h3 style='margin-left:0in;text-indent:0in'><a name="_Toc81224839">3.1.3 Determining
-sequence type</a></h3>
-
-<p class=MsoNormal>By default, MUSCLE looks at the first 100 letters in the
-input sequence data (excluding gaps). If 95% or more of those letters are valid
-nucleotides (AGCTUN), then the file is treated as nucleotides, otherwise as
-amino acids. This method almost always guesses correctly, but you can make sure
-by specifying the sequence type on the command line. This is done using the <i>seqtype</i>
-option, which can take the following values:</p>
-
-<p class=MsoNormal> </p>
-
-<p class=MsoNormal> <i>seqtype protein</i> Amino
-acid</p>
-
-<p class=MsoNormal> <i>seqtype nucleo</i> Nucleotide</p>
-
-<p class=MsoNormal> <i>seqtype auto</i> Automatic
-detection (default).</p>
-
-<h2 style='margin-left:0in;text-indent:0in'><a name="_Toc81224840">3.2 Output
-files</a></h2>
-
-<p class=MsoNormal>By default, output is also written in FASTA format. All
-letters are upper-case and gaps are represented by dashes "-".</p>
-
-<h3 style='margin-left:0in;text-indent:0in'><a name="_Toc81224841">3.2.1 Sequence
-grouping</a></h3>
-
-<p class=MsoNormal>By default, MUSCLE re-arranges sequences so that similar
-sequences are adjacent in the output file. (This is done by ordering sequences
-according to a prefix traversal of the guide tree). This makes the alignment
-easier to evaluate by eye. If you want the sequences to be output in the
-same order as the input file, you can use the <i>stable</i> option.</p>
-
-<h2 style='margin-left:0in;text-indent:0in'><a name="_Toc81224842">3.3 CLUSTALW
-format</a></h2>
-
-<p class=MsoNormal>You can request CLUSTALW output by using the <i>clw</i>
-option. This should be compatible with CLUSTALW, with the exception of the
-program name in the file header. You can ask MUSCLE to impersonate CLUSTALW by
-writing "CLUSTAL W (1.81)" as the program name by using <i>clwstrict</i>.
-If you have problems parsing MUSCLE output with scripts designed for CLUSTALW,
-please let me know.</p>
-
-<h2 style='margin-left:0in;text-indent:0in'><a name="_Toc81224843">3.4 MSF format</a></h2>
-
-<p class=MsoNormal>MSF format (similar to CLUSTALW) is requested by using the <i>msf</i>
-option. As with CLUSTALW format, this is easier for people to read than FASTA.</p>
-
-<h2 style='margin-left:0in;text-indent:0in'><a name="_Toc81224844">3.5 HTML
-format</a></h2>
-
-<p class=MsoNormal>I've added an experimental feature starting in version 3.4. To
-get a Web page as output, use the <i>html</i> option. The alignment is colored
-using a color scheme from Eric Sonnhammer's Belvu editor, which is my personal
-favorite. A drawback of this option is that the Web page typically contains a
-very large number of HTML tags, which can be slow to display in the Internet
-Explorer browser. The Netscape browser works much better. If you have any ideas
-about good ways to make Web pages, please let me know.</p>
-
-<h1 style='margin-left:0in;text-indent:0in'><a name="_Toc81224845">4 Using
-MUSCLE</a></h1>
-
-<p class=MsoNormal>In this section we give more details of the MUSCLE algorithm
-and the more important options offered by the <i>muscle</i> implementation.</p>
-
-<h2 style='margin-left:0in;text-indent:0in'><a name="_Toc81224846">4.1 How the
-algorithm works</a></h2>
-
-<p class=MsoNormal>We won't give a complete description of the MUSCLE algorithm
-here&mdash;for that, you will have to read the paper. But hopefully a summary will
-help explain what some of the command-line options do and how they might be
-useful in your work.</p>
-
-<p class=MsoNormal> </p>
-
-<p class=MsoNormal>The first step is to calculate a tree. In CLUSTALW, this is
-done as follows. Each pair of input sequences is aligned, and used to compute
-the pair-wise identity of the pair. Identities are converted to a measure of
-distance. Finally, the distance matrix is converted to a tree using a
-clustering method (CLUSTALW uses neighbor-joining). If you have 1,000
-sequences, there are (1,000 &times; 999)/2 =
-499,500 pairs, so aligning every pair can take a while. MUSCLE uses a much
-faster, but somewhat more approximate, method to compute distances: it counts
-the number of short sub-sequences (known as <i>k</i>-mers, <i>k</i>-tuples or
-words) that two sequences have in common, without constructing an alignment.
-This is typically around 3,000 times faster than CLUSTALW's method, but the trees
-will generally be less accurate. We call this step "<i>k</i>-mer
-clustering".</p>
-
-<p class=MsoNormal> </p>
-
-<p class=MsoNormal>The second step is to use the tree to construct what is
-known as a progressive alignment. At each node of the binary tree, a pair-wise
-alignment is constructed, progressing from the leaves towards the root. The
-first alignment will be made from two sequences. Later alignments will be one
-of the three following types: sequence-sequence, profile-sequence or
-profile-profile, where "profile" means the multiple alignment of the sequences
-under a given internal node of the tree. This is very similar to what CLUSTALW
-does once it has built a tree.</p>
-
-<p class=MsoNormal> </p>
-
-<p class=MsoNormal style='page-break-after:avoid'>Now we have a multiple
-alignment, which has been built very quickly compared with conventional methods,
-mainly because of the distance calculation using <i>k</i>-mers rather than
-alignments. The quality of this alignment is typically pretty good&mdash;it will
-often tie or beat a T-Coffee alignment on our tests. However, on average, we
-find that it can be improved by proceeding through the following steps.</p>
-
-<p class=MsoNormal style='page-break-after:avoid'> </p>
-
-<p class=MsoNormal style='page-break-after:avoid'>From the multiple alignment,
-we can now compute the pair-wise identities of each pair of sequences. This
-gives us a new distance matrix, from which we estimate a new tree. We compare
-the old and new trees, and re-align subgroups where needed to produce a
-progressive multiple alignment from the new tree. If the two trees are
-identical, there is nothing to do; if there are no subtrees that agree (very unusual),
-then the whole progressive alignment procedure must be repeated from scratch.
-Typically we find that the tree is pretty stable near the leaves, but some
-re-alignments are needed closer to the root. This procedure (compute pair-wise
-identities, estimate new tree, compare trees, re-align) is iterated until the
-tree stabilizes or until a specified maximum number of iterations has been
-done. We call this process "tree refinement", although it also tends
-to improve the alignment.</p>
-
-<p class=MsoNormal style='page-break-after:avoid'> </p>
-
-<p class=MsoNormal style='page-break-after:avoid'>We now keep the tree fixed
-and move to a new procedure which is designed to improve the multiple
-alignment. The set of sequences is divided into two subsets (i.e., we make a
-bipartition on the set of sequences). A profile is constructed for each of the
-two subsets based on the current multiple alignment. These two profiles are
-then re-aligned to each other using the same pair-wise alignment algorithm as
-used in the progressive stage. If this improves an "objective score"
-that measures the quality of the alignment, then the new multiple alignment is
-kept, otherwise it is discarded. By default, the objective score is the classic
-sum-of-pairs score that takes the (sequence weighted) average of the pair-wise
-alignment score of every pair of sequences in the alignment. Bipartitions are
-chosen by deleting an edge in the guide tree; each of the two resulting
-subtrees defines a subset of sequences. This procedure is called "tree
-dependent refinement". One iteration of tree dependent refinement tries
-bipartitions produced by deleting every edge of the tree in depth order moving
-from the leaves towards the center of the tree. Iterations continue until
-convergence or up to a specified maximum.</p>
-
-<p class=MsoNormal style='page-break-after:avoid'> </p>
-
-<p class=MsoNormal style='page-break-after:avoid'>For convenience, the major
-steps in MUSCLE are described as "iterations", though the first three
-iterations all do quite different things and may take very different lengths of
-time to complete. The tree-dependent refinement iterations 3, 4 ... are true
-iterations and will take similar lengths of time. </p>
-
-<p class=MsoNormal style='page-break-after:avoid'> </p>
-
-<table class=MsoTableGrid border=0 cellspacing=0 cellpadding=0
- style='border-collapse:collapse'>
- <tr>
- <td width=89 valign=top style='width:66.6pt;border:none;border-bottom:solid windowtext 1.0pt;
- padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal style='page-break-after:avoid'><b><span style='font-size:
- 9.0pt;font-family:Arial'>Iteration</span></b></p>
- </td>
- <td width=461 valign=top style='width:4.8in;border:none;border-bottom:solid windowtext 1.0pt;
- padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal style='page-break-after:avoid'><b><span style='font-size:
- 9.0pt;font-family:Arial'>Actions</span></b></p>
- </td>
- </tr>
- <tr>
- <td width=89 valign=top style='width:66.6pt;border:none;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal style='page-break-after:avoid'>1</p>
- </td>
- <td width=461 valign=top style='width:4.8in;border:none;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal style='page-break-after:avoid'>Distance matrix by <i>k</i>-mer
- clustering, estimate tree, progressive alignment according to this tree.</p>
- <p class=MsoNormal style='page-break-after:avoid'> </p>
- </td>
- </tr>
- <tr>
- <td width=89 valign=top style='width:66.6pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal style='page-break-after:avoid'>2</p>
- </td>
- <td width=461 valign=top style='width:4.8in;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal style='page-break-after:avoid'>Distance matrix by
- pair-wise identities from current multiple alignment, estimate tree,
- progressive alignment according to new tree, repeat until convergence or specified
- maximum number of times.</p>
- <p class=MsoNormal style='page-break-after:avoid'> </p>
- </td>
- </tr>
- <tr>
- <td width=89 valign=top style='width:66.6pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal style='page-break-after:avoid'>3, 4 ...</p>
- </td>
- <td width=461 valign=top style='width:4.8in;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal style='page-break-after:avoid'>Tree-dependent refinement. One
- iteration visits every edge in the tree one time.</p>
- </td>
- </tr>
-</table>
-
-<h2 style='margin-left:0in;text-indent:0in'><a name="_Toc81224847">4.2 Command-line
-options</a></h2>
-
-<p class=MsoNormal>There are two types of command-line options: value options
-and flag options. Value options are followed by the value of the given
-parameter, for example <i>in &lt;filename&gt;</i>; flag options just stand for
-themselves, such as <i>msf</i>. All options are a dash (not two dashes!)
-followed by a long name; there are no single-letter equivalents. Value options
-must be separated from their values by white space in the command line. Thus, <i>muscle</i>
-does not follow Unix, Linux or Posix standards, for which we apologize. The
-order in which options are given is irrelevant unless two options contradict,
-in which case the right-most option silently wins.</p>
-
-<h2 style='margin-left:0in;text-indent:0in'><a name="_Toc81224848">4.3 The
-maxiters option</a></h2>
-
-<p class=MsoNormal>You can control the number of iterations that MUSCLE does by
-specifying the <i>maxiters</i> option. If you specify 1, 2 or 3, then this is
-exactly the number of iterations that will be performed. If the value is
-greater than 3, then <i>muscle</i> will continue up to the maximum you specify
-or until convergence is reached, whichever happens sooner. The default is 16.
-If you have a large number of sequences, refinement may be rather slow.</p>
-
-<h2 style='margin-left:0in;text-indent:0in'><a name="_Toc81224849">4.4 The maxtrees
-option</a></h2>
-
-<p class=MsoNormal>This option controls the maximum number of new trees to
-create in iteration 2. Our experience suggests that a point of diminishing
-returns is typically reached after the first tree, so the default value is 1.
-If a larger value is given, the process will repeat until convergence or until
-this number of trees has been created, whichever comes first.</p>
-
-<h2 style='margin-left:0in;text-indent:0in'><a name="_Toc81224850">4.5 The maxhours
-option</a></h2>
-
-<p class=MsoNormal>If you have a large alignment, <i>muscle</i> may take a long
-time to complete. It is sometimes convenient to say "I want the best
-alignment I can get in 24 hours" rather than specifying a set of options
-that will take an unknown length of time. This is done by using <i>maxhours</i>,
-which specifies a floating-point number of hours. If this time is exceeded, <i>muscle</i>
-will write out the current alignment and stop. For example,</p>
-
-<p class=MsoNormal> </p>
-
-<p class=Code>muscle -in huge.fa -out huge.afa -maxiters 9999 -maxhours 24.0</p>
-
-<p class=MsoNormal> </p>
-
-<p class=MsoNormal>Note that the actual time may exceed the specified limit by
-a few minutes while <i>muscle</i> finishes up on a step. It is also possible
-for no alignment to be produced if the time limit is too small.</p>
-
-<h2 style='margin-left:0in;text-indent:0in'><a name="_Toc81224851">4.6 The
-profile scoring function</a></h2>
-
-<p class=MsoNormal>Three different protein profile scoring functions are
-supported, the log-expectation score (<i>le</i> option) and a sum of pairs
-score using either the PAM200 matrix (<i>sp</i>) or the VTML240 matrix (<i>sv</i>).
-The log-expectation score is the default as it gives better results on our
-tests, but is typically somewhere between two and three times slower than the
-sum-of-pairs score. For nucleotides, <i>spn</i> is currently the only option
-(which is of course the default for nucleotide data, so you don't need to
-specify this option).</p>
-
-<h2 style='margin-left:0in;text-indent:0in'><a name="_Toc81224852">4.7 Diagonal
-optimization</a></h2>
-
-<p class=MsoNormal>Creating a pair-wise alignment by dynamic programming
-requires computing an <i>L</i><sub>1</sub> &times;
-<i>L</i><sub>2</sub> matrix, where <i>L</i><sub>1</sub> and <i>L</i><sub>2</sub>
-are the sequence lengths. A trick used in algorithms such as BLAST is to reduce
-the size of this matrix by using fast methods to find "diagonals",
-i.e. short regions of high similarity between the two sequences. This speeds up
-the algorithm at the expense of some reduction in accuracy. MUSCLE uses a
-technique called <i>k</i>-mer extension to find diagonals. It is disabled by
-default because of the slight reduction in average accuracy and can be turned
-on by specifying the <i>diags1</i> and <i>diags2</i> options.</p>
-
-<h2 style='margin-left:0in;text-indent:0in'><a name="_Toc81224853">4.8 Anchor
-optimization</a></h2>
-
-<p class=MsoNormal>Tree-dependent refinement (iterations 3, 4 ... ) can be
-speeded up by dividing the alignment vertically into blocks. Block boundaries
-are found by identifying high-scoring columns (e.g., a perfectly conserved
-column of Cs or Ws would be a candidate). Each vertical block is then refined
-independently before reassembling the complete alignment, which is faster
-because of the <i>L</i><sup>2</sup> factor in dynamic programming (e.g.,
-suppose the alignment is split into two vertical blocks, then 2 &times;
-0.5<sup>2</sup> = 0.5, so the dynamic
-programming time is roughly halved). The <i>noanchors</i> option is used to disable
-this feature. This option has no effect if <i>maxiters 1</i> or <i>maxiters 2</i>
-is specified. On benchmark tests, enabling anchors has little or no effect on
-accuracy, but if you want to be very conservative and are striving for the best
-possible accuracy then <i>noanchors</i> is a reasonable choice.</p>
-
-<h2 style='margin-left:0in;text-indent:0in'><a name="_Toc81224854">4.9 Log file</a></h2>
-
-<p class=MsoNormal>You can specify a log file by using <i>log &lt;filename&gt;</i>
-or <i>loga &lt;filename&gt;</i>. Using <i>log</i> causes any existing file to
-be deleted, <i>loga</i> appends to any existing file. A message will be
-written to the log file when <i>muscle</i> starts and stops. Error and warning
-messages will also be written to the log. If <i>verbose</i> is specified, then
-more information will be written, including the command line used to invoke <i>muscle</i>,
-the resulting internal parameter settings, and also progress messages. The
-content and format of verbose log file output is subject to change in future
-versions. </p>
-
-<p class=MsoNormal> </p>
-
-<p class=MsoNormal>The use of a log file may seem contrary to Unix conventions for
-using standard output and standard error. I like these conventions, but never
-found a fully satisfactory way to use them. I like progress messages (see
-below), but they mess up a file if you re-direct standard error and there are
-errors or warning messages too. I could try to detect whether a standard file
-handle is a <i>tty</i> device or a disk file and change behavior accordingly,
-but I regard this as too complicated and too hard for the user to understand. On
-Windows it can be hard to re-direct standard file handles, especially when
-working in a GUI debugger. Maybe one day I will figure out a better solution
-(suggestions welcomed).</p>
-
-<p class=MsoNormal> </p>
-
-<p class=MsoNormal>I highly recommend using <i>verbose </i>and <i>log[a]</i>,
-especially when running <i>muscle</i> in a batch mode. This enables you to
-verify whether a particular alignment was completed and to review any errors or
-warnings that occurred.</p>
-
-<h2 style='margin-left:0in;text-indent:0in'><a name="_Toc81224855">4.10 Progress
-messages</a></h2>
-
-<p class=MsoNormal>By default, <i>muscle</i> writes progress messages to
-standard error periodically so that you know it's doing something and get some
-feedback about the time and memory requirements for the alignment. Here is a
-typical progress message.</p>
-
-<p class=MsoNormal> </p>
-
-<p class=Code>00:00:23 25 Mb Iter 2 87.20% Build guide tree</p>
-
-<p class=MsoNormal> </p>
-
-<p class=MsoNormal style='page-break-after:avoid'>The fields are as follows.</p>
-
-<p class=MsoNormal style='page-break-after:avoid'> </p>
-
-<table class=MsoTableGrid border=0 cellspacing=0 cellpadding=0
- style='border-collapse:collapse'>
- <tr>
- <td width=118 valign=top style='width:88.2pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=Code style='page-break-after:avoid'>00:00:23</p>
- </td>
- <td width=408 valign=top style='width:306.05pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal style='page-break-after:avoid'>Elapsed time since <i>muscle</i>
- started.</p>
- </td>
- </tr>
- <tr>
- <td width=118 valign=top style='width:88.2pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=Code style='page-break-after:avoid'>25 Mb</p>
- </td>
- <td width=408 valign=top style='width:306.05pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal style='page-break-after:avoid'>Peak memory use in megabytes
- (i.e., not the current usage, but the maximum amount of memory used since <i>muscle</i>
- started).</p>
- </td>
- </tr>
- <tr>
- <td width=118 valign=top style='width:88.2pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=Code style='page-break-after:avoid'>Iter 2</p>
- </td>
- <td width=408 valign=top style='width:306.05pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal style='page-break-after:avoid'>Iteration currently in
- progress.</p>
- </td>
- </tr>
- <tr>
- <td width=118 valign=top style='width:88.2pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=Code style='page-break-after:avoid'>87.20%</p>
- </td>
- <td width=408 valign=top style='width:306.05pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal style='page-break-after:avoid'>How much of the current step
- has been completed (percentage).</p>
- </td>
- </tr>
- <tr>
- <td width=118 valign=top style='width:88.2pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=Code>Build...</p>
- </td>
- <td width=408 valign=top style='width:306.05pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal>A brief description of the current step.</p>
- </td>
- </tr>
-</table>
-
-<p class=MsoNormal> </p>
-
-<p class=MsoNormal>The <i>quiet</i> command-line option disables writing
-progress messages to standard error. If the <i>verbose</i> command-line option
-is specified, a progress message will be written to the log file when each
-iteration completes. So <i>quiet</i> and <i>verbose</i> are not
-contradictory.</p>
-
-<h2 style='margin-left:0in;text-indent:0in'><a name="_Toc81224856">4.11 Running
-out of memory</a></h2>
-
-<p class=MsoNormal>The <i>muscle</i> code tries to deal gracefully with
-low-memory conditions by using the following technique. A block of "emergency
-reserve" memory is allocated when <i>muscle</i> starts. If a later request
-to allocate memory fails, this reserve block is made available, and <i>muscle</i>
-attempts to save the current alignment. With luck, the reserved memory will be
-enough to allow <i>muscle</i> to save the alignment and exit gracefully with an
-informative error message.</p>
-
-<h2 style='margin-left:0in;text-indent:0in'><a name="_Toc81224857">4.12 Troubleshooting</a></h2>
-
-<p class=MsoNormal>Here is some general advice on what to do if <i>muscle</i>
-fails and you don't understand what happened. The code is designed to fail
-gracefully with an informative error message when something goes wrong, but
-there will no doubt be situations I haven't anticipated (not to mention bugs).</p>
-
-<p class=MsoNormal> </p>
-
-<p class=MsoNormal>Check the MUSCLE web site for updates, bug reports and other
-relevant information.</p>
-
-<p class=MsoNormal> </p>
-
-<p class=MsoNormal style='page-break-after:avoid'> <a
-href="http://www.drive5.com/muscle">http://www.drive5.com/muscle</a></p>
-
-<p class=MsoNormal> </p>
-
-<p class=MsoNormal>Check the input file to make sure it is in valid FASTA format.
-Try giving it to another sequence analysis program that can accept large FASTA
-files (e.g., the NCBI <i>formatdb</i> utility) to see if you get an informative
-error message. Try dividing the file into two halves and using each half
-individually as input. If one half fails and the other does not, repeat until
-the problem is localized as far as possible.</p>
-
-<p class=MsoNormal> </p>
-
-<p class=MsoNormal>Use <i>log</i> or <i>loga</i> and <i>verbose</i> and
-check the log file to see if there are any messages that give you a hint about the
-problem. Look at the peak memory requirements (reported in progress messages)
-to see if you may be exceeding the physical or virtual memory capacity of your
-computer.</p>
-
-<p class=MsoNormal> </p>
-
-<p class=MsoNormal>If <i>muscle</i> crashes without giving an error message, or
-hangs, then you may need to refer to the source code or use a debugger. A
-"debug" version, <i>muscled</i>, may be provided. This is built from
-the same source code but with the DEBUG macro defined and without compiler
-optimizations. This version runs much more slowly (perhaps by a factor of three
-or more), but does a lot more internal checking and may be able to catch
-something that is going wrong in the code. The <i>core</i> option specifies
-that <i>muscle</i> should not catch exceptions. When <i>core</i> is specified,
-an exception may result in a debugger trap or a core dump, depending on the
-execution environment. The <i>nocore</i> option has the opposite effect. In <i>muscle</i>,
-<i>nocore</i> is the default, <i>core</i> is the default in <i>muscled</i>.</p>
-
-<h2 style='margin-left:0in;text-indent:0in'><a name="_Toc81224858">4.13 Technical
-support</a></h2>
-
-<p class=MsoNormal>I am happy to provide support. But I am busy, and am
-offering this program at no charge, so I ask you to make a reasonable effort to
-figure things out for yourself before contacting me.</p>
-
-<h1 style='margin-left:0in;text-indent:0in'><a name="_Toc81224859">5 Command Line
-Reference</a></h1>
-
-<p class=MsoNormal style='page-break-after:avoid'> </p>
-
-<table class=MsoTableGrid border=0 cellspacing=0 cellpadding=0
- style='border-collapse:collapse'>
- <thead>
- <tr>
- <td width=110 valign=top style='width:82.85pt;border:none;border-bottom:
- solid windowtext 1.0pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal style='page-break-after:avoid'><b><span style='font-size:
- 9.0pt;font-family:Arial'>Value option</span></b></p>
- </td>
- <td width=113 valign=top style='width:84.8pt;border:none;border-bottom:solid windowtext 1.0pt;
- padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal style='page-break-after:avoid'><b><span style='font-size:
- 9.0pt;font-family:Arial'>Legal values</span></b></p>
- </td>
- <td width=103 valign=top style='width:77.35pt;border:none;border-bottom:
- solid windowtext 1.0pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal style='page-break-after:avoid'><b><span style='font-size:
- 9.0pt;font-family:Arial'>Default</span></b></p>
- </td>
- <td width=264 valign=top style='width:197.8pt;border:none;border-bottom:
- solid windowtext 1.0pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal style='page-break-after:avoid'><b><span style='font-size:
- 9.0pt;font-family:Arial'>Description</span></b></p>
- </td>
- </tr>
- </thead>
- <tr>
- <td width=110 valign=top style='width:82.85pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal style='page-break-after:avoid'><span style='font-size:
- 8.0pt;font-family:"Courier New"'>anchorspacing</span></p>
- </td>
- <td width=113 valign=top style='width:84.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal style='page-break-after:avoid'>Integer</p>
- </td>
- <td width=103 valign=top style='width:77.35pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal style='page-break-after:avoid'><span style='font-size:
- 8.0pt;font-family:"Courier New"'>32</span></p>
- </td>
- <td width=264 valign=top style='width:197.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal style='page-break-after:avoid'>Minimum spacing between
- anchor columns.</p>
- <p class=MsoNormal style='page-break-after:avoid'> </p>
- </td>
- </tr>
- <tr>
- <td width=110 valign=top style='width:82.85pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal style='page-break-after:avoid'><span style='font-size:
- 8.0pt;font-family:"Courier New"'>center</span></p>
- </td>
- <td width=113 valign=top style='width:84.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal style='page-break-after:avoid'>Floating point</p>
- </td>
- <td width=103 valign=top style='width:77.35pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal style='page-break-after:avoid'>[1]</p>
- </td>
- <td width=264 valign=top style='width:197.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal style='page-break-after:avoid'>Center parameter. Should be
- negative.</p>
- <p class=MsoNormal style='page-break-after:avoid'> </p>
- </td>
- </tr>
- <tr>
- <td width=110 valign=top style='width:82.85pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal style='page-break-after:avoid'><span style='font-size:
- 8.0pt;font-family:"Courier New"'>cluster1</span></p>
- <p class=MsoNormal style='page-break-after:avoid'><span style='font-size:
- 8.0pt;font-family:"Courier New"'>cluster2</span></p>
- </td>
- <td width=113 valign=top style='width:84.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal style='page-break-after:avoid'><span style='font-size:
- 8.0pt;font-family:"Courier New"'>upgma</span></p>
- <p class=MsoNormal style='page-break-after:avoid'><span style='font-size:
- 8.0pt;font-family:"Courier New"'>upgmb</span></p>
- <p class=MsoNormal style='page-break-after:avoid'><span style='font-size:
- 8.0pt;font-family:"Courier New"'>neighborjoining</span></p>
- </td>
- <td width=103 valign=top style='width:77.35pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal style='page-break-after:avoid'><span style='font-size:
- 8.0pt;font-family:"Courier New"'>upgmb</span></p>
- </td>
- <td width=264 valign=top style='width:197.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal style='page-break-after:avoid'>Clustering method. cluster1
- is used in iterations 1 and 2, cluster2 in later iterations.</p>
- <p class=MsoNormal style='page-break-after:avoid'> </p>
- </td>
- </tr>
- <tr>
- <td width=110 valign=top style='width:82.85pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal style='page-break-after:avoid'><span style='font-size:
- 8.0pt;font-family:"Courier New"'>diaglength</span></p>
- </td>
- <td width=113 valign=top style='width:84.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal style='page-break-after:avoid'>Integer</p>
- </td>
- <td width=103 valign=top style='width:77.35pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal style='page-break-after:avoid'><span style='font-size:
- 8.0pt;font-family:"Courier New"'>24</span></p>
- </td>
- <td width=264 valign=top style='width:197.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal style='page-break-after:avoid'>Minimum length of diagonal.</p>
- <p class=MsoNormal style='page-break-after:avoid'> </p>
- </td>
- </tr>
- <tr>
- <td width=110 valign=top style='width:82.85pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal style='page-break-after:avoid'><span style='font-size:
- 8.0pt;font-family:"Courier New"'>diagmargin</span></p>
- </td>
- <td width=113 valign=top style='width:84.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal style='page-break-after:avoid'>Integer</p>
- </td>
- <td width=103 valign=top style='width:77.35pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal style='page-break-after:avoid'><span style='font-size:
- 8.0pt;font-family:"Courier New"'>5</span></p>
- </td>
- <td width=264 valign=top style='width:197.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal style='page-break-after:avoid'>Discard this many positions
- at ends of diagonal.</p>
- <p class=MsoNormal style='page-break-after:avoid'> </p>
- </td>
- </tr>
- <tr>
- <td width=110 valign=top style='width:82.85pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>distance1</span></p>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'> </span></p>
- </td>
- <td width=113 valign=top style='width:84.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>kmer6_6</span></p>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>kmer20_3</span></p>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>kmer20_4</span></p>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>kbit20_3</span></p>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>kmer4_6</span></p>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'> </span></p>
- </td>
- <td width=103 valign=top style='width:77.35pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>kmer6_6
- (amino) or kmer4_6 (nucleo)</span></p>
- </td>
- <td width=264 valign=top style='width:197.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal>Distance measure for iteration 1.</p>
- </td>
- </tr>
- <tr>
- <td width=110 valign=top style='width:82.85pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>distance2</span></p>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'> </span></p>
- </td>
- <td width=113 valign=top style='width:84.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>kmer6_6</span></p>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>kmer20_3</span></p>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>kmer20_4</span></p>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>kbit20_3</span></p>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>pctid_kimura</span></p>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>pctid_log</span></p>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'> </span></p>
- </td>
- <td width=103 valign=top style='width:77.35pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>pctid_kimura</span></p>
- </td>
- <td width=264 valign=top style='width:197.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal>Distance measure for iterations 2, 3 ...</p>
- <p class=MsoNormal> </p>
- <p class=MsoNormal> </p>
- <p class=MsoNormal> </p>
- <p class=MsoNormal> </p>
- </td>
- </tr>
- <tr>
- <td width=110 valign=top style='width:82.85pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>gapopen</span></p>
- </td>
- <td width=113 valign=top style='width:84.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal>Floating point</p>
- </td>
- <td width=103 valign=top style='width:77.35pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal>[1]</p>
- </td>
- <td width=264 valign=top style='width:197.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal>The gap open score. Must be negative.</p>
- <p class=MsoNormal> </p>
- </td>
- </tr>
- <tr>
- <td width=110 valign=top style='width:82.85pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>hydro</span></p>
- </td>
- <td width=113 valign=top style='width:84.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal>Integer</p>
- </td>
- <td width=103 valign=top style='width:77.35pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>5</span></p>
- </td>
- <td width=264 valign=top style='width:197.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal>Window size for determining whether a region is
- hydrophobic.</p>
- <p class=MsoNormal> </p>
- </td>
- </tr>
- <tr>
- <td width=110 valign=top style='width:82.85pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>hydrofactor</span></p>
- </td>
- <td width=113 valign=top style='width:84.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal>Floating point</p>
- </td>
- <td width=103 valign=top style='width:77.35pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>1.2</span></p>
- </td>
- <td width=264 valign=top style='width:197.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal>Multiplier for gap open/close penalties in hydrophobic
- regions.</p>
- <p class=MsoNormal> </p>
- </td>
- </tr>
- <tr>
- <td width=110 valign=top style='width:82.85pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>in</span></p>
- </td>
- <td width=113 valign=top style='width:84.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal>Any file name</p>
- </td>
- <td width=103 valign=top style='width:77.35pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal>standard input</p>
- </td>
- <td width=264 valign=top style='width:197.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal>Where to find the input sequences.</p>
- <p class=MsoNormal> </p>
- </td>
- </tr>
- <tr>
- <td width=110 valign=top style='width:82.85pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>log</span></p>
- </td>
- <td width=113 valign=top style='width:84.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal>File name</p>
- </td>
- <td width=103 valign=top style='width:77.35pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal>None.</p>
- </td>
- <td width=264 valign=top style='width:197.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal>Log file name (delete existing file).</p>
- <p class=MsoNormal> </p>
- </td>
- </tr>
- <tr>
- <td width=110 valign=top style='width:82.85pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>loga</span></p>
- </td>
- <td width=113 valign=top style='width:84.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal>File name</p>
- </td>
- <td width=103 valign=top style='width:77.35pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal>None.</p>
- </td>
- <td width=264 valign=top style='width:197.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal>Log file name (append to existing file).</p>
- <p class=MsoNormal> </p>
- </td>
- </tr>
- <tr>
- <td width=110 valign=top style='width:82.85pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>maxdiagbreak</span></p>
- </td>
- <td width=113 valign=top style='width:84.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal>Integer</p>
- </td>
- <td width=103 valign=top style='width:77.35pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal>1</p>
- </td>
- <td width=264 valign=top style='width:197.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal>Maximum distance between two diagonals that allows them to
- merge into one diagonal.</p>
- <p class=MsoNormal> </p>
- </td>
- </tr>
- <tr>
- <td width=110 valign=top style='width:82.85pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>maxhours</span></p>
- </td>
- <td width=113 valign=top style='width:84.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal>Floating point</p>
- </td>
- <td width=103 valign=top style='width:77.35pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal>None.</p>
- </td>
- <td width=264 valign=top style='width:197.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal>Maximum time to run in hours. The actual time may exceed
- the requested limit by a few minutes. Decimals are allowed, so 1.5 means one
- hour and 30 minutes.</p>
- <p class=MsoNormal> </p>
- </td>
- </tr>
- <tr>
- <td width=110 valign=top style='width:82.85pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>maxiters</span></p>
- </td>
- <td width=113 valign=top style='width:84.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal>Integer 1, 2 ...</p>
- </td>
- <td width=103 valign=top style='width:77.35pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>16</span></p>
- </td>
- <td width=264 valign=top style='width:197.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal>Maximum number of iterations.</p>
- <p class=MsoNormal> </p>
- </td>
- </tr>
- <tr>
- <td width=110 valign=top style='width:82.85pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>maxtrees</span></p>
- </td>
- <td width=113 valign=top style='width:84.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal>Integer</p>
- </td>
- <td width=103 valign=top style='width:77.35pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>1</span></p>
- </td>
- <td width=264 valign=top style='width:197.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal>Maximum number of new trees to build in iteration 2.</p>
- <p class=MsoNormal> </p>
- </td>
- </tr>
- <tr>
- <td width=110 valign=top style='width:82.85pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal style='page-break-after:avoid'><span style='font-size:
- 8.0pt;font-family:"Courier New"'>minbestcolscore</span></p>
- </td>
- <td width=113 valign=top style='width:84.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal style='page-break-after:avoid'>Floating point </p>
- </td>
- <td width=103 valign=top style='width:77.35pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal style='page-break-after:avoid'>[1]</p>
- </td>
- <td width=264 valign=top style='width:197.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal style='page-break-after:avoid'>Minimum score a column must
- have to be an anchor.</p>
- <p class=MsoNormal style='page-break-after:avoid'> </p>
- </td>
- </tr>
- <tr>
- <td width=110 valign=top style='width:82.85pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal style='page-break-after:avoid'><span style='font-size:
- 8.0pt;font-family:"Courier New"'>minsmoothscore</span></p>
- </td>
- <td width=113 valign=top style='width:84.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal style='page-break-after:avoid'>Floating point</p>
- </td>
- <td width=103 valign=top style='width:77.35pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal style='page-break-after:avoid'>[1]</p>
- </td>
- <td width=264 valign=top style='width:197.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal style='page-break-after:avoid'>Minimum smoothed score a
- column must have to be an anchor.</p>
- <p class=MsoNormal style='page-break-after:avoid'> </p>
- </td>
- </tr>
- <tr>
- <td width=110 valign=top style='width:82.85pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>objscore</span></p>
- </td>
- <td width=113 valign=top style='width:84.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>sp</span></p>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>ps</span></p>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>dp</span></p>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>xp</span></p>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>spf</span></p>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>spm</span></p>
- </td>
- <td width=103 valign=top style='width:77.35pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>spm</span></p>
- </td>
- <td width=264 valign=top style='width:197.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal>Objective score used by tree dependent refinement.</p>
- <p class=MsoNormal>sp=sum-of-pairs score.</p>
- <p class=MsoNormal>spf=sum-of-pairs score (dimer approximation)</p>
- <p class=MsoNormal>spm=sp for &lt; 100 seqs, otherwise spf</p>
- <p class=MsoNormal>dp=dynamic programming score.</p>
- <p class=MsoNormal>ps=average profile-sequence score.</p>
- <p class=MsoNormal>xp=cross profile score.</p>
- <p class=MsoNormal> </p>
- </td>
- </tr>
- <tr>
- <td width=110 valign=top style='width:82.85pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>out</span></p>
- </td>
- <td width=113 valign=top style='width:84.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal>File name</p>
- </td>
- <td width=103 valign=top style='width:77.35pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal>standard output</p>
- </td>
- <td width=264 valign=top style='width:197.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal>Where to write the alignment.</p>
- <p class=MsoNormal> </p>
- </td>
- </tr>
- <tr>
- <td width=110 valign=top style='width:82.85pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>root1</span></p>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>root2</span></p>
- </td>
- <td width=113 valign=top style='width:84.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>pseudo</span></p>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>midlongestspan</span></p>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>minavgleafdist</span></p>
- </td>
- <td width=103 valign=top style='width:77.35pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>pseudo</span></p>
- </td>
- <td width=264 valign=top style='width:197.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal>Method used to root tree; root1 is used in iteration 1 and
- 2, root2 in later iterations.</p>
- <p class=MsoNormal> </p>
- <p class=MsoNormal> </p>
- </td>
- </tr>
- <tr>
- <td width=110 valign=top style='width:82.85pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>seqtype</span></p>
- </td>
- <td width=113 valign=top style='width:84.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>protein</span></p>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>nucleo</span></p>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>auto</span></p>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'> </span></p>
- </td>
- <td width=103 valign=top style='width:77.35pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>auto</span></p>
- </td>
- <td width=264 valign=top style='width:197.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal>Sequence type.</p>
- </td>
- </tr>
- <tr>
- <td width=110 valign=top style='width:82.85pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>smoothscoreceil</span></p>
- </td>
- <td width=113 valign=top style='width:84.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal>Floating point</p>
- </td>
- <td width=103 valign=top style='width:77.35pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal>[1]</p>
- </td>
- <td width=264 valign=top style='width:197.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal>Maximum value of column score for smoothing purposes.</p>
- <p class=MsoNormal> </p>
- </td>
- </tr>
- <tr>
- <td width=110 valign=top style='width:82.85pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>smoothwindow</span></p>
- </td>
- <td width=113 valign=top style='width:84.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal>Integer</p>
- </td>
- <td width=103 valign=top style='width:77.35pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>7</span></p>
- </td>
- <td width=264 valign=top style='width:197.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal>Window used for anchor column smoothing.</p>
- <p class=MsoNormal> </p>
- </td>
- </tr>
- <tr>
- <td width=110 valign=top style='width:82.85pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>SUEFF</span></p>
- </td>
- <td width=113 valign=top style='width:84.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal>Floating point value between 0 and 1.</p>
- <p class=MsoNormal> </p>
- </td>
- <td width=103 valign=top style='width:77.35pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>0.1</span></p>
- </td>
- <td width=264 valign=top style='width:197.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal>Constant used in UPGMB clustering. Determines the relative
- fraction of average linkage (SUEFF) vs. nearest-neighbor linkage (1 - SUEFF).<br>
- <br>
- </p>
- </td>
- </tr>
- <tr>
- <td width=110 valign=top style='width:82.85pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>tree1</span></p>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>tree2</span></p>
- </td>
- <td width=113 valign=top style='width:84.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal>File name</p>
- </td>
- <td width=103 valign=top style='width:77.35pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal>None</p>
- </td>
- <td width=264 valign=top style='width:197.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal>Save tree produced in first or second iteration to given
- file in Newick (Phylip-compatible) format.</p>
- <p class=MsoNormal> </p>
- </td>
- </tr>
- <tr>
- <td width=110 valign=top style='width:82.85pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal style='page-break-after:avoid'><span style='font-size:
- 8.0pt;font-family:"Courier New"'>weight1</span></p>
- <p class=MsoNormal style='page-break-after:avoid'><span style='font-size:
- 8.0pt;font-family:"Courier New"'>weight2</span></p>
- </td>
- <td width=113 valign=top style='width:84.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal style='page-break-after:avoid'><span style='font-size:
- 8.0pt;font-family:"Courier New"'>none</span></p>
- <p class=MsoNormal style='page-break-after:avoid'><span style='font-size:
- 8.0pt;font-family:"Courier New"'>henikoff</span></p>
- <p class=MsoNormal style='page-break-after:avoid'><span style='font-size:
- 8.0pt;font-family:"Courier New"'>henikoffpb</span></p>
- <p class=MsoNormal style='page-break-after:avoid'><span style='font-size:
- 8.0pt;font-family:"Courier New"'>gsc</span></p>
- <p class=MsoNormal style='page-break-after:avoid'><span style='font-size:
- 8.0pt;font-family:"Courier New"'>clustalw</span></p>
- <p class=MsoNormal style='page-break-after:avoid'><span style='font-size:
- 8.0pt;font-family:"Courier New"'>threeway</span></p>
- </td>
- <td width=103 valign=top style='width:77.35pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal style='page-break-after:avoid'><span style='font-size:
- 8.0pt;font-family:"Courier New"'>clustalw</span></p>
- <p class=MsoNormal style='page-break-after:avoid'> </p>
- </td>
- <td width=264 valign=top style='width:197.8pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal style='page-break-after:avoid'>Sequence weighting scheme.</p>
- <p class=MsoNormal style='page-break-after:avoid'>weight1 is used in
- iterations 1 and 2.</p>
- <p class=MsoNormal style='page-break-after:avoid'>weight2 is used for
- tree-dependent refinement.</p>
- <p class=MsoNormal style='page-break-after:avoid'>none=all sequences have
- equal weight.</p>
- <p class=MsoNormal style='page-break-after:avoid'>henikoff=Henikoff &amp;
- Henikoff weighting scheme.</p>
- <p class=MsoNormal style='page-break-after:avoid'>henikoffpb=Modified
- Henikoff scheme as used in PSI-BLAST.</p>
- <p class=MsoNormal style='page-break-after:avoid'>clustalw=CLUSTALW method.</p>
- <p class=MsoNormal style='page-break-after:avoid'>threeway=Gotoh three-way
- method.</p>
- <p class=MsoNormal style='page-break-after:avoid'> </p>
- </td>
- </tr>
-</table>
-
-<p class=MsoNormal> </p>
-
-<table class=MsoTableGrid border=0 cellspacing=0 cellpadding=0
- style='border-collapse:collapse'>
- <thead>
- <tr>
- <td width=137 valign=top style='width:102.6pt;border:none;border-bottom:
- solid windowtext 1.0pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal style='page-break-after:avoid'><b><span style='font-size:
- 9.0pt;font-family:Arial'>Flag option</span></b></p>
- </td>
- <td width=106 valign=top style='width:1.1in;border:none;border-bottom:solid windowtext 1.0pt;
- padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal style='page-break-after:avoid'><b><span style='font-size:
- 9.0pt;font-family:Arial'>Set by default?</span></b></p>
- </td>
- <td width=348 valign=top style='width:261.0pt;border:none;border-bottom:
- solid windowtext 1.0pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal style='page-break-after:avoid'><b><span style='font-size:
- 9.0pt;font-family:Arial'>Description</span></b></p>
- </td>
- </tr>
- </thead>
- <tr style='height:6.25pt'>
- <td width=137 valign=top style='width:102.6pt;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal style='page-break-after:avoid'><span style='font-size:
- 8.0pt;font-family:"Courier New"'>anchors</span></p>
- </td>
- <td width=106 valign=top style='width:1.1in;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal style='page-break-after:avoid'>yes</p>
- </td>
- <td width=348 valign=top style='width:261.0pt;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal style='page-break-after:avoid'>Use anchor optimization in
- tree dependent refinement iterations.</p>
- <p class=MsoNormal style='page-break-after:avoid'> </p>
- </td>
- </tr>
- <tr style='height:6.25pt'>
- <td width=137 valign=top style='width:102.6pt;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal style='page-break-after:avoid'><span style='font-size:
- 8.0pt;font-family:"Courier New"'>clw</span></p>
- </td>
- <td width=106 valign=top style='width:1.1in;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal style='page-break-after:avoid'>no</p>
- </td>
- <td width=348 valign=top style='width:261.0pt;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal style='page-break-after:avoid'>Write output in CLUSTALW
- format (default is FASTA).</p>
- <p class=MsoNormal style='page-break-after:avoid'> </p>
- </td>
- </tr>
- <tr style='height:6.25pt'>
- <td width=137 valign=top style='width:102.6pt;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal style='page-break-after:avoid'><span style='font-size:
- 8.0pt;font-family:"Courier New"'>clwstrict</span></p>
- </td>
- <td width=106 valign=top style='width:1.1in;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal style='page-break-after:avoid'>no</p>
- </td>
- <td width=348 valign=top style='width:261.0pt;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal style='page-break-after:avoid'>Write output in CLUSTALW
- format with the "CLUSTAL W (1.81)" header rather than the MUSCLE
- version. This is useful when a post-processing step is picky about the file
- header.</p>
- <p class=MsoNormal style='page-break-after:avoid'> </p>
- </td>
- </tr>
- <tr style='height:6.25pt'>
- <td width=137 valign=top style='width:102.6pt;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal style='page-break-after:avoid'><span style='font-size:
- 8.0pt;font-family:"Courier New"'>core</span></p>
- </td>
- <td width=106 valign=top style='width:1.1in;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal style='page-break-after:avoid'>yes in muscle,</p>
- <p class=MsoNormal style='page-break-after:avoid'>no in muscled.</p>
- </td>
- <td width=348 valign=top style='width:261.0pt;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal style='page-break-after:avoid'>Do not catch exceptions.</p>
- <p class=MsoNormal style='page-break-after:avoid'> </p>
- <p class=MsoNormal style='page-break-after:avoid'> </p>
- </td>
- </tr>
- <tr style='height:6.25pt'>
- <td width=137 valign=top style='width:102.6pt;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal style='page-break-after:avoid'><span style='font-size:
- 8.0pt;font-family:"Courier New"'>fasta</span></p>
- </td>
- <td width=106 valign=top style='width:1.1in;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal style='page-break-after:avoid'>yes</p>
- </td>
- <td width=348 valign=top style='width:261.0pt;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal style='page-break-after:avoid'>Write output in FASTA
- format. Alternatives include <i>clw</i>,</p>
- <p class=MsoNormal style='page-break-after:avoid'><i>clwstrict, msf</i> and
- <i>html</i>.</p>
- <p class=MsoNormal style='page-break-after:avoid'> </p>
- </td>
- </tr>
- <tr style='height:6.25pt'>
- <td width=137 valign=top style='width:102.6pt;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal style='page-break-after:avoid'><span style='font-size:
- 8.0pt;font-family:"Courier New"'>group</span></p>
- </td>
- <td width=106 valign=top style='width:1.1in;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal style='page-break-after:avoid'>yes</p>
- </td>
- <td width=348 valign=top style='width:261.0pt;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal style='page-break-after:avoid'>Group similar sequences
- together in the output. This is the default. See also <i>stable</i>.</p>
- <p class=MsoNormal style='page-break-after:avoid'> </p>
- </td>
- </tr>
- <tr style='height:6.25pt'>
- <td width=137 valign=top style='width:102.6pt;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal style='page-break-after:avoid'><span style='font-size:
- 8.0pt;font-family:"Courier New"'>html</span></p>
- </td>
- <td width=106 valign=top style='width:1.1in;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal style='page-break-after:avoid'>no</p>
- </td>
- <td width=348 valign=top style='width:261.0pt;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal style='page-break-after:avoid'>Write output in HTML format
- (default is FASTA).</p>
- <p class=MsoNormal style='page-break-after:avoid'> </p>
- </td>
- </tr>
- <tr>
- <td width=137 valign=top style='width:102.6pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal style='page-break-after:avoid'><span style='font-size:
- 8.0pt;font-family:"Courier New"'>le</span></p>
- </td>
- <td width=106 valign=top style='width:1.1in;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal>maybe</p>
- </td>
- <td width=348 valign=top style='width:261.0pt;padding:0in 5.4pt 0in 5.4pt'>
- <p class=MsoNormal>Use log-expectation profile score (VTML240). Alternatives
- are to use <i>sp</i> or <i>sv</i>. This is the default for amino acid
- sequences.</p>
- <p class=MsoNormal> </p>
- </td>
- </tr>
- <tr style='height:6.25pt'>
- <td width=137 valign=top style='width:102.6pt;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>msf</span></p>
- </td>
- <td width=106 valign=top style='width:1.1in;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal>no</p>
- </td>
- <td width=348 valign=top style='width:261.0pt;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal>Write output in MSF format (default is FASTA).</p>
- <p class=MsoNormal> </p>
- </td>
- </tr>
- <tr style='height:6.25pt'>
- <td width=137 valign=top style='width:102.6pt;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>noanchors</span></p>
- </td>
- <td width=106 valign=top style='width:1.1in;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal>no</p>
- </td>
- <td width=348 valign=top style='width:261.0pt;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal>Disable anchor optimization. Default is <i>anchors</i>.</p>
- <p class=MsoNormal> </p>
- </td>
- </tr>
- <tr style='height:6.25pt'>
- <td width=137 valign=top style='width:102.6pt;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>nocore</span></p>
- </td>
- <td width=106 valign=top style='width:1.1in;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal>no in muscle,</p>
- <p class=MsoNormal>yes in muscled.</p>
- </td>
- <td width=348 valign=top style='width:261.0pt;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal>Catch exceptions and give an error message if possible.</p>
- <p class=MsoNormal> </p>
- <p class=MsoNormal> </p>
- </td>
- </tr>
- <tr style='height:6.25pt'>
- <td width=137 valign=top style='width:102.6pt;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>quiet</span></p>
- </td>
- <td width=106 valign=top style='width:1.1in;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal>no</p>
- </td>
- <td width=348 valign=top style='width:261.0pt;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal>Do not display progress messages.</p>
- <p class=MsoNormal> </p>
- </td>
- </tr>
- <tr style='height:6.25pt'>
- <td width=137 valign=top style='width:102.6pt;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>refine</span></p>
- </td>
- <td width=106 valign=top style='width:1.1in;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal>no</p>
- </td>
- <td width=348 valign=top style='width:261.0pt;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal>Input file is already aligned, skip first two iterations
- and begin tree dependent refinement.</p>
- <p class=MsoNormal> </p>
- </td>
- </tr>
- <tr style='height:6.25pt'>
- <td width=137 valign=top style='width:102.6pt;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal style='page-break-after:avoid'><span style='font-size:
- 8.0pt;font-family:"Courier New"'>sp</span></p>
- </td>
- <td width=106 valign=top style='width:1.1in;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal>no</p>
- </td>
- <td width=348 valign=top style='width:261.0pt;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal>Use sum-of-pairs protein profile score (PAM200). Default
- is <i>le.</i></p>
- <p class=MsoNormal> </p>
- </td>
- </tr>
- <tr style='height:6.25pt'>
- <td width=137 valign=top style='width:102.6pt;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal style='page-break-after:avoid'><span style='font-size:
- 8.0pt;font-family:"Courier New"'>spn</span></p>
- </td>
- <td width=106 valign=top style='width:1.1in;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal>maybe</p>
- <p class=MsoNormal> </p>
- </td>
- <td width=348 valign=top style='width:261.0pt;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal>Use sum-of-pairs nucleotide profile score (BLASTZ
- parameters). This is the only option for nucleotides, and is therefore the
- default.</p>
- <p class=MsoNormal> </p>
- </td>
- </tr>
- <tr style='height:6.25pt'>
- <td width=137 valign=top style='width:102.6pt;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal style='page-break-after:avoid'><span style='font-size:
- 8.0pt;font-family:"Courier New"'>stable</span></p>
- </td>
- <td width=106 valign=top style='width:1.1in;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal>no</p>
- </td>
- <td width=348 valign=top style='width:261.0pt;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal>Preserve input order of sequences in output file. Default
- is to group sequences by similarity (<i>group</i>).</p>
- <p class=MsoNormal> </p>
- </td>
- </tr>
- <tr style='height:6.25pt'>
- <td width=137 valign=top style='width:102.6pt;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>sv</span></p>
- </td>
- <td width=106 valign=top style='width:1.1in;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal>no</p>
- </td>
- <td width=348 valign=top style='width:261.0pt;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal>Use sum-of-pairs profile score (VTML240). Default is <i>le.</i></p>
- <p class=MsoNormal> </p>
- </td>
- </tr>
- <tr style='height:6.25pt'>
- <td width=137 valign=top style='width:102.6pt;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>termgapsfull</span></p>
- </td>
- <td width=106 valign=top style='width:1.1in;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal>no</p>
- </td>
- <td width=348 valign=top style='width:261.0pt;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal>Terminal gaps penalized with full penalty.</p>
- <p class=MsoNormal>[1] Not fully supported in this version.</p>
- <p class=MsoNormal> </p>
- </td>
- </tr>
- <tr style='height:6.25pt'>
- <td width=137 valign=top style='width:102.6pt;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>termgapshalf</span></p>
- </td>
- <td width=106 valign=top style='width:1.1in;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal>yes</p>
- </td>
- <td width=348 valign=top style='width:261.0pt;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal>Terminal gaps penalized with half penalty.</p>
- <p class=MsoNormal>[1] Not fully supported in this version.</p>
- <p class=MsoNormal> </p>
- </td>
- </tr>
- <tr style='height:6.25pt'>
- <td width=137 valign=top style='width:102.6pt;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>termgapshalflonger</span></p>
- </td>
- <td width=106 valign=top style='width:1.1in;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal>no</p>
- </td>
- <td width=348 valign=top style='width:261.0pt;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal>Terminal gaps penalized with half penalty if gap relative
- to </p>
- <p class=MsoNormal>longer sequence, otherwise with full penalty.</p>
- <p class=MsoNormal>[1] Not fully supported in this version.</p>
- <p class=MsoNormal> </p>
- </td>
- </tr>
- <tr style='height:6.25pt'>
- <td width=137 valign=top style='width:102.6pt;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>verbose</span></p>
- </td>
- <td width=106 valign=top style='width:1.1in;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal>no</p>
- </td>
- <td width=348 valign=top style='width:261.0pt;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal>Write parameter settings and progress messages to log
- file.</p>
- <p class=MsoNormal> </p>
- </td>
- </tr>
- <tr style='height:6.25pt'>
- <td width=137 valign=top style='width:102.6pt;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal><span style='font-size:8.0pt;font-family:"Courier New"'>version</span></p>
- </td>
- <td width=106 valign=top style='width:1.1in;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal>no</p>
- </td>
- <td width=348 valign=top style='width:261.0pt;padding:0in 5.4pt 0in 5.4pt;
- height:6.25pt'>
- <p class=MsoNormal>Write version string to stdout and exit.</p>
- </td>
- </tr>
-</table>
-
-<p class=MsoNormal><i> </i></p>
-
-<p class=MsoNormal><i>Notes</i></p>
-
-<p class=MsoNormal>[1] Default depends on the profile scoring function. To
-determine the default, use <i>verbose log</i> and check the log file.</p>
-
-<p class=MsoNormal><u><span style='text-decoration:none'> </span></u></p>
-
-</div>
-
-</body>
-
-</html>
Deleted: trunk/packages/muscle/trunk/muscleout.cpp
===================================================================
--- trunk/packages/muscle/trunk/muscleout.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/muscleout.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,109 +0,0 @@
-#include "muscle.h"
-#include "msa.h"
-#include "params.h"
-#include "textfile.h"
-
-// Write the alignment to every destination requested by the global options.
-//
-// Two groups of options are honoured:
-//   * "flag" options (g_bFASTA, g_bMSF, g_bAln, g_bHTML, g_bPHYI, g_bPHYS)
-//     select the format written to the single -out file (g_pstrOutFileName);
-//     the first one that is set wins.
-//   * "value" options (g_pstrFASTAOutFileName, ...) each name an additional
-//     file that receives the alignment in the corresponding format.
-// Finally WriteScoreFile is called if g_pstrScoreFileName is non-null.
-//
-// msa: the alignment to write.
-static void DoOutput(MSA &msa)
-    {
-    bool AnyOutput = false;
-
-// Flag options, at most one used (because only one -out filename)
-    TextFile fileOut(g_pstrOutFileName, true);
-    if (g_bFASTA)
-        {
-        msa.ToFASTAFile(fileOut);
-        AnyOutput = true;
-        }
-    else if (g_bMSF)
-        {
-        msa.ToMSFFile(fileOut);
-        AnyOutput = true;
-        }
-    else if (g_bAln)
-        {
-        msa.ToAlnFile(fileOut);
-        AnyOutput = true;
-        }
-    else if (g_bHTML)
-        {
-        msa.ToHTMLFile(fileOut);
-        AnyOutput = true;
-        }
-    else if (g_bPHYI)
-        {
-        msa.ToPhyInterleavedFile(fileOut);
-        AnyOutput = true;
-        }
-    else if (g_bPHYS)
-        {
-        msa.ToPhySequentialFile(fileOut);
-        AnyOutput = true;
-        }
-
-// If -out option was given but no flags, output as FASTA
-// NOTE(review): when no flag is set and the -out name is "-", nothing is
-// written to fileOut at all -- confirm that this is intended.
-    if (!AnyOutput && strcmp(g_pstrOutFileName, "-") != 0)
-        msa.ToFASTAFile(fileOut);
-
-    fileOut.Close();
-
-// Value options
-    if (g_pstrFASTAOutFileName)
-        {
-        TextFile File(g_pstrFASTAOutFileName, true);
-        msa.ToFASTAFile(File);
-        }
-
-    if (g_pstrMSFOutFileName)
-        {
-        TextFile File(g_pstrMSFOutFileName, true);
-        msa.ToMSFFile(File);
-        }
-
-    if (g_pstrClwOutFileName)
-        {
-        TextFile File(g_pstrClwOutFileName, true);
-        msa.ToAlnFile(File);
-        }
-
-    if (g_pstrClwStrictOutFileName)
-        {
-        // The strict variant is selected through a global that ToAlnFile is
-        // expected to consult; the flag stays set after this block.
-        g_bClwStrict = true;
-        TextFile File(g_pstrClwStrictOutFileName, true);
-        msa.ToAlnFile(File);
-        }
-
-    if (g_pstrHTMLOutFileName)
-        {
-        TextFile File(g_pstrHTMLOutFileName, true);
-        msa.ToHTMLFile(File);
-        }
-
-    if (g_pstrPHYIOutFileName)
-        {
-        // Interleaved output, matching the g_bPHYI flag branch above
-        // (this branch used to call the sequential writer).
-        TextFile File(g_pstrPHYIOutFileName, true);
-        msa.ToPhyInterleavedFile(File);
-        }
-
-    if (g_pstrPHYSOutFileName)
-        {
-        TextFile File(g_pstrPHYSOutFileName, true);
-        msa.ToPhySequentialFile(File);
-        }
-
-    if (0 != g_pstrScoreFileName)
-        WriteScoreFile(msa);
-    }
-
-// Entry point for writing results: runs MHackEnd on the alignment and then
-// emits it through DoOutput. When g_bStable is set the alignment is first
-// passed through Stabilize and the stabilized copy is written instead.
-void MuscleOutput(MSA &msa)
-    {
-    MHackEnd(msa);
-
-    if (!g_bStable)
-        {
-        DoOutput(msa);
-        return;
-        }
-
-    MSA msaStable;
-    Stabilize(msa, msaStable);
-    msa.Clear(); // save memory
-    DoOutput(msaStable);
-    }
Deleted: trunk/packages/muscle/trunk/nucmx.cpp
===================================================================
--- trunk/packages/muscle/trunk/nucmx.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/nucmx.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,23 +0,0 @@
-#include "muscle.h"
-
-// BLASTZ default parameters
-// open 400, extend 30, matrix as below
-
-const float NUC_EXTEND = 30;
-const float NUC_SP_CENTER = 2*NUC_EXTEND;
-
-// v(x): one table entry, the raw score x shifted up by NUC_SP_CENTER.
-// The argument is parenthesized so that any expression can be passed safely.
-#define v(x) ((float) (x) + NUC_SP_CENTER)
-// ROW(A, C, G, T): one initializer row built from four raw scores.
-#define ROW(A, C, G, T) \
-    { v(A), v(C), v(G), v(T) },
-
-// Nucleotide substitution scores. Only the leading 4x4 corner (rows and
-// columns in the order A, C, G, T) is given explicitly; all remaining
-// entries of the 32x32 array are zero-initialized.
-float NUC_SP[32][32] =
-    {
-//        A     C     G     T
-ROW(     91, -114,  -31, -123) // A
-
-ROW(   -114,  100, -125,  -31) // C
-
-ROW(    -31, -125,  100, -114) // G
-
-ROW(   -123,  -31, -114,   91) // T
-    };
Deleted: trunk/packages/muscle/trunk/nwdasimple.cpp
===================================================================
--- trunk/packages/muscle/trunk/nwdasimple.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/nwdasimple.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,494 +0,0 @@
-#include "muscle.h"
-#include <math.h>
-#include "pwpath.h"
-#include "profile.h"
-#include <stdio.h>
-
-#define TRACE 0
-
-// When true, NWDASimple does not delete its DP and trace-back matrices but
-// hands them over through the pointers below before it returns.
-// NOTE(review): nothing in this file frees them afterwards -- presumably the
-// consumer is responsible; confirm.
-bool g_bKeepSimpleDP;
-// Score matrices of the five alignment states (M, D, E, I, J).
-SCORE *g_DPM;
-SCORE *g_DPD;
-SCORE *g_DPE;
-SCORE *g_DPI;
-SCORE *g_DPJ;
-// Trace-back matrices of the same five states.
-char *g_TBM;
-char *g_TBD;
-char *g_TBE;
-char *g_TBI;
-char *g_TBJ;
-
-#if DOUBLE_AFFINE
-
-// Translate a DP state letter into the edge type stored in a path:
-// 'E' becomes 'D', 'J' becomes 'I', anything else is returned unchanged.
-static char XlatEdgeType(char c)
-    {
-    switch (c)
-        {
-    case 'E':
-        return 'D';
-    case 'J':
-        return 'I';
-    default:
-        return c;
-        }
-    }
-
-// Format a score for the trace listings.
-// Scores below -100000 are shown as " *"; everything else is printed with
-// "%6.1f". The result points either to a string literal or to a static
-// buffer that is overwritten by the next call, so it must be consumed
-// before calling this function again.
-static const char *LocalScoreToStr(SCORE s)
-    {
-    static char str[16];
-    if (s < -100000)
-        return " *";
-    // Bounded write: a very large positive score no longer overruns str
-    // (it is truncated instead).
-    snprintf(str, sizeof(str), "%6.1f", s);
-    return str;
-    }
-
-// Log a trace-back matrix as a table: one column per prefix length of B,
-// one row per prefix length of A, each labelled with the consensus character
-// of the last profile position in that prefix (blank for the empty prefix).
-// TBM_ and uPrefixCountA are the names the TBM() accessor macro works on.
-static void ListTB(const char *TBM_, const ProfPos *PA, const ProfPos *PB,
-  unsigned uPrefixCountA, unsigned uPrefixCountB)
-    {
-    // Column headings
-    Log(" ");
-    for (unsigned j = 0; j < uPrefixCountB; ++j)
-        {
-        const char cB = (0 == j) ? ' ' : ConsensusChar(PB[j - 1]);
-        Log(" %4u:%c", j, cB);
-        }
-    Log("\n");
-
-    // One line per prefix of A
-    for (unsigned i = 0; i < uPrefixCountA; ++i)
-        {
-        const char cA = (0 == i) ? ' ' : ConsensusChar(PA[i - 1]);
-        Log("%4u:%c ", i, cA);
-        for (unsigned j = 0; j < uPrefixCountB; ++j)
-            Log(" %6c", TBM(i, j));
-        Log("\n");
-        }
-    }
-
-// Log a score matrix as a table: one column per prefix length of B, one row
-// per prefix length of A, each labelled with the consensus character of the
-// last profile position in that prefix (blank for the empty prefix).
-// DPM_ and uPrefixCountA are the names the DPM() accessor macro works on.
-static void ListDP(const SCORE *DPM_, const ProfPos *PA, const ProfPos *PB,
-  unsigned uPrefixCountA, unsigned uPrefixCountB)
-    {
-    // Column headings
-    Log(" ");
-    for (unsigned j = 0; j < uPrefixCountB; ++j)
-        {
-        const char cB = (0 == j) ? ' ' : ConsensusChar(PB[j - 1]);
-        Log(" %4u:%c", j, cB);
-        }
-    Log("\n");
-
-    // One line per prefix of A
-    for (unsigned i = 0; i < uPrefixCountA; ++i)
-        {
-        const char cA = (0 == i) ? ' ' : ConsensusChar(PA[i - 1]);
-        Log("%4u:%c ", i, cA);
-        for (unsigned j = 0; j < uPrefixCountB; ++j)
-            Log(" %s", LocalScoreToStr(DPM(i, j)));
-        Log("\n");
-        }
-    }
-
-// Align two profiles end to end with a double-affine gap model, using
-// full-size dynamic-programming matrices.
-//
-// Five states are kept for every pair of prefix lengths:
-//   M      letter in A against letter in B
-//   D, E   letter in A against gap in B, scored with m_scoreGapOpen /
-//          g_scoreGapExtend and m_scoreGapOpen2 / g_scoreGapExtend2
-//   I, J   gap in A against letter in B, scored likewise
-//
-// PA, uLengthA   profile A and its number of positions (must be > 0)
-// PB, uLengthB   profile B and its number of positions (must be > 0)
-// Path           receives the trace-back; E and J edges are stored as D and I
-// Returns the best final-state score, gap-close score included.
-//
-// If g_bKeepSimpleDP is set, the M/D/E/I/J score and trace-back matrices are
-// handed over through the g_DP*/g_TB* globals instead of being deleted.
-SCORE NWDASimple(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
-  unsigned uLengthB, PWPath &Path)
-    {
-    assert(uLengthB > 0 && uLengthA > 0);
-
-    const unsigned uPrefixCountA = uLengthA + 1;
-    const unsigned uPrefixCountB = uLengthB + 1;
-
-// Allocate DP matrices
-    // Widen before multiplying so the element count is not computed in
-    // (possibly wrapping) unsigned arithmetic.
-    const size_t LM = (size_t) uPrefixCountA*uPrefixCountB;
-    SCORE *DPL_ = new SCORE[LM];
-    SCORE *DPM_ = new SCORE[LM];
-    SCORE *DPD_ = new SCORE[LM];
-    SCORE *DPE_ = new SCORE[LM];
-    SCORE *DPI_ = new SCORE[LM];
-    SCORE *DPJ_ = new SCORE[LM];
-
-    char *TBM_ = new char[LM];
-    char *TBD_ = new char[LM];
-    char *TBE_ = new char[LM];
-    char *TBI_ = new char[LM];
-    char *TBJ_ = new char[LM];
-
-    memset(TBM_, '?', LM);
-    memset(TBD_, '?', LM);
-    memset(TBE_, '?', LM);
-    memset(TBI_, '?', LM);
-    memset(TBJ_, '?', LM);
-
-    DPM(0, 0) = 0;
-    DPD(0, 0) = MINUS_INFINITY;
-    DPE(0, 0) = MINUS_INFINITY;
-    DPI(0, 0) = MINUS_INFINITY;
-    DPJ(0, 0) = MINUS_INFINITY;
-
-    DPM(1, 0) = MINUS_INFINITY;
-    DPD(1, 0) = PA[0].m_scoreGapOpen;
-    DPE(1, 0) = PA[0].m_scoreGapOpen2;
-    TBD(1, 0) = 'D';
-    TBE(1, 0) = 'E';
-    DPI(1, 0) = MINUS_INFINITY;
-    DPJ(1, 0) = MINUS_INFINITY;
-
-    DPM(0, 1) = MINUS_INFINITY;
-    DPD(0, 1) = MINUS_INFINITY;
-    DPE(0, 1) = MINUS_INFINITY;
-    DPI(0, 1) = PB[0].m_scoreGapOpen;
-    DPJ(0, 1) = PB[0].m_scoreGapOpen2;
-    TBI(0, 1) = 'I';
-    TBJ(0, 1) = 'J';
-
-// Empty prefix of B is special case
-    for (unsigned uPrefixLengthA = 2; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
-        {
-        DPM(uPrefixLengthA, 0) = MINUS_INFINITY;
-
-        DPD(uPrefixLengthA, 0) = DPD(uPrefixLengthA - 1, 0) + g_scoreGapExtend;
-        DPE(uPrefixLengthA, 0) = DPE(uPrefixLengthA - 1, 0) + g_scoreGapExtend2;
-
-        TBD(uPrefixLengthA, 0) = 'D';
-        TBE(uPrefixLengthA, 0) = 'E';
-
-        DPI(uPrefixLengthA, 0) = MINUS_INFINITY;
-        DPJ(uPrefixLengthA, 0) = MINUS_INFINITY;
-        }
-
-// Empty prefix of A is special case
-    for (unsigned uPrefixLengthB = 2; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
-        {
-        DPM(0, uPrefixLengthB) = MINUS_INFINITY;
-
-        DPD(0, uPrefixLengthB) = MINUS_INFINITY;
-        DPE(0, uPrefixLengthB) = MINUS_INFINITY;
-
-        DPI(0, uPrefixLengthB) = DPI(0, uPrefixLengthB - 1) + g_scoreGapExtend;
-        DPJ(0, uPrefixLengthB) = DPJ(0, uPrefixLengthB - 1) + g_scoreGapExtend2;
-
-        TBI(0, uPrefixLengthB) = 'I';
-        TBJ(0, uPrefixLengthB) = 'J';
-        }
-
-// Special case to agree with NWFast, no D-I transitions so...
-    DPD(uLengthA, 0) = MINUS_INFINITY;
-    DPE(uLengthA, 0) = MINUS_INFINITY;
-//  DPI(0, uLengthB) = MINUS_INFINITY;
-//  DPJ(0, uLengthB) = MINUS_INFINITY;
-
-// ============
-// Main DP loop
-// ============
-    // The gap-close scores applied when entering M are those of the
-    // previous position in each profile; they start at MINUS_INFINITY and
-    // are updated at the bottom of each loop.
-    SCORE scoreGapCloseB = MINUS_INFINITY;
-    SCORE scoreGapClose2B = MINUS_INFINITY;
-    for (unsigned uPrefixLengthB = 1; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
-        {
-        const ProfPos &PPB = PB[uPrefixLengthB - 1];
-
-        SCORE scoreGapCloseA = MINUS_INFINITY;
-        SCORE scoreGapClose2A = MINUS_INFINITY;
-        for (unsigned uPrefixLengthA = 1; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
-            {
-            const ProfPos &PPA = PA[uPrefixLengthA - 1];
-
-            {
-            // Match M=LetterA+LetterB
-            SCORE scoreLL = ScoreProfPos2(PPA, PPB);
-            DPL(uPrefixLengthA, uPrefixLengthB) = scoreLL;
-
-            SCORE scoreMM = DPM(uPrefixLengthA-1, uPrefixLengthB-1);
-            SCORE scoreDM = DPD(uPrefixLengthA-1, uPrefixLengthB-1) + scoreGapCloseA;
-            SCORE scoreEM = DPE(uPrefixLengthA-1, uPrefixLengthB-1) + scoreGapClose2A;
-            SCORE scoreIM = DPI(uPrefixLengthA-1, uPrefixLengthB-1) + scoreGapCloseB;
-            SCORE scoreJM = DPJ(uPrefixLengthA-1, uPrefixLengthB-1) + scoreGapClose2B;
-
-            // Ties are resolved in the order M, D, E, I, J.
-            SCORE scoreBest;
-            if (scoreMM >= scoreDM && scoreMM >= scoreEM && scoreMM >= scoreIM && scoreMM >= scoreJM)
-                {
-                scoreBest = scoreMM;
-                TBM(uPrefixLengthA, uPrefixLengthB) = 'M';
-                }
-            else if (scoreDM >= scoreMM && scoreDM >= scoreEM && scoreDM >= scoreIM && scoreDM >= scoreJM)
-                {
-                scoreBest = scoreDM;
-                TBM(uPrefixLengthA, uPrefixLengthB) = 'D';
-                }
-            else if (scoreEM >= scoreMM && scoreEM >= scoreDM && scoreEM >= scoreIM && scoreEM >= scoreJM)
-                {
-                scoreBest = scoreEM;
-                TBM(uPrefixLengthA, uPrefixLengthB) = 'E';
-                }
-            else if (scoreIM >= scoreMM && scoreIM >= scoreDM && scoreIM >= scoreEM && scoreIM >= scoreJM)
-                {
-                scoreBest = scoreIM;
-                TBM(uPrefixLengthA, uPrefixLengthB) = 'I';
-                }
-            else
-                {
-                assert(scoreJM >= scoreMM && scoreJM >= scoreDM && scoreJM >= scoreEM && scoreJM >= scoreIM);
-                scoreBest = scoreJM;
-                TBM(uPrefixLengthA, uPrefixLengthB) = 'J';
-                }
-            DPM(uPrefixLengthA, uPrefixLengthB) = scoreBest + scoreLL;
-            }
-
-            {
-            // Delete D=LetterA+GapB
-            SCORE scoreMD = DPM(uPrefixLengthA-1, uPrefixLengthB) +
-              PA[uPrefixLengthA-1].m_scoreGapOpen;
-            SCORE scoreDD = DPD(uPrefixLengthA-1, uPrefixLengthB) + g_scoreGapExtend;
-
-            SCORE scoreBest;
-            if (scoreMD >= scoreDD)
-                {
-                scoreBest = scoreMD;
-                TBD(uPrefixLengthA, uPrefixLengthB) = 'M';
-                }
-            else
-                {
-                assert(scoreDD >= scoreMD);
-                scoreBest = scoreDD;
-                TBD(uPrefixLengthA, uPrefixLengthB) = 'D';
-                }
-            DPD(uPrefixLengthA, uPrefixLengthB) = scoreBest;
-            }
-
-            {
-            // Delete E=LetterA+GapB
-            SCORE scoreME = DPM(uPrefixLengthA-1, uPrefixLengthB) +
-              PA[uPrefixLengthA-1].m_scoreGapOpen2;
-            SCORE scoreEE = DPE(uPrefixLengthA-1, uPrefixLengthB) + g_scoreGapExtend2;
-
-            SCORE scoreBest;
-            if (scoreME >= scoreEE)
-                {
-                scoreBest = scoreME;
-                TBE(uPrefixLengthA, uPrefixLengthB) = 'M';
-                }
-            else
-                {
-                assert(scoreEE >= scoreME);
-                scoreBest = scoreEE;
-                TBE(uPrefixLengthA, uPrefixLengthB) = 'E';
-                }
-            DPE(uPrefixLengthA, uPrefixLengthB) = scoreBest;
-            }
-
-            // Insert I=GapA+LetterB
-            {
-            SCORE scoreMI = DPM(uPrefixLengthA, uPrefixLengthB-1) +
-              PB[uPrefixLengthB - 1].m_scoreGapOpen;
-            SCORE scoreII = DPI(uPrefixLengthA, uPrefixLengthB-1) + g_scoreGapExtend;
-
-            SCORE scoreBest;
-            if (scoreMI >= scoreII)
-                {
-                scoreBest = scoreMI;
-                TBI(uPrefixLengthA, uPrefixLengthB) = 'M';
-                }
-            else
-                {
-                assert(scoreII > scoreMI);
-                scoreBest = scoreII;
-                TBI(uPrefixLengthA, uPrefixLengthB) = 'I';
-                }
-            DPI(uPrefixLengthA, uPrefixLengthB) = scoreBest;
-            }
-
-            // Insert J=GapA+LetterB
-            {
-            SCORE scoreMJ = DPM(uPrefixLengthA, uPrefixLengthB-1) +
-              PB[uPrefixLengthB - 1].m_scoreGapOpen2;
-            SCORE scoreJJ = DPJ(uPrefixLengthA, uPrefixLengthB-1) + g_scoreGapExtend2;
-
-            SCORE scoreBest;
-            if (scoreMJ >= scoreJJ)
-                {
-                scoreBest = scoreMJ;
-                TBJ(uPrefixLengthA, uPrefixLengthB) = 'M';
-                }
-            else
-                {
-                assert(scoreJJ > scoreMJ);
-                scoreBest = scoreJJ;
-                TBJ(uPrefixLengthA, uPrefixLengthB) = 'J';
-                }
-            DPJ(uPrefixLengthA, uPrefixLengthB) = scoreBest;
-            }
-
-            scoreGapCloseA = PPA.m_scoreGapClose;
-            scoreGapClose2A = PPA.m_scoreGapClose2;
-            }
-        scoreGapCloseB = PPB.m_scoreGapClose;
-        scoreGapClose2B = PPB.m_scoreGapClose2;
-        }
-
-#if TRACE
-    Log("\n");
-    Log("DA Simple DPL:\n");
-    ListDP(DPL_, PA, PB, uPrefixCountA, uPrefixCountB);
-    Log("\n");
-    Log("DA Simple DPM:\n");
-    ListDP(DPM_, PA, PB, uPrefixCountA, uPrefixCountB);
-    Log("\n");
-    Log("DA Simple DPD:\n");
-    ListDP(DPD_, PA, PB, uPrefixCountA, uPrefixCountB);
-    Log("\n");
-    Log("DA Simple DPE:\n");
-    ListDP(DPE_, PA, PB, uPrefixCountA, uPrefixCountB);
-    Log("\n");
-    Log("DA Simple DPI:\n");
-    ListDP(DPI_, PA, PB, uPrefixCountA, uPrefixCountB);
-    Log("\n");
-    Log("DA Simple DPJ:\n");
-    ListDP(DPJ_, PA, PB, uPrefixCountA, uPrefixCountB);
-    Log("\n");
-    Log("DA Simple TBM:\n");
-    ListTB(TBM_, PA, PB, uPrefixCountA, uPrefixCountB);
-    Log("\n");
-    Log("DA Simple TBD:\n");
-    ListTB(TBD_, PA, PB, uPrefixCountA, uPrefixCountB);
-    Log("\n");
-    Log("DA Simple TBE:\n");
-    ListTB(TBE_, PA, PB, uPrefixCountA, uPrefixCountB);
-    Log("\n");
-    Log("DA Simple TBI:\n");
-    ListTB(TBI_, PA, PB, uPrefixCountA, uPrefixCountB);
-    Log("\n");
-    Log("DA Simple TBJ:\n");
-    ListTB(TBJ_, PA, PB, uPrefixCountA, uPrefixCountB);
-#endif
-
-// Trace-back
-// ==========
-    Path.Clear();
-
-// Find last edge
-    SCORE M = DPM(uLengthA, uLengthB);
-    SCORE D = DPD(uLengthA, uLengthB) + PA[uLengthA-1].m_scoreGapClose;
-    SCORE E = DPE(uLengthA, uLengthB) + PA[uLengthA-1].m_scoreGapClose2;
-    SCORE I = DPI(uLengthA, uLengthB) + PB[uLengthB-1].m_scoreGapClose;
-    SCORE J = DPJ(uLengthA, uLengthB) + PB[uLengthB-1].m_scoreGapClose2;
-
-    // M is the default; another state replaces it only if strictly better.
-    char cEdgeType = 'M';
-    SCORE BestScore = M;
-    if (D > BestScore)
-        {
-        cEdgeType = 'D';
-        BestScore = D;
-        }
-    if (E > BestScore)
-        {
-        cEdgeType = 'E';
-        BestScore = E;
-        }
-    if (I > BestScore)
-        {
-        cEdgeType = 'I';
-        BestScore = I;
-        }
-    if (J > BestScore)
-        {
-        cEdgeType = 'J';
-        BestScore = J;
-        }
-
-#if TRACE
-    Log("DA Simple: MAB=%.4g DAB=%.4g EAB=%.4g IAB=%.4g JAB=%.4g best=%c\n",
-      M, D, E, I, J, cEdgeType);
-#endif
-
-    // Walk back from (uLengthA, uLengthB) to (0, 0), prepending one edge per
-    // step and following the trace-back matrix of the current state.
-    unsigned PLA = uLengthA;
-    unsigned PLB = uLengthB;
-    for (;;)
-        {
-        PWEdge Edge;
-        Edge.cType = XlatEdgeType(cEdgeType);
-        Edge.uPrefixLengthA = PLA;
-        Edge.uPrefixLengthB = PLB;
-#if TRACE
-        Log("Prepend %c%d.%d\n", Edge.cType, PLA, PLB);
-#endif
-        Path.PrependEdge(Edge);
-
-        switch (cEdgeType)
-            {
-        case 'M':
-            assert(PLA > 0);
-            assert(PLB > 0);
-            cEdgeType = TBM(PLA, PLB);
-            --PLA;
-            --PLB;
-            break;
-
-        case 'D':
-            assert(PLA > 0);
-            cEdgeType = TBD(PLA, PLB);
-            --PLA;
-            break;
-
-        case 'E':
-            assert(PLA > 0);
-            cEdgeType = TBE(PLA, PLB);
-            --PLA;
-            break;
-
-        case 'I':
-            assert(PLB > 0);
-            cEdgeType = TBI(PLA, PLB);
-            --PLB;
-            break;
-
-        case 'J':
-            assert(PLB > 0);
-            cEdgeType = TBJ(PLA, PLB);
-            --PLB;
-            break;
-
-        default:
-            Quit("Invalid edge %c", cEdgeType);
-            }
-        if (0 == PLA && 0 == PLB)
-            break;
-        }
-    Path.Validate();
-
-#if TRACE
-    SCORE scorePath = FastScorePath2(PA, uLengthA, PB, uLengthB, Path);
-    Path.LogMe();
-    Log("Score = %s Path = %s\n", LocalScoreToStr(BestScore), LocalScoreToStr(scorePath));
-#endif
-
-    // The letter/letter score matrix is never handed over to the caller, so
-    // it is released on every path (it used to be leaked).
-    delete[] DPL_;
-
-    if (g_bKeepSimpleDP)
-        {
-        g_DPM = DPM_;
-        g_DPD = DPD_;
-        g_DPE = DPE_;
-        g_DPI = DPI_;
-        g_DPJ = DPJ_;
-
-        g_TBM = TBM_;
-        g_TBD = TBD_;
-        g_TBE = TBE_;
-        g_TBI = TBI_;
-        g_TBJ = TBJ_;
-        }
-    else
-        {
-        delete[] DPM_;
-        delete[] DPD_;
-        delete[] DPE_;
-        delete[] DPI_;
-        delete[] DPJ_;
-
-        delete[] TBM_;
-        delete[] TBD_;
-        delete[] TBE_;
-        delete[] TBI_;
-        delete[] TBJ_;
-        }
-
-    return BestScore;
-    }
-
-#endif // DOUBLE_AFFINE
Deleted: trunk/packages/muscle/trunk/nwdasimple2.cpp
===================================================================
--- trunk/packages/muscle/trunk/nwdasimple2.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/nwdasimple2.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,549 +0,0 @@
-#include "muscle.h"
-#include "pwpath.h"
-#include "profile.h"
-
-#if DOUBLE_AFFINE
-
-#define TRACE 0
-
-extern bool g_bKeepSimpleDP;
-extern SCORE *g_DPM;
-extern SCORE *g_DPD;
-extern SCORE *g_DPE;
-extern SCORE *g_DPI;
-extern SCORE *g_DPJ;
-extern char *g_TBM;
-extern char *g_TBD;
-extern char *g_TBE;
-extern char *g_TBI;
-extern char *g_TBJ;
-
-// Translate a DP state letter into the edge type stored in a path:
-// 'E' becomes 'D', 'J' becomes 'I', anything else is returned unchanged.
-static char XlatEdgeType(char c)
-    {
-    switch (c)
-        {
-    case 'E':
-        return 'D';
-    case 'J':
-        return 'I';
-    default:
-        return c;
-        }
-    }
-
-// Format a score for the trace listings.
-// Scores below -100000 are shown as " *"; everything else is printed with
-// "%6.1f". The result points either to a string literal or to a static
-// buffer that is overwritten by the next call, so it must be consumed
-// before calling this function again.
-static const char *LocalScoreToStr(SCORE s)
-    {
-    static char str[16];
-    if (s < -100000)
-        return " *";
-    // Bounded write: a very large positive score no longer overruns str
-    // (it is truncated instead).
-    snprintf(str, sizeof(str), "%6.1f", s);
-    return str;
-    }
-
-// Log a score matrix as a table: one column per prefix length of B, one row
-// per prefix length of A, each labelled with the consensus character of the
-// last profile position in that prefix (blank for the empty prefix).
-// DPM_ and uPrefixCountA are the names the DPM() accessor macro works on.
-static void ListDP(const SCORE *DPM_, const ProfPos *PA, const ProfPos *PB,
-  unsigned uPrefixCountA, unsigned uPrefixCountB)
-    {
-    // Column headings
-    Log(" ");
-    for (unsigned j = 0; j < uPrefixCountB; ++j)
-        {
-        const char cB = (0 == j) ? ' ' : ConsensusChar(PB[j - 1]);
-        Log(" %4u:%c", j, cB);
-        }
-    Log("\n");
-
-    // One line per prefix of A
-    for (unsigned i = 0; i < uPrefixCountA; ++i)
-        {
-        const char cA = (0 == i) ? ' ' : ConsensusChar(PA[i - 1]);
-        Log("%4u:%c ", i, cA);
-        for (unsigned j = 0; j < uPrefixCountB; ++j)
-            Log(" %s", LocalScoreToStr(DPM(i, j)));
-        Log("\n");
-        }
-    }
-
-// Log a trace-back matrix as a table: one column per prefix length of B,
-// one row per prefix length of A, each labelled with the consensus character
-// of the last profile position in that prefix (blank for the empty prefix).
-// TBM_ and uPrefixCountA are the names the TBM() accessor macro works on.
-static void ListTB(const char *TBM_, const ProfPos *PA, const ProfPos *PB,
-  unsigned uPrefixCountA, unsigned uPrefixCountB)
-    {
-    // Column headings
-    Log(" ");
-    for (unsigned j = 0; j < uPrefixCountB; ++j)
-        {
-        const char cB = (0 == j) ? ' ' : ConsensusChar(PB[j - 1]);
-        Log(" %4u:%c", j, cB);
-        }
-    Log("\n");
-
-    // One line per prefix of A
-    for (unsigned i = 0; i < uPrefixCountA; ++i)
-        {
-        const char cA = (0 == i) ? ' ' : ConsensusChar(PA[i - 1]);
-        Log("%4u:%c ", i, cA);
-        for (unsigned j = 0; j < uPrefixCountB; ++j)
-            Log(" %6c", TBM(i, j));
-        Log("\n");
-        }
-    }
-
-// Like ListDP, but each cell is shown after subtracting
-// (prefix length of A + prefix length of B) * g_scoreGapExtend from it.
-// DPM_ and uPrefixCountA are the names the DPM() accessor macro works on.
-static void ListDPM(const SCORE *DPM_, const ProfPos *PA, const ProfPos *PB,
-  unsigned uPrefixCountA, unsigned uPrefixCountB)
-    {
-    // Column headings
-    Log(" ");
-    for (unsigned j = 0; j < uPrefixCountB; ++j)
-        {
-        const char cB = (0 == j) ? ' ' : ConsensusChar(PB[j - 1]);
-        Log(" %4u:%c", j, cB);
-        }
-    Log("\n");
-
-    // One line per prefix of A
-    for (unsigned i = 0; i < uPrefixCountA; ++i)
-        {
-        const char cA = (0 == i) ? ' ' : ConsensusChar(PA[i - 1]);
-        Log("%4u:%c ", i, cA);
-        for (unsigned j = 0; j < uPrefixCountB; ++j)
-            {
-            SCORE scoreOffset = (i + j)*g_scoreGapExtend;
-            SCORE scoreShown = DPM(i, j) - scoreOffset;
-            Log(" %s", LocalScoreToStr(scoreShown));
-            }
-        Log("\n");
-        }
-    }
-
-extern SCORE ScoreProfPos2(const ProfPos &PP, const ProfPos &PPB);
-
-// Align two profiles end to end with a double-affine gap model, using
-// full-size dynamic-programming matrices.
-//
-// Five states are kept for every pair of prefix lengths:
-//   M      letter in A against letter in B
-//   D, E   letter in A against gap in B, scored with m_scoreGapOpen /
-//          g_scoreGapExtend and m_scoreGapOpen2 / g_scoreGapExtend2
-//   I, J   gap in A against letter in B, scored likewise
-// The gap-close score for a gap still open at the end of the alignment is
-// added directly into the final D/E/I/J cells after the main loop.
-//
-// PA, uLengthA   profile A and its number of positions (must be > 0)
-// PB, uLengthB   profile B and its number of positions (must be > 0)
-// Path           receives the trace-back; E and J edges are stored as D and I
-// Returns the best score among the five final cells.
-//
-// If g_bKeepSimpleDP is set, the M/D/E/I/J score and trace-back matrices are
-// handed over through the g_DP*/g_TB* globals instead of being deleted.
-SCORE NWDASimple2(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
-  unsigned uLengthB, PWPath &Path)
-    {
-    assert(uLengthB > 0 && uLengthA > 0);
-
-    const unsigned uPrefixCountA = uLengthA + 1;
-    const unsigned uPrefixCountB = uLengthB + 1;
-
-// Allocate DP matrices
-    // Widen before multiplying so the element count is not computed in
-    // (possibly wrapping) unsigned arithmetic.
-    const size_t LM = (size_t) uPrefixCountA*uPrefixCountB;
-    SCORE *DPM_ = new SCORE[LM];
-    SCORE *DPD_ = new SCORE[LM];
-    SCORE *DPE_ = new SCORE[LM];
-    SCORE *DPI_ = new SCORE[LM];
-    SCORE *DPJ_ = new SCORE[LM];
-    SCORE *DPL_ = new SCORE[LM];
-
-    char *TBM_ = new char[LM];
-    char *TBD_ = new char[LM];
-    char *TBE_ = new char[LM];
-    char *TBI_ = new char[LM];
-    char *TBJ_ = new char[LM];
-
-    memset(DPM_, 0, LM*sizeof(SCORE));
-    memset(DPD_, 0, LM*sizeof(SCORE));
-    memset(DPE_, 0, LM*sizeof(SCORE));
-    memset(DPI_, 0, LM*sizeof(SCORE));
-    memset(DPJ_, 0, LM*sizeof(SCORE));
-
-//  memset(DPL_, 0, LM*sizeof(SCORE));
-
-    memset(TBM_, '?', LM);
-    memset(TBD_, '?', LM);
-    memset(TBE_, '?', LM);
-    memset(TBI_, '?', LM);
-    memset(TBJ_, '?', LM);
-
-    DPM(0, 0) = 0;
-    DPD(0, 0) = MINUS_INFINITY;
-    DPE(0, 0) = MINUS_INFINITY;
-    DPI(0, 0) = MINUS_INFINITY;
-    DPJ(0, 0) = MINUS_INFINITY;
-
-    DPM(1, 0) = MINUS_INFINITY;
-    DPD(1, 0) = PA[0].m_scoreGapOpen;
-    DPE(1, 0) = PA[0].m_scoreGapOpen2;
-    DPI(1, 0) = MINUS_INFINITY;
-    DPJ(1, 0) = MINUS_INFINITY;
-
-    DPM(0, 1) = MINUS_INFINITY;
-    DPD(0, 1) = MINUS_INFINITY;
-    DPE(0, 1) = MINUS_INFINITY;
-    DPI(0, 1) = PB[0].m_scoreGapOpen;
-    DPJ(0, 1) = PB[0].m_scoreGapOpen2;
-
-// Empty prefix of B is special case
-    for (unsigned uPrefixLengthA = 2; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
-        {
-        // M=LetterA+LetterB, impossible with empty prefix
-        DPM(uPrefixLengthA, 0) = MINUS_INFINITY;
-
-        // D=LetterA+GapB
-        DPD(uPrefixLengthA, 0) = DPD(uPrefixLengthA - 1, 0) + g_scoreGapExtend;
-        TBD(uPrefixLengthA, 0) = 'D';
-
-        DPE(uPrefixLengthA, 0) = DPE(uPrefixLengthA - 1, 0) + g_scoreGapExtend2;
-        TBE(uPrefixLengthA, 0) = 'E';
-
-        // I=GapA+LetterB, impossible with empty prefix
-        DPI(uPrefixLengthA, 0) = MINUS_INFINITY;
-        DPJ(uPrefixLengthA, 0) = MINUS_INFINITY;
-        }
-
-// Empty prefix of A is special case
-    for (unsigned uPrefixLengthB = 2; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
-        {
-        // M=LetterA+LetterB, impossible with empty prefix
-        DPM(0, uPrefixLengthB) = MINUS_INFINITY;
-
-        // D=LetterA+GapB, impossible with empty prefix
-        DPD(0, uPrefixLengthB) = MINUS_INFINITY;
-        DPE(0, uPrefixLengthB) = MINUS_INFINITY;
-
-        // I=GapA+LetterB
-        DPI(0, uPrefixLengthB) = DPI(0, uPrefixLengthB - 1) + g_scoreGapExtend;
-        TBI(0, uPrefixLengthB) = 'I';
-
-        DPJ(0, uPrefixLengthB) = DPJ(0, uPrefixLengthB - 1) + g_scoreGapExtend2;
-        TBJ(0, uPrefixLengthB) = 'J';
-        }
-
-// ============
-// Main DP loop
-// ============
-    for (unsigned uPrefixLengthB = 1; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
-        {
-        const ProfPos &PPB = PB[uPrefixLengthB - 1];
-
-        // Gap-close scores come from the previous position of B; there is
-        // none for the first position, so closing is made impossible there.
-        SCORE scoreGapCloseB;
-        if (uPrefixLengthB == 1)
-            scoreGapCloseB = MINUS_INFINITY;
-        else
-            scoreGapCloseB = PB[uPrefixLengthB-2].m_scoreGapClose;
-
-        SCORE scoreGapClose2B;
-        if (uPrefixLengthB == 1)
-            scoreGapClose2B = MINUS_INFINITY;
-        else
-            scoreGapClose2B = PB[uPrefixLengthB-2].m_scoreGapClose2;
-
-        for (unsigned uPrefixLengthA = 1; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
-            {
-            const ProfPos &PPA = PA[uPrefixLengthA - 1];
-
-            {
-            // Match M=LetterA+LetterB
-            SCORE scoreLL = ScoreProfPos2(PPA, PPB);
-            DPL(uPrefixLengthA, uPrefixLengthB) = scoreLL;
-
-            SCORE scoreGapCloseA;
-            if (uPrefixLengthA == 1)
-                scoreGapCloseA = MINUS_INFINITY;
-            else
-                scoreGapCloseA = PA[uPrefixLengthA-2].m_scoreGapClose;
-
-            SCORE scoreGapClose2A;
-            if (uPrefixLengthA == 1)
-                scoreGapClose2A = MINUS_INFINITY;
-            else
-                scoreGapClose2A = PA[uPrefixLengthA-2].m_scoreGapClose2;
-
-            SCORE scoreMM = DPM(uPrefixLengthA-1, uPrefixLengthB-1);
-            SCORE scoreDM = DPD(uPrefixLengthA-1, uPrefixLengthB-1) + scoreGapCloseA;
-            SCORE scoreEM = DPE(uPrefixLengthA-1, uPrefixLengthB-1) + scoreGapClose2A;
-            SCORE scoreIM = DPI(uPrefixLengthA-1, uPrefixLengthB-1) + scoreGapCloseB;
-            SCORE scoreJM = DPJ(uPrefixLengthA-1, uPrefixLengthB-1) + scoreGapClose2B;
-
-            // Ties are resolved in the order M, D, E, I, J.
-            SCORE scoreBest;
-            if (scoreMM >= scoreDM && scoreMM >= scoreIM && scoreMM >= scoreEM && scoreMM >= scoreJM)
-                {
-                scoreBest = scoreMM;
-                TBM(uPrefixLengthA, uPrefixLengthB) = 'M';
-                }
-            else if (scoreDM >= scoreMM && scoreDM >= scoreIM && scoreDM >= scoreEM && scoreDM >= scoreJM)
-                {
-                scoreBest = scoreDM;
-                TBM(uPrefixLengthA, uPrefixLengthB) = 'D';
-                }
-            else if (scoreEM >= scoreMM && scoreEM >= scoreIM && scoreEM >= scoreDM && scoreEM >= scoreJM)
-                {
-                scoreBest = scoreEM;
-                TBM(uPrefixLengthA, uPrefixLengthB) = 'E';
-                }
-            else if (scoreIM >= scoreMM && scoreIM >= scoreDM && scoreIM >= scoreEM && scoreIM >= scoreJM)
-                {
-                scoreBest = scoreIM;
-                TBM(uPrefixLengthA, uPrefixLengthB) = 'I';
-                }
-            else if (scoreJM >= scoreMM && scoreJM >= scoreDM && scoreJM >= scoreEM && scoreJM >= scoreIM)
-                {
-                scoreBest = scoreJM;
-                TBM(uPrefixLengthA, uPrefixLengthB) = 'J';
-                }
-            else
-                Quit("Max failed (M)");
-
-            DPM(uPrefixLengthA, uPrefixLengthB) = scoreBest + scoreLL;
-            }
-
-            {
-            // Delete D=LetterA+GapB
-            SCORE scoreMD = DPM(uPrefixLengthA-1, uPrefixLengthB) +
-              PA[uPrefixLengthA-1].m_scoreGapOpen;
-            SCORE scoreDD = DPD(uPrefixLengthA-1, uPrefixLengthB) +
-              g_scoreGapExtend;
-
-            SCORE scoreBest;
-            if (scoreMD >= scoreDD)
-                {
-                scoreBest = scoreMD;
-                TBD(uPrefixLengthA, uPrefixLengthB) = 'M';
-                }
-            else
-                {
-                assert(scoreDD >= scoreMD);
-                scoreBest = scoreDD;
-                TBD(uPrefixLengthA, uPrefixLengthB) = 'D';
-                }
-            DPD(uPrefixLengthA, uPrefixLengthB) = scoreBest;
-            }
-
-            {
-            // Delete E=LetterA+GapB
-            SCORE scoreME = DPM(uPrefixLengthA-1, uPrefixLengthB) +
-              PA[uPrefixLengthA-1].m_scoreGapOpen2;
-            SCORE scoreEE = DPE(uPrefixLengthA-1, uPrefixLengthB) +
-              g_scoreGapExtend2;
-
-            SCORE scoreBest;
-            if (scoreME >= scoreEE)
-                {
-                scoreBest = scoreME;
-                TBE(uPrefixLengthA, uPrefixLengthB) = 'M';
-                }
-            else
-                {
-                assert(scoreEE >= scoreME);
-                scoreBest = scoreEE;
-                TBE(uPrefixLengthA, uPrefixLengthB) = 'E';
-                }
-            DPE(uPrefixLengthA, uPrefixLengthB) = scoreBest;
-            }
-
-            // Insert I=GapA+LetterB
-            {
-            SCORE scoreMI = DPM(uPrefixLengthA, uPrefixLengthB-1) +
-              PB[uPrefixLengthB-1].m_scoreGapOpen;
-            SCORE scoreII = DPI(uPrefixLengthA, uPrefixLengthB-1) +
-              g_scoreGapExtend;
-
-            SCORE scoreBest;
-            if (scoreMI >= scoreII)
-                {
-                scoreBest = scoreMI;
-                TBI(uPrefixLengthA, uPrefixLengthB) = 'M';
-                }
-            else
-                {
-                assert(scoreII > scoreMI);
-                scoreBest = scoreII;
-                TBI(uPrefixLengthA, uPrefixLengthB) = 'I';
-                }
-            DPI(uPrefixLengthA, uPrefixLengthB) = scoreBest;
-            }
-
-            // Insert J=GapA+LetterB
-            {
-            SCORE scoreMJ = DPM(uPrefixLengthA, uPrefixLengthB-1) +
-              PB[uPrefixLengthB-1].m_scoreGapOpen2;
-            SCORE scoreJJ = DPJ(uPrefixLengthA, uPrefixLengthB-1) +
-              g_scoreGapExtend2;
-
-            // NOTE(review): unlike the D, E and I states, a tie here keeps
-            // the gap open (strict '>') -- confirm whether that is intended.
-            SCORE scoreBest;
-            if (scoreMJ > scoreJJ)
-                {
-                scoreBest = scoreMJ;
-                TBJ(uPrefixLengthA, uPrefixLengthB) = 'M';
-                }
-            else
-                {
-                assert(scoreJJ >= scoreMJ);
-                scoreBest = scoreJJ;
-                TBJ(uPrefixLengthA, uPrefixLengthB) = 'J';
-                }
-            DPJ(uPrefixLengthA, uPrefixLengthB) = scoreBest;
-            }
-            }
-        }
-
-// Special case: close gaps at end of alignment
-    DPD(uLengthA, uLengthB) += PA[uLengthA-1].m_scoreGapClose;
-    DPE(uLengthA, uLengthB) += PA[uLengthA-1].m_scoreGapClose2;
-
-    DPI(uLengthA, uLengthB) += PB[uLengthB-1].m_scoreGapClose;
-    DPJ(uLengthA, uLengthB) += PB[uLengthB-1].m_scoreGapClose2;
-
-#if TRACE
-    Log("DPL:\n");
-    ListDP(DPL_, PA, PB, uPrefixCountA, uPrefixCountB);
-    Log("DPM:\n");
-    ListDP(DPM_, PA, PB, uPrefixCountA, uPrefixCountB);
-    Log("DPD:\n");
-    ListDP(DPD_, PA, PB, uPrefixCountA, uPrefixCountB);
-    Log("DPE:\n");
-    ListDP(DPE_, PA, PB, uPrefixCountA, uPrefixCountB);
-    Log("DPI:\n");
-    ListDP(DPI_, PA, PB, uPrefixCountA, uPrefixCountB);
-    Log("DPJ:\n");
-    ListDP(DPJ_, PA, PB, uPrefixCountA, uPrefixCountB);
-    Log("TBM:\n");
-    ListTB(TBM_, PA, PB, uPrefixCountA, uPrefixCountB);
-    Log("TBD:\n");
-    ListTB(TBD_, PA, PB, uPrefixCountA, uPrefixCountB);
-    Log("TBE:\n");
-    ListTB(TBE_, PA, PB, uPrefixCountA, uPrefixCountB);
-    Log("TBI:\n");
-    ListTB(TBI_, PA, PB, uPrefixCountA, uPrefixCountB);
-    Log("TBJ:\n");
-    ListTB(TBJ_, PA, PB, uPrefixCountA, uPrefixCountB);
-#endif
-
-// ==========
-// Trace-back
-// ==========
-
-    Path.Clear();
-
-// Find last edge
-    char cEdgeType = '?';
-    SCORE BestScore = MINUS_INFINITY;
-    SCORE M = DPM(uLengthA, uLengthB);
-    SCORE D = DPD(uLengthA, uLengthB);
-    SCORE E = DPE(uLengthA, uLengthB);
-    SCORE I = DPI(uLengthA, uLengthB);
-    SCORE J = DPJ(uLengthA, uLengthB);
-
-    if (M >= D && M >= E && M >= I && M >= J)
-        {
-        cEdgeType = 'M';
-        BestScore = M;
-        }
-    else if (D >= M && D >= E && D >= I && D >= J)
-        {
-        cEdgeType = 'D';
-        BestScore = D;
-        }
-    else if (E >= M && E >= D && E >= I && E >= J)
-        {
-        cEdgeType = 'E';
-        BestScore = E;
-        }
-    else if (I >= M && I >= D && I >= E && I >= J)
-        {
-        cEdgeType = 'I';
-        BestScore = I;
-        }
-    else if (J >= M && J >= D && J >= E && J >= I)
-        {
-        cEdgeType = 'J';
-        BestScore = J;
-        }
-    else
-        Quit("Bad max");
-
-    // Walk back from (uLengthA, uLengthB) to (0, 0), prepending one edge per
-    // step and following the trace-back matrix of the current state.
-    unsigned PLA = uLengthA;
-    unsigned PLB = uLengthB;
-    for (;;)
-        {
-#if TRACE
-        Log("TraceBack: %c%u.%u\n", cEdgeType, PLA, PLB);
-#endif
-        PWEdge Edge;
-        Edge.cType = XlatEdgeType(cEdgeType);
-        Edge.uPrefixLengthA = PLA;
-        Edge.uPrefixLengthB = PLB;
-        Path.PrependEdge(Edge);
-
-        switch (cEdgeType)
-            {
-        case 'M':
-            assert(PLA > 0);
-            assert(PLB > 0);
-            cEdgeType = TBM(PLA, PLB);
-            --PLA;
-            --PLB;
-            break;
-
-        case 'D':
-            assert(PLA > 0);
-            cEdgeType = TBD(PLA, PLB);
-            --PLA;
-            break;
-
-        case 'E':
-            assert(PLA > 0);
-            cEdgeType = TBE(PLA, PLB);
-            --PLA;
-            break;
-
-        case 'I':
-            assert(PLB > 0);
-            cEdgeType = TBI(PLA, PLB);
-            --PLB;
-            break;
-
-        case 'J':
-            assert(PLB > 0);
-            cEdgeType = TBJ(PLA, PLB);
-            --PLB;
-            break;
-
-        default:
-            Quit("Invalid edge %c", cEdgeType);
-            }
-        if (0 == PLA && 0 == PLB)
-            break;
-        }
-    Path.Validate();
-    // Every position of A and of B must be covered exactly once by the path.
-    if (Path.GetMatchCount() + Path.GetDeleteCount() != uLengthA)
-        Quit("Path count A");
-    if (Path.GetMatchCount() + Path.GetInsertCount() != uLengthB)
-        Quit("Path count B");
-
-    // The letter/letter score matrix is never handed over to the caller, so
-    // it is released on every path (it used to be leaked).
-    delete[] DPL_;
-
-    if (g_bKeepSimpleDP)
-        {
-        g_DPM = DPM_;
-        g_DPD = DPD_;
-        g_DPE = DPE_;
-        g_DPI = DPI_;
-        g_DPJ = DPJ_;
-
-        g_TBM = TBM_;
-        g_TBD = TBD_;
-        g_TBE = TBE_;
-        g_TBI = TBI_;
-        g_TBJ = TBJ_;
-        }
-    else
-        {
-        delete[] DPM_;
-        delete[] DPD_;
-        delete[] DPE_;
-        delete[] DPI_;
-        delete[] DPJ_;
-
-        delete[] TBM_;
-        delete[] TBD_;
-        delete[] TBE_;
-        delete[] TBI_;
-        delete[] TBJ_;
-        }
-
-#if TRACE
-    Log("BestScore=%.6g\n", BestScore);
-#endif
-    return BestScore;
-    }
-
-#endif // DOUBLE_AFFINE
Deleted: trunk/packages/muscle/trunk/nwdasmall.cpp
===================================================================
--- trunk/packages/muscle/trunk/nwdasmall.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/nwdasmall.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,947 +0,0 @@
-#include "muscle.h"
-#include <math.h>
-#include "pwpath.h"
-#include "profile.h"
-#include <stdio.h>
-
-#if DOUBLE_AFFINE
-
-// NW double affine small memory, term gaps fully penalized
-// (so up to caller to adjust in profile if desired).
-
-#define TRACE 0
-
-#define MIN(x, y) ((x) < (y) ? (x) : (y))
-
-#if TRACE
-extern bool g_bKeepSimpleDP;
-extern SCORE *g_DPM;
-extern SCORE *g_DPD;
-extern SCORE *g_DPE;
-extern SCORE *g_DPI;
-extern SCORE *g_DPJ;
-extern char *g_TBM;
-extern char *g_TBD;
-extern char *g_TBE;
-extern char *g_TBI;
-extern char *g_TBJ;
-#endif
-
-#if TRACE // Trace build: keep full DP and traceback matrices for logging and comparison.
-#define ALLOC_TRACE() /* Declares and fills the full trace matrices in the expanding scope. */ \
- const SCORE UNINIT = MINUS_INFINITY; \
- const size_t LM = uPrefixCountA*uPrefixCountB; /* one cell per (prefix of A, prefix of B) pair */ \
- \
- SCORE *DPM_ = new SCORE[LM]; \
- SCORE *DPD_ = new SCORE[LM]; \
- SCORE *DPE_ = new SCORE[LM]; \
- SCORE *DPI_ = new SCORE[LM]; \
- SCORE *DPJ_ = new SCORE[LM]; \
- \
- char *TBM_ = new char[LM]; \
- char *TBD_ = new char[LM]; \
- char *TBE_ = new char[LM]; \
- char *TBI_ = new char[LM]; \
- char *TBJ_ = new char[LM]; \
- \
- memset(TBM_, '?', LM); /* '?' = traceback cell never written; CompareTB skips such cells */ \
- memset(TBD_, '?', LM); \
- memset(TBE_, '?', LM); \
- memset(TBI_, '?', LM); \
- memset(TBJ_, '?', LM); \
- \
- for (unsigned i = 0; i <= uLengthA; ++i) \
- for (unsigned j = 0; j <= uLengthB; ++j) \
- { \
- DPM(i, j) = UNINIT; /* DPx(i, j) accessors are defined outside this file */ \
- DPD(i, j) = UNINIT; \
- DPE(i, j) = UNINIT; \
- DPI(i, j) = UNINIT; \
- DPJ(i, j) = UNINIT; \
- }
-#else
-#define ALLOC_TRACE() // Non-trace build: nothing is declared or allocated.
-#endif
-
-#if TRACE // Trace build: mirror each DP score / traceback char into the full trace matrices.
-#define SetDPM(i, j, x) DPM(i, j) = x
-#define SetDPD(i, j, x) DPD(i, j) = x
-#define SetDPE(i, j, x) DPE(i, j) = x
-#define SetDPI(i, j, x) DPI(i, j) = x
-#define SetDPJ(i, j, x) DPJ(i, j) = x
-#define SetTBM(i, j, x) TBM(i, j) = x
-#define SetTBD(i, j, x) TBD(i, j) = x
-#define SetTBE(i, j, x) TBE(i, j) = x
-#define SetTBI(i, j, x) TBI(i, j) = x
-#define SetTBJ(i, j, x) TBJ(i, j) = x
-#else // Non-trace build: every Set* call expands to nothing.
-#define SetDPM(i, j, x) /* empty */
-#define SetDPD(i, j, x) /* empty */
-#define SetDPE(i, j, x) /* empty */
-#define SetDPI(i, j, x) /* empty */
-#define SetDPJ(i, j, x) /* empty */
-#define SetTBM(i, j, x) /* empty */
-#define SetTBD(i, j, x) /* empty */
-#define SetTBE(i, j, x) /* empty */
-#define SetTBI(i, j, x) /* empty */
-#define SetTBJ(i, j, x) /* empty */
-#endif
-
-#define RECURSE_D(i, j) /* Update DRow[j] in place for cell (i, j): extend a D gap or open one from MPrev[j]. */ \
- { \
- SCORE DD = DRow[j] + e; /* extend: old DRow[j] plus extension score e */ \
- SCORE MD = MPrev[j] + PA[i-1].m_scoreGapOpen; /* open: MPrev[j] plus gap-open score of PA[i-1] */ \
- if (DD > MD) \
- { \
- DRow[j] = DD; \
- SetTBD(i, j, 'D'); /* only the trace matrix is written; the TB bits are left unchanged */ \
- } \
- else \
- { \
- DRow[j] = MD; /* ties go to the M predecessor */ \
- SetBitTBD(TB, i, j, 'M'); \
- SetTBD(i, j, 'M'); \
- } \
- SetDPD(i, j, DRow[j]); \
- }
-
-#define RECURSE_E(i, j) /* Same as RECURSE_D but for ERow, using the second extend/open scores (e2, m_scoreGapOpen2). */ \
- { \
- SCORE EE = ERow[j] + e2; /* extend: old ERow[j] plus extension score e2 */ \
- SCORE ME = MPrev[j] + PA[i-1].m_scoreGapOpen2; /* open: MPrev[j] plus second gap-open score of PA[i-1] */ \
- if (EE > ME) \
- { \
- ERow[j] = EE; \
- SetTBE(i, j, 'E'); /* only the trace matrix is written; the TB bits are left unchanged */ \
- } \
- else \
- { \
- ERow[j] = ME; /* ties go to the M predecessor */ \
- SetBitTBE(TB, i, j, 'M'); \
- SetTBE(i, j, 'M'); \
- } \
- SetDPE(i, j, ERow[j]); \
- }
-
-#define RECURSE_D_ATerm(j) RECURSE_D(uLengthA, j) // last row of A: row index fixed to uLengthA
-#define RECURSE_E_ATerm(j) RECURSE_E(uLengthA, j) // last row of A: row index fixed to uLengthA
-
-#define RECURSE_D_BTerm(j) RECURSE_D(i, uLengthB) // last column of B; the argument is ignored and `i` is taken from the expanding scope
-#define RECURSE_E_BTerm(j) RECURSE_E(i, uLengthB) // last column of B; the argument is ignored and `i` is taken from the expanding scope
-
-#define RECURSE_I(i, j) /* Advance the running insert score Iij to cell (i, j): extend, or open from MCurr[j-1]. */ \
- { \
- Iij += e; /* extend the current I gap by e */ \
- SCORE MI = MCurr[j-1] + PB[j-1].m_scoreGapOpen; /* open: MCurr[j-1] plus gap-open score of PB[j-1] */ \
- if (MI >= Iij) /* ties go to the M predecessor */ \
- { \
- Iij = MI; \
- SetBitTBI(TB, i, j, 'M'); \
- SetTBI(i, j, 'M'); \
- } \
- else \
- SetTBI(i, j, 'I'); /* only the trace matrix is written; the TB bits are left unchanged */ \
- SetDPI(i, j, Iij); \
- }
-
-// RECURSE_J(i, j): advance the running second-affine insert score Jij to
-// cell (i, j). The gap is either extended (Jij + e2) or opened from the
-// match state (MCurr[j-1] + second gap-open score of PB[j-1]); ties go to M.
-// The TB bit is written only when M wins; the trace matrix (TRACE builds
-// only) records 'M' or 'J'.
-#define RECURSE_J(i, j) \
-    { \
-    Jij += e2; /* extend the current J gap by e2 */ \
-    SCORE MJ = MCurr[j-1] + PB[j-1].m_scoreGapOpen2; /* open from M */ \
-    if (MJ >= Jij) \
-        { \
-        Jij = MJ; \
-        SetBitTBJ(TB, i, j, 'M'); \
-        SetTBJ(i, j, 'M'); \
-        } \
-    else \
-        SetTBJ(i, j, 'J'); /* fixed: was 'I', which is not a valid J-state trace char */ \
-    SetDPJ(i, j, Jij); \
-    }
-
-#define RECURSE_I_ATerm(j) RECURSE_I(uLengthA, j) // last row of A: row index fixed to uLengthA
-#define RECURSE_J_ATerm(j) RECURSE_J(uLengthA, j) // last row of A: row index fixed to uLengthA
-
-#define RECURSE_I_BTerm(j) RECURSE_I(i, uLengthB) // last column of B; the argument is ignored and `i` is taken from the expanding scope
-#define RECURSE_J_BTerm(j) RECURSE_J(i, uLengthB) // last column of B; the argument is ignored and `i` is taken from the expanding scope
-
-#define RECURSE_M(i, j) /* Pick the best predecessor state at (i, j) and add it to MNext[j+1], i.e. cell (i+1, j+1). */ \
- { \
- SCORE Best = MCurr[j]; /* MM */ \
- SetTBM(i+1, j+1, 'M'); \
- SetBitTBM(TB, i+1, j+1, 'M'); \
- \
- SCORE DM = DRow[j] + PA[i-1].m_scoreGapClose; /* close a D gap */ \
- if (DM > Best) /* strict '>': on ties the earlier candidate is kept */ \
- { \
- Best = DM; \
- SetTBM(i+1, j+1, 'D'); \
- SetBitTBM(TB, i+1, j+1, 'D'); \
- } \
- \
- SCORE EM = ERow[j] + PA[i-1].m_scoreGapClose2; /* close an E gap (second close score) */ \
- if (EM > Best) \
- { \
- Best = EM; \
- SetTBM(i+1, j+1, 'E'); \
- SetBitTBM(TB, i+1, j+1, 'E'); \
- } \
- \
- SCORE IM = Iij + PB[j-1].m_scoreGapClose; /* close an I gap */ \
- if (IM > Best) \
- { \
- Best = IM; \
- SetTBM(i+1, j+1, 'I'); \
- SetBitTBM(TB, i+1, j+1, 'I'); \
- } \
- \
- SCORE JM = Jij + PB[j-1].m_scoreGapClose2; /* close a J gap (second close score) */ \
- if (JM > Best) \
- { \
- Best = JM; \
- SetTBM(i+1, j+1, 'J'); \
- SetBitTBM(TB, i+1, j+1, 'J'); \
- } \
- MNext[j+1] += Best; /* '+=': NWDASmall pre-loads MNext[j+1] with ScoreProfPos2(PA[i], PB[j]) */ \
- SetDPM(i+1, j+1, MNext[j+1]); \
- }
-
-#if TRACE
-// Approximate equality used when comparing DP matrices in TRACE builds.
-// Returns true when both values are below -100000, when they differ by
-// less than 0.0001, or when the difference is under 0.5% of the sum of
-// their magnitudes.
-static bool LocalEq(BASETYPE b1, BASETYPE b2)
-    {
-    const bool bBothVeryNegative = (b1 < -100000) && (b2 < -100000);
-    if (bBothVeryNegative)
-        return true;
-
-    const double dAbsDelta = fabs(b1 - b2);
-    if (dAbsDelta < 0.0001)
-        return true;
-
-    const double dMagnitude = fabs(b1) + fabs(b2);
-    return dAbsDelta/dMagnitude < 0.005;
-    }
-
-// Decodes the M-state predecessor stored in a packed traceback byte.
-// Returns 'M', 'D', 'E', 'I' or 'J'; calls Quit("Huh?") when the masked
-// bits match none of the known codes.
-static char Get_M_Char(char Bits)
-    {
-    const int iCode = Bits & BIT_xM;
-    if (BIT_MM == iCode)
-        return 'M';
-    if (BIT_DM == iCode)
-        return 'D';
-    if (BIT_EM == iCode)
-        return 'E';
-    if (BIT_IM == iCode)
-        return 'I';
-    if (BIT_JM == iCode)
-        return 'J';
-    Quit("Huh?");
-    return '?';
-    }
-
-// Decodes the D-state predecessor: 'M' when any BIT_xD bit is set, else 'D'.
-static char Get_D_Char(char Bits)
-    {
-    if (Bits & BIT_xD)
-        return 'M';
-    return 'D';
-    }
-
-// Decodes the E-state predecessor: 'M' when any BIT_xE bit is set, else 'E'.
-static char Get_E_Char(char Bits)
-    {
-    if (Bits & BIT_xE)
-        return 'M';
-    return 'E';
-    }
-
-// Decodes the I-state predecessor: 'M' when any BIT_xI bit is set, else 'I'.
-static char Get_I_Char(char Bits)
-    {
-    if (Bits & BIT_xI)
-        return 'M';
-    return 'I';
-    }
-
-// Decodes the J-state predecessor: 'M' when any BIT_xJ bit is set, else 'J'.
-static char Get_J_Char(char Bits)
-    {
-    if (Bits & BIT_xJ)
-        return 'M';
-    return 'J';
-    }
-
-static bool DPEq(char c, SCORE *g_DP, SCORE *DPD_, // Compare two DP matrices cell by cell with LocalEq; c names the matrix in log output.
- unsigned uPrefixCountA, unsigned uPrefixCountB)
- {
- if (0 == g_DP)
- {
- Log("***DPDIFF*** DP%c=NULL\n", c);
- return true; // NOTE(review): a missing reference matrix is logged as a diff yet reported as equal
- }
-
- SCORE *DPM_ = g_DP; // presumably named DPM_/DPD_ so the DPM()/DPD() accessors (defined elsewhere) pick them up -- TODO confirm
- for (unsigned i = 0; i < uPrefixCountA; ++i)
- for (unsigned j = 0; j < uPrefixCountB; ++j)
- if (!LocalEq(DPM(i, j), DPD(i, j)))
- {
- Log("***DPDIFF*** DP%c(%d, %d) Simple = %.2g, Small = %.2g\n",
- c, i, j, DPM(i, j), DPD(i, j));
- return false; // stop at the first mismatching cell
- }
- return true;
- }
-
-static bool CompareTB(char **TB, char *TBM_, char *TBD_, char *TBE_, char *TBI_, char *TBJ_, // Compare bit-packed TB against five char traceback matrices; returns true if no mismatch.
- unsigned uPrefixCountA, unsigned uPrefixCountB)
- {
- if (!g_bKeepSimpleDP)
- return true; // nothing to compare against
- SCORE *DPM_ = g_DPM; // reference scores; cells at or below -100000 are not compared
- bool Eq = true;
- for (unsigned i = 0; i < uPrefixCountA; ++i)
- for (unsigned j = 0; j < uPrefixCountB; ++j)
- {
- char c1 = TBM(i, j);
- char c2 = Get_M_Char(TB[i][j]);
- if (c1 != '?' && c1 != c2 && DPM(i, j) > -100000) // '?' = cell never written in the char matrix
- {
- Log("TBM(%d, %d) Simple = %c, NW = %c\n", i, j, c1, c2);
- Eq = false;
- goto D; // report only the first mismatch per matrix, then move on to the next matrix
- }
- }
-
-D:
- SCORE *DPD_ = g_DPD;
- for (unsigned i = 0; i < uPrefixCountA; ++i)
- for (unsigned j = 0; j < uPrefixCountB; ++j)
- {
- char c1 = TBD(i, j);
- char c2 = Get_D_Char(TB[i][j]);
- if (c1 != '?' && c1 != c2 && DPD(i, j) > -100000)
- {
- Log("TBD(%d, %d) Simple = %c, NW = %c\n", i, j, c1, c2);
- Eq = false;
- goto E;
- }
- }
-E:
- SCORE *DPE_ = g_DPE;
- if (0 == TBE_) // E matrix is optional: skip when no E traceback was supplied
- goto I;
- for (unsigned i = 0; i < uPrefixCountA; ++i)
- for (unsigned j = 0; j < uPrefixCountB; ++j)
- {
- char c1 = TBE(i, j);
- char c2 = Get_E_Char(TB[i][j]);
- if (c1 != '?' && c1 != c2 && DPE(i, j) > -100000)
- {
- Log("TBE(%d, %d) Simple = %c, NW = %c\n", i, j, c1, c2);
- Eq = false;
- goto I;
- }
- }
-I:
- SCORE *DPI_ = g_DPI;
- for (unsigned i = 0; i < uPrefixCountA; ++i)
- for (unsigned j = 0; j < uPrefixCountB; ++j)
- {
- char c1 = TBI(i, j);
- char c2 = Get_I_Char(TB[i][j]);
- if (c1 != '?' && c1 != c2 && DPI(i, j) > -100000)
- {
- Log("TBI(%d, %d) Simple = %c, NW = %c\n", i, j, c1, c2);
- Eq = false;
- goto J;
- }
- }
-J:
- SCORE *DPJ_ = g_DPJ;
- if (0 == DPJ_) // NOTE(review): the E section tests the TB pointer (TBE_) but this tests the score pointer (DPJ_)
- goto Done;
- for (unsigned i = 0; i < uPrefixCountA; ++i)
- for (unsigned j = 0; j < uPrefixCountB; ++j)
- {
- char c1 = TBJ(i, j);
- char c2 = Get_J_Char(TB[i][j]);
- if (c1 != '?' && c1 != c2 && DPJ(i, j) > -100000)
- {
- Log("TBJ(%d, %d) Simple = %c, NW = %c\n", i, j, c1, c2);
- Eq = false;
- goto Done;
- }
- }
-Done:
- if (Eq)
- Log("TB success\n");
- return Eq;
- }
-
-// Formats a score for the DP matrix logs.
-// Scores below -100000 are shown as " *"; all others are printed with
-// "%6.1f". The result points to a static buffer that is overwritten by the
-// next call, so it must be consumed before calling again.
-static const char *LocalScoreToStr(SCORE s)
-    {
-    static char str[16];
-    if (s < -100000)
-        return " *";
-    // Bounded write: a very large score no longer overruns the 16-byte buffer
-    // (it is truncated instead).
-    snprintf(str, sizeof(str), "%6.1f", s);
-    return str;
-    }
-
-static void LogDP(const SCORE *DPM_, const ProfPos *PA, const ProfPos *PB, // Log one DP score matrix as a table: columns are prefixes of B, rows are prefixes of A.
- unsigned uPrefixCountA, unsigned uPrefixCountB)
- {
- Log(" ");
- for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB) // header row
- {
- char c = ' ';
- if (uPrefixLengthB > 0)
- c = ConsensusChar(PB[uPrefixLengthB - 1]); // prefix length n is labelled with profile position n-1
- Log(" %4u:%c", uPrefixLengthB, c);
- }
- Log("\n");
- for (unsigned uPrefixLengthA = 0; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA) // one row per prefix of A
- {
- char c = ' ';
- if (uPrefixLengthA > 0)
- c = ConsensusChar(PA[uPrefixLengthA - 1]);
- Log("%4u:%c ", uPrefixLengthA, c);
- for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
- Log(" %s", LocalScoreToStr(DPM(uPrefixLengthA, uPrefixLengthB))); // DPM() presumably indexes the DPM_ parameter -- TODO confirm
- Log("\n");
- }
- }
-
-// Logs one section of the bit-packed traceback matrix: a "Bit TB<cName>:"
-// title line followed by one row per prefix of A, each cell decoded with
-// GetChar (one of the Get_x_Char functions).
-static void LogBitTBSection(char cName, char (*GetChar)(char), char **TB,
-  const ProfPos *PA, unsigned uPrefixCountA, unsigned uPrefixCountB)
-    {
-    Log("Bit TB%c:\n", cName);
-    for (unsigned uPrefixLengthA = 0; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
-        {
-        // Prefix length n is labelled with the consensus of profile position n-1.
-        char cRow = ' ';
-        if (uPrefixLengthA > 0)
-            cRow = ConsensusChar(PA[uPrefixLengthA - 1]);
-        Log("%4u:%c ", uPrefixLengthA, cRow);
-        for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
-            {
-            char cCell = GetChar(TB[uPrefixLengthA][uPrefixLengthB]);
-            Log(" %6c", cCell);
-            }
-        Log("\n");
-        }
-    }
-
-// Logs the bit-packed traceback matrix TB as five tables (M, D, E, I, J
-// states), preceded by a header row labelling the prefixes of B. Output is
-// identical to the previous hand-unrolled version; the five copy-pasted loops
-// (which also shadowed the row label `c`) now share LogBitTBSection.
-static void LogBitTB(char **TB, const ProfPos *PA, const ProfPos *PB,
-  unsigned uPrefixCountA, unsigned uPrefixCountB)
-    {
-    Log(" ");
-    for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
-        {
-        char c = ' ';
-        if (uPrefixLengthB > 0)
-            c = ConsensusChar(PB[uPrefixLengthB - 1]);
-        Log(" %4u:%c", uPrefixLengthB, c);
-        }
-    Log("\n");
-    LogBitTBSection('M', Get_M_Char, TB, PA, uPrefixCountA, uPrefixCountB);
-
-    Log("\n");
-    LogBitTBSection('D', Get_D_Char, TB, PA, uPrefixCountA, uPrefixCountB);
-
-    Log("\n");
-    LogBitTBSection('E', Get_E_Char, TB, PA, uPrefixCountA, uPrefixCountB);
-
-    Log("\n");
-    LogBitTBSection('I', Get_I_Char, TB, PA, uPrefixCountA, uPrefixCountB);
-
-    Log("\n");
-    LogBitTBSection('J', Get_J_Char, TB, PA, uPrefixCountA, uPrefixCountB);
-    }
-
-static void ListTB(char *TBM_, const ProfPos *PA, const ProfPos *PB, // Log one char traceback matrix as a table: columns are prefixes of B, rows are prefixes of A.
- unsigned uPrefixCountA, unsigned uPrefixCountB)
- {
- Log(" ");
- for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB) // header row
- {
- char c = ' ';
- if (uPrefixLengthB > 0)
- c = ConsensusChar(PB[uPrefixLengthB - 1]); // prefix length n is labelled with profile position n-1
- Log(" %4u:%c", uPrefixLengthB, c);
- }
- Log("\n");
- for (unsigned uPrefixLengthA = 0; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA) // one row per prefix of A
- {
- char c = ' ';
- if (uPrefixLengthA > 0)
- c = ConsensusChar(PA[uPrefixLengthA - 1]);
- Log("%4u:%c ", uPrefixLengthA, c);
- for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
- {
- char c = TBM(uPrefixLengthA, uPrefixLengthB); // shadows the row label c; TBM() presumably indexes the TBM_ parameter -- TODO confirm
- Log(" %6c", c);
- }
- Log("\n");
- }
- }
-
-// Renders a packed traceback byte as "<m>M <d>D <e>E <i>I <j>J", where each
-// leading character is the decoded predecessor for that state.
-// The result points to a static buffer that is overwritten by the next call.
-static const char *BitsToStr(char Bits)
-    {
-    static char Str[32];
-
-    sprintf(Str, "%cM %cD %cE %cI %cJ",
-      Get_M_Char(Bits),
-      Get_D_Char(Bits),
-      Get_E_Char(Bits),
-      Get_I_Char(Bits),
-      Get_J_Char(Bits));
-    // Fixed: the function previously fell off the end without returning a
-    // value, which is undefined behavior for any caller using the result.
-    return Str;
-    }
-#endif // TRACE
-
-// Stores the M-state predecessor c ('M', 'D', and with DOUBLE_AFFINE also
-// 'E', 'I', 'J') in the BIT_xM field of TB[i][j]; any other character is
-// reported through Quit("Huh?!").
-static inline void SetBitTBM(char **TB, unsigned i, unsigned j, char c)
-    {
-    char Bit;
-    if ('M' == c)
-        Bit = BIT_MM;
-    else if ('D' == c)
-        Bit = BIT_DM;
-#if DOUBLE_AFFINE
-    else if ('E' == c)
-        Bit = BIT_EM;
-    else if ('I' == c)
-        Bit = BIT_IM;
-    else if ('J' == c)
-        Bit = BIT_JM;
-#endif
-    else
-        Quit("Huh?!");
-
-    // Clear the old field, then set the new code.
-    char &Cell = TB[i][j];
-    Cell &= ~BIT_xM;
-    Cell |= Bit;
-    }
-
-// Stores the D-state predecessor c ('M' or 'D') in the BIT_xD field of
-// TB[i][j]; any other character is reported through Quit("Huh?!").
-static inline void SetBitTBD(char **TB, unsigned i, unsigned j, char c)
-    {
-    char Bit;
-    if ('M' == c)
-        Bit = BIT_MD;
-    else if ('D' == c)
-        Bit = BIT_DD;
-    else
-        Quit("Huh?!");
-
-    // Clear the old field, then set the new code.
-    char &Cell = TB[i][j];
-    Cell &= ~BIT_xD;
-    Cell |= Bit;
-    }
-
-// Stores the I-state predecessor c ('M' or 'I') in the BIT_xI field of
-// TB[i][j]; any other character is reported through Quit("Huh?!").
-static inline void SetBitTBI(char **TB, unsigned i, unsigned j, char c)
-    {
-    char Bit;
-    if ('M' == c)
-        Bit = BIT_MI;
-    else if ('I' == c)
-        Bit = BIT_II;
-    else
-        Quit("Huh?!");
-
-    // Clear the old field, then set the new code.
-    char &Cell = TB[i][j];
-    Cell &= ~BIT_xI;
-    Cell |= Bit;
-    }
-
-#if DOUBLE_AFFINE
-// Stores the E-state predecessor c ('M' or 'E') in the BIT_xE field of
-// TB[i][j]; any other character is reported through Quit("Huh?!").
-static inline void SetBitTBE(char **TB, unsigned i, unsigned j, char c)
-    {
-    char Bit;
-    if ('M' == c)
-        Bit = BIT_ME;
-    else if ('E' == c)
-        Bit = BIT_EE;
-    else
-        Quit("Huh?!");
-
-    // Clear the old field, then set the new code.
-    char &Cell = TB[i][j];
-    Cell &= ~BIT_xE;
-    Cell |= Bit;
-    }
-
-// Stores the J-state predecessor c ('M' or 'J') in the BIT_xJ field of
-// TB[i][j]; any other character is reported through Quit("Huh?!").
-static inline void SetBitTBJ(char **TB, unsigned i, unsigned j, char c)
-    {
-    char Bit;
-    if ('M' == c)
-        Bit = BIT_MJ;
-    else if ('J' == c)
-        Bit = BIT_JJ;
-    else
-        Quit("Huh?!");
-
-    // Clear the old field, then set the new code.
-    char &Cell = TB[i][j];
-    Cell &= ~BIT_xJ;
-    Cell |= Bit;
-    }
-#endif
-
-#if TRACE // Trace build: dump all matrices and compare them with the reference (g_*) matrices.
-#define LogMatrices() /* Uses DPx_, TB, PA, PB and the prefix counts from the expanding scope. */ \
- { \
- Log("Bit DPM:\n"); \
- LogDP(DPM_, PA, PB, uPrefixCountA, uPrefixCountB); \
- Log("Bit DPD:\n"); \
- LogDP(DPD_, PA, PB, uPrefixCountA, uPrefixCountB); \
- Log("Bit DPE:\n"); \
- LogDP(DPE_, PA, PB, uPrefixCountA, uPrefixCountB); \
- Log("Bit DPI:\n"); \
- LogDP(DPI_, PA, PB, uPrefixCountA, uPrefixCountB); \
- Log("Bit DPJ:\n"); \
- LogDP(DPJ_, PA, PB, uPrefixCountA, uPrefixCountB); \
- Log("Bit TB:\n"); \
- LogBitTB(TB, PA, PB, uPrefixCountA, uPrefixCountB); \
- bool Same; \
- Same = DPEq('M', g_DPM, DPM_, uPrefixCountA, uPrefixCountB); /* reference matrix first, this file's matrix second */ \
- if (Same) \
- Log("DPM success\n"); /* NOTE(review): DPEq also returns true when the reference pointer is NULL */ \
- Same = DPEq('D', g_DPD, DPD_, uPrefixCountA, uPrefixCountB);\
- if (Same) \
- Log("DPD success\n"); \
- Same = DPEq('E', g_DPE, DPE_, uPrefixCountA, uPrefixCountB);\
- if (Same) \
- Log("DPE success\n"); \
- Same = DPEq('I', g_DPI, DPI_, uPrefixCountA, uPrefixCountB);\
- if (Same) \
- Log("DPI success\n"); \
- Same = DPEq('J', g_DPJ, DPJ_, uPrefixCountA, uPrefixCountB);\
- if (Same) \
- Log("DPJ success\n"); \
- CompareTB(TB, g_TBM, g_TBD, g_TBE, g_TBI, g_TBJ, uPrefixCountA, uPrefixCountB); /* result is ignored; CompareTB logs its own outcome */ \
- }
-#else
-#define LogMatrices() /* empty */
-#endif
-
-// NWDASmall: double-affine Needleman-Wunsch alignment of two profiles using
-// rolling score rows (MPrev/MCurr/MNext, DRow, ERow and the running scalars
-// Iij/Jij) plus a bit-packed traceback matrix TB of (uLengthA+1) x
-// (uLengthB+1) bytes.
-//
-//   PA, uLengthA   profile A and its number of positions (must be > 0)
-//   PB, uLengthB   profile B and its number of positions (must be > 0)
-//   Path           receives the alignment path produced by BitTraceBack
-//
-// Returns 0 in all cases; the best score is computed locally (Score) but is
-// not returned.
-//
-// NOTE(review): the first-position gap-open and last-position gap-close
-// scores of both profiles are negated in place through a const-removing cast
-// and are not restored before returning, so each call modifies the caller's
-// profiles. Confirm with callers whether this is intended.
-//
-// Changes relative to the previous version:
-//  * last row, column 1: the second-affine candidate M2 now uses e2,
-//    m_scoreGapOpen2 and m_scoreGapClose2 (it was a verbatim copy of M, so
-//    the E alternative could never win);
-//  * the trace matrix records 'E' (not 'D') when M2 wins;
-//  * uLengthA == 1 no longer evaluates (uLengthA - 2) (unsigned wrap) and
-//    PA[uLengthA-2] (out of bounds); the row-1 initialization is kept;
-//  * "#if DBEUG" corrected to "#if DEBUG";
-//  * the unused local min_e was removed.
-SCORE NWDASmall(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
-  unsigned uLengthB, PWPath &Path)
-    {
-    assert(uLengthB > 0 && uLengthA > 0);
-
-    // Negate the terminal gap scores (see NOTE(review) above).
-    ProfPos *pa0 = (ProfPos *) PA;
-    ProfPos *pb0 = (ProfPos *) PB;
-    ProfPos *paa = (ProfPos *) (PA + uLengthA - 1);
-    ProfPos *pbb = (ProfPos *) (PB + uLengthB - 1);
-
-    pa0->m_scoreGapOpen *= -1;
-    pb0->m_scoreGapOpen *= -1;
-
-    paa->m_scoreGapClose *= -1;
-    pbb->m_scoreGapClose *= -1;
-
-    pa0->m_scoreGapOpen2 *= -1;
-    pb0->m_scoreGapOpen2 *= -1;
-    paa->m_scoreGapClose2 *= -1;
-    pbb->m_scoreGapClose2 *= -1;
-
-    const unsigned uPrefixCountA = uLengthA + 1;
-    const unsigned uPrefixCountB = uLengthB + 1;
-    const SCORE e = g_scoreGapExtend;
-    const SCORE e2 = g_scoreGapExtend2;
-
-    ALLOC_TRACE()
-
-    SCORE *MCurr = new SCORE[uPrefixCountB];
-    SCORE *MNext = new SCORE[uPrefixCountB];
-    SCORE *MPrev = new SCORE[uPrefixCountB];
-    SCORE *DRow = new SCORE[uPrefixCountB];
-    SCORE *ERow = new SCORE[uPrefixCountB];
-
-    // Traceback bytes start at zero; the RECURSE_* macros only set bits.
-    char **TB = new char *[uPrefixCountA];
-    for (unsigned i = 0; i < uPrefixCountA; ++i)
-        {
-        TB[i] = new char [uPrefixCountB];
-        memset(TB[i], 0, uPrefixCountB);
-        }
-
-    // Row 0: I and J scores (trace matrices only; Iij/Jij are reset below).
-    SCORE Iij = MINUS_INFINITY;
-    SetDPI(0, 0, Iij);
-
-    SCORE Jij = MINUS_INFINITY;
-    SetDPJ(0, 0, Jij);
-
-    Iij = PB[0].m_scoreGapOpen;
-    SetDPI(0, 1, Iij);
-
-    Jij = PB[0].m_scoreGapOpen2;
-    SetDPJ(0, 1, Jij);
-
-    for (unsigned j = 2; j <= uLengthB; ++j)
-        {
-        Iij += e;
-        Jij += e2;
-
-        SetDPI(0, j, Iij);
-        SetDPJ(0, j, Jij);
-
-        SetTBI(0, j, 'I');
-        SetTBJ(0, j, 'J');
-        }
-
-    // Row 0: D and E are impossible.
-    for (unsigned j = 0; j <= uLengthB; ++j)
-        {
-        DRow[j] = MINUS_INFINITY;
-        ERow[j] = MINUS_INFINITY;
-
-        SetDPD(0, j, DRow[j]);
-        SetDPE(0, j, ERow[j]);
-
-        SetTBD(0, j, 'D');
-        SetTBE(0, j, 'E');
-        }
-
-    // Row 0 of M.
-    MPrev[0] = 0;
-    SetDPM(0, 0, MPrev[0]);
-    for (unsigned j = 1; j <= uLengthB; ++j)
-        {
-        MPrev[j] = MINUS_INFINITY;
-        SetDPM(0, j, MPrev[j]);
-        }
-
-    // Row 1 of M: a match of PA[0] with PB[j-1], preceded by j-1 inserts
-    // scored with whichever affine penalty set gives the higher total.
-    MCurr[0] = MINUS_INFINITY;
-    SetDPM(1, 0, MCurr[0]);
-
-    MCurr[1] = ScoreProfPos2(PA[0], PB[0]);
-    SetDPM(1, 1, MCurr[1]);
-    SetBitTBM(TB, 1, 1, 'M');
-    SetTBM(1, 1, 'M');
-
-    for (unsigned j = 2; j <= uLengthB; ++j)
-        {
-        SCORE M = ScoreProfPos2(PA[0], PB[j-1]) + PB[0].m_scoreGapOpen +
-          (j - 2)*e + PB[j-2].m_scoreGapClose;
-        SCORE M2 = ScoreProfPos2(PA[0], PB[j-1]) + PB[0].m_scoreGapOpen2 +
-          (j - 2)*e2 + PB[j-2].m_scoreGapClose2;
-
-        if (M >= M2)
-            {
-            MCurr[j] = M;
-            SetBitTBM(TB, 1, j, 'I');
-            SetTBM(1, j, 'I');
-            }
-        else
-            {
-            MCurr[j] = M2;
-            SetBitTBM(TB, 1, j, 'J');
-            SetTBM(1, j, 'J');
-            }
-        SetDPM(1, j, MCurr[j]);
-        }
-
-// Main DP loop
-    for (unsigned i = 1; i < uLengthA; ++i)
-        {
-        Iij = MINUS_INFINITY;
-        Jij = MINUS_INFINITY;
-        SetDPI(i, 0, Iij);
-        SetDPJ(i, 0, Jij);
-
-        DRow[0] = PA[0].m_scoreGapOpen + (i - 1)*e;
-        ERow[0] = PA[0].m_scoreGapOpen2 + (i - 1)*e2;
-        SetDPD(i, 0, DRow[0]);
-        SetDPE(i, 0, ERow[0]);
-
-        // Column 1 of M for this row.
-        MCurr[0] = MINUS_INFINITY;
-        if (i == 1)
-            {
-            MCurr[1] = ScoreProfPos2(PA[0], PB[0]);
-            SetBitTBM(TB, i, 1, 'M');
-            SetTBM(i, 1, 'M');
-            }
-        else
-            {
-            SCORE M = ScoreProfPos2(PA[i-1], PB[0]) + PA[0].m_scoreGapOpen +
-              (i - 2)*e + PA[i-2].m_scoreGapClose;
-            SCORE M2 = ScoreProfPos2(PA[i-1], PB[0]) + PA[0].m_scoreGapOpen2 +
-              (i - 2)*e2 + PA[i-2].m_scoreGapClose2;
-            if (M >= M2)
-                {
-                MCurr[1] = M;
-                SetBitTBM(TB, i, 1, 'D');
-                SetTBM(i, 1, 'D');
-                }
-            else
-                {
-                MCurr[1] = M2;
-                SetBitTBM(TB, i, 1, 'E');
-                SetTBM(i, 1, 'E');
-                }
-            }
-        SetDPM(i, 0, MCurr[0]);
-        SetDPM(i, 1, MCurr[1]);
-
-        // Pre-load the next M row with substitution scores; RECURSE_M adds
-        // the best predecessor on top of them.
-        for (unsigned j = 1; j < uLengthB; ++j)
-            MNext[j+1] = ScoreProfPos2(PA[i], PB[j]);
-
-        for (unsigned j = 1; j < uLengthB; ++j)
-            {
-            RECURSE_D(i, j)
-            RECURSE_E(i, j)
-            RECURSE_I(i, j)
-            RECURSE_J(i, j)
-            RECURSE_M(i, j)
-            }
-        // Special case for j=uLengthB
-        RECURSE_D_BTerm(i)
-        RECURSE_E_BTerm(i)
-        RECURSE_I_BTerm(i)
-        RECURSE_J_BTerm(i)
-
-        // Prev := Curr, Curr := Next, Next := Prev
-        Rotate(MPrev, MCurr, MNext);
-        }
-
-// Special case for i=uLengthA
-    MCurr[0] = MINUS_INFINITY;
-    if (uLengthA > 1)
-        {
-        // Match of the last position of A with PB[0], preceded by
-        // uLengthA-1 deletes scored with either affine penalty set.
-        SCORE M = ScoreProfPos2(PA[uLengthA-1], PB[0]) + (uLengthA - 2)*e +
-          PA[0].m_scoreGapOpen + PA[uLengthA-2].m_scoreGapClose;
-        SCORE M2 = ScoreProfPos2(PA[uLengthA-1], PB[0]) + (uLengthA - 2)*e2 +
-          PA[0].m_scoreGapOpen2 + PA[uLengthA-2].m_scoreGapClose2;
-        if (M >= M2)
-            {
-            MCurr[1] = M;
-            SetBitTBM(TB, uLengthA, 1, 'D');
-            SetTBM(uLengthA, 1, 'D');
-            }
-        else
-            {
-            MCurr[1] = M2;
-            SetBitTBM(TB, uLengthA, 1, 'E');
-            SetTBM(uLengthA, 1, 'E');
-            }
-        }
-    // else: uLengthA == 1, so the last row is row 1 and MCurr[1] / TB[1][1]
-    // already hold the plain match set up before the main loop.
-    SetDPM(uLengthA, 0, MCurr[0]);
-    SetDPM(uLengthA, 1, MCurr[1]);
-
-    DRow[0] = MINUS_INFINITY;
-    ERow[0] = MINUS_INFINITY;
-
-    SetDPD(uLengthA, 0, DRow[0]);
-    SetDPE(uLengthA, 0, ERow[0]);
-
-    for (unsigned j = 1; j <= uLengthB; ++j)
-        {
-        RECURSE_D_ATerm(j);
-        RECURSE_E_ATerm(j);
-        }
-
-    Iij = MINUS_INFINITY;
-    Jij = MINUS_INFINITY;
-
-    for (unsigned j = 1; j <= uLengthB; ++j)
-        {
-        RECURSE_I_ATerm(j)
-        RECURSE_J_ATerm(j)
-        }
-
-    LogMatrices();
-
-    // Choose the best final state; strict '>' keeps the earlier candidate
-    // on ties (order: M, D, E, I, J).
-    SCORE MAB = MCurr[uLengthB];
-    SCORE DAB = DRow[uLengthB] + PA[uLengthA-1].m_scoreGapClose;
-    SCORE EAB = ERow[uLengthB] + PA[uLengthA-1].m_scoreGapClose2;
-    SCORE IAB = Iij + PB[uLengthB-1].m_scoreGapClose;
-    SCORE JAB = Jij + PB[uLengthB-1].m_scoreGapClose2;
-
-    SCORE Score = MAB;
-    char cEdgeType = 'M';
-    if (DAB > Score)
-        {
-        Score = DAB;
-        cEdgeType = 'D';
-        }
-    if (EAB > Score)
-        {
-        Score = EAB;
-        cEdgeType = 'E';
-        }
-    if (IAB > Score)
-        {
-        Score = IAB;
-        cEdgeType = 'I';
-        }
-    if (JAB > Score)
-        {
-        Score = JAB;
-        cEdgeType = 'J';
-        }
-
-#if TRACE
-    Log(" Small: MAB=%.4g DAB=%.4g EAB=%.4g IAB=%.4g JAB=%.4g best=%c\n",
-      MAB, DAB, EAB, IAB, JAB, cEdgeType);
-#endif
-
-    BitTraceBack(TB, uLengthA, uLengthB, cEdgeType, Path);
-
-#if DEBUG
-    Path.Validate();
-#endif
-
-    delete[] MCurr;
-    delete[] MNext;
-    delete[] MPrev;
-    delete[] DRow;
-    delete[] ERow;
-    for (unsigned i = 0; i < uPrefixCountA; ++i)
-        delete[] TB[i];
-    delete[] TB;
-
-    // NOTE(review): the matrices declared by ALLOC_TRACE() (TRACE builds
-    // only) are not freed here.
-    return 0;
-    }
-#endif // DOUBLE_AFFINE
Deleted: trunk/packages/muscle/trunk/nwrec.cpp
===================================================================
--- trunk/packages/muscle/trunk/nwrec.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/nwrec.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,137 +0,0 @@
-/***
-Needleman-Wunsch recursions
-
-Notation: i,j are prefix lengths so are in
-ranges i = [0,|A|] and j = [0,|B|].
-
-Profile positions are in ranges [0,|A|-1]
-and [0,|B|-1] so prefix length i corresponds
-to position (i-1) in the profile, and similarly
-for j.
-
-Terminal gap scoring
---------------------
-Terminal gaps are scored as with open [close]
-penalties only at the left [right] terminal,
-as follows:
-
- 0 i
- | |
- A XXXXX...
- B ---XX...
-
- i |A|-1
- | |
- A ...XXXXX
- B ...XX---
-
-In these examples, open / close penalty at position
-i is included, but close / open penalty at |A|-1 /
-0 is not included.
-
-This is implemented by setting the open [close]
-penalty to zero in the first [last] position of
-each profile.
-
-Consider adding a column to a sub-alignment. After the
-column is added, there are i letters from A and j letters
-from B.
-
-The column starts a left-terminal gap if:
- Delete with i=1, j=0 or
- Insert with i=0, j=1.
-
-The column ends a left-terminal gap if:
- Match following Delete with j=1, or
- Match following Insert with i=1.
-
-The column starts a right-terminal gap if:
- Delete following a Match and i=|A|, or
- Insert following a Match and j=|B|.
-
-The column ends a right-terminal gap if:
- Match with i=|A|, j=|B| following Delete or Insert.
-
-RECURSION RELATIONS
-===================
-
- i-1
- |
-DD A ..X X
- B ..- -
-
-MD A ..X X
- B ..X -
-
-D(i,j) = max
- D(i-1,j) + e
- M(i-1,j) + goA(i-1)
-Valid for:
- i = [1,|A|-1]
- j = [1,|B|]
-
-I(i,j) By symmetry with D(i,j).
-
- i-2
- | i-1
- | |
-MM A ..X X
- B ..X X
-
-DM A ..X X
- B ..- X
-
-IM A ..- X
- B ..X X
- | |
- | j-1
- j-2
-
-M(i,j) = L(i-1,j-1) + max
- M(i-1,j-1)
- D(i-1,j-1) + gcA(i-2)
- I(i-1,j-1) + gcB(j-2)
-Valid for:
- i = [2,|A|]
- j = [2,|B|]
-
-Equivalently:
-
-M(i+1,j+1) = L(i,j) + max
- M(i,j)
- D(i,j) + gcA(i-1)
- I(i,j) + gcB(j-1)
-
-Valid for:
- i = [1,|A|-1]
- j = [1,|B|-1]
-
-Boundary conditions
-===================
-
-A XXXX
-B ----
- D(0,0) = -infinity
-
- D(i,0) = ie
- i = [1,|A|]
-
- D(0,j) = -infinity
- j = [0,|B|]
-
-I(0,0), I(0,j) and I(i,0) by symmetry with D.
-
- M(0,0) = 0
- M(i,0) = -infinity, i > 0
- M(0,j) = -infinity, j > 0
-
-A X
-B -
- D(1,0) = e
- D(1,j) = -infinity, j = [1,|B|]
- (assuming no I-D allowed).
-
- D(0,1) = -infinity
- D(1,1) = -infinity
- D(i,1) = max.
-***/
Deleted: trunk/packages/muscle/trunk/nwsmall.cpp
===================================================================
--- trunk/packages/muscle/trunk/nwsmall.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/nwsmall.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,660 +0,0 @@
-#include "muscle.h"
-#include <math.h>
-#include "pwpath.h"
-#include "profile.h"
-#include <stdio.h>
-
-// NW small memory
-
-#define TRACE 0
-
-#if TRACE
-extern bool g_bKeepSimpleDP;
-extern SCORE *g_DPM;
-extern SCORE *g_DPD;
-extern SCORE *g_DPI;
-extern char *g_TBM;
-extern char *g_TBD;
-extern char *g_TBI;
-#endif
-
-#if TRACE // Trace build: keep full DP and traceback matrices for logging and comparison.
-#define ALLOC_TRACE() /* Declares and fills the full trace matrices in the expanding scope. */ \
- const SCORE UNINIT = MINUS_INFINITY; \
- const size_t LM = uPrefixCountA*uPrefixCountB; /* one cell per (prefix of A, prefix of B) pair */ \
- \
- SCORE *DPM_ = new SCORE[LM]; \
- SCORE *DPD_ = new SCORE[LM]; \
- SCORE *DPI_ = new SCORE[LM]; \
- \
- char *TBM_ = new char[LM]; \
- char *TBD_ = new char[LM]; \
- char *TBI_ = new char[LM]; \
- \
- memset(TBM_, '?', LM); /* '?' = traceback cell never written; CompareTB skips such cells */ \
- memset(TBD_, '?', LM); \
- memset(TBI_, '?', LM); \
- \
- for (unsigned i = 0; i <= uLengthA; ++i) \
- for (unsigned j = 0; j <= uLengthB; ++j) \
- { \
- DPM(i, j) = UNINIT; /* DPx(i, j) accessors are defined outside this file */ \
- DPD(i, j) = UNINIT; \
- DPI(i, j) = UNINIT; \
- }
-#else
-#define ALLOC_TRACE() // Non-trace build: nothing is declared or allocated.
-#endif
-
-#if TRACE // Trace build: mirror each DP score / traceback char into the full trace matrices.
-#define SetDPM(i, j, x) DPM(i, j) = x
-#define SetDPD(i, j, x) DPD(i, j) = x
-#define SetDPI(i, j, x) DPI(i, j) = x
-#define SetTBM(i, j, x) TBM(i, j) = x
-#define SetTBD(i, j, x) TBD(i, j) = x
-#define SetTBI(i, j, x) TBI(i, j) = x
-#else // Non-trace build: every Set* call expands to nothing.
-#define SetDPM(i, j, x) /* empty */
-#define SetDPD(i, j, x) /* empty */
-#define SetDPI(i, j, x) /* empty */
-#define SetTBM(i, j, x) /* empty */
-#define SetTBD(i, j, x) /* empty */
-#define SetTBI(i, j, x) /* empty */
-#endif
-
-#define RECURSE_D(i, j) /* Update DRow[j] in place for cell (i, j): extend a D gap or open one from MPrev[j]. */ \
- { \
- SCORE DD = DRow[j] + e; /* extend: old DRow[j] plus extension score e */ \
- SCORE MD = MPrev[j] + PA[i-1].m_scoreGapOpen; /* open: MPrev[j] plus gap-open score of PA[i-1] */ \
- if (DD > MD) \
- { \
- DRow[j] = DD; \
- SetTBD(i, j, 'D'); /* only the trace matrix is written; the TB bits are left unchanged */ \
- } \
- else \
- { \
- DRow[j] = MD; /* ties go to the M predecessor */ \
- /* SetBitTBD(TB, i, j, 'M'); */ \
- TBRow[j] &= ~BIT_xD; /* TBRow comes from the expanding scope; presumably the TB row for i -- TODO confirm */ \
- TBRow[j] |= BIT_MD; \
- SetTBD(i, j, 'M'); \
- } \
- SetDPD(i, j, DRow[j]); \
- }
-
-#define RECURSE_D_ATerm(j) RECURSE_D(uLengthA, j) // last row of A: row index fixed to uLengthA
-#define RECURSE_D_BTerm(j) RECURSE_D(i, uLengthB) // last column of B; the argument is ignored and `i` is taken from the expanding scope
-
-#define RECURSE_I(i, j) /* Advance the running insert score Iij to cell (i, j): extend, or open from MCurr[j-1]. */ \
- { \
- Iij += e; /* extend the current I gap by e */ \
- SCORE MI = MCurr[j-1] + PB[j-1].m_scoreGapOpen; /* open: MCurr[j-1] plus gap-open score of PB[j-1] */ \
- if (MI >= Iij) /* ties go to the M predecessor */ \
- { \
- Iij = MI; \
- /* SetBitTBI(TB, i, j, 'M'); */ \
- TBRow[j] &= ~BIT_xI; /* TBRow comes from the expanding scope; presumably the TB row for i -- TODO confirm */ \
- TBRow[j] |= BIT_MI; \
- SetTBI(i, j, 'M'); \
- } \
- else \
- SetTBI(i, j, 'I'); /* only the trace matrix is written; the TB bits are left unchanged */ \
- SetDPI(i, j, Iij); \
- }
-
-#define RECURSE_I_ATerm(j) RECURSE_I(uLengthA, j) // last row of A: row index fixed to uLengthA
-#define RECURSE_I_BTerm(j) RECURSE_I(i, uLengthB) // last column of B; the argument is ignored and `i` is taken from the expanding scope
-
-#define RECURSE_M(i, j) /* Pick the best of M/D/I at (i, j) and add it to MNext[j+1], i.e. cell (i+1, j+1). */ \
- { \
- SCORE DM = DRow[j] + PA[i-1].m_scoreGapClose; /* close a D gap */ \
- SCORE IM = Iij + PB[j-1].m_scoreGapClose; /* close an I gap */ \
- SCORE MM = MCurr[j]; \
- TB[i+1][j+1] &= ~BIT_xM; /* clear the M field before setting the chosen code */ \
- if (MM >= DM && MM >= IM) /* tie order: M first, then D, then I */ \
- { \
- MNext[j+1] += MM; /* '+=': MNext[j+1] presumably holds the substitution score already -- TODO confirm in caller */ \
- SetDPM(i+1, j+1, MNext[j+1]); \
- SetTBM(i+1, j+1, 'M'); \
- /* SetBitTBM(TB, i+1, j+1, 'M'); */ \
- TB[i+1][j+1] |= BIT_MM; \
- } \
- else if (DM >= MM && DM >= IM) \
- { \
- MNext[j+1] += DM; \
- SetDPM(i+1, j+1, MNext[j+1]); \
- SetTBM(i+1, j+1, 'D'); \
- /* SetBitTBM(TB, i+1, j+1, 'D'); */ \
- TB[i+1][j+1] |= BIT_DM; \
- } \
- else \
- { \
- assert(IM >= MM && IM >= DM); \
- MNext[j+1] += IM; \
- SetDPM(i+1, j+1, MNext[j+1]); \
- SetTBM(i+1, j+1, 'I'); \
- /* SetBitTBM(TB, i+1, j+1, 'I'); */ \
- TB[i+1][j+1] |= BIT_IM; \
- } \
- }
-
-#if TRACE
-// Approximate equality used when comparing DP matrices in TRACE builds.
-// Returns true when both values are below -100000, when they differ by
-// less than 0.0001, or when the difference is under 0.5% of the sum of
-// their magnitudes.
-static bool LocalEq(BASETYPE b1, BASETYPE b2)
-    {
-    const bool bBothVeryNegative = (b1 < -100000) && (b2 < -100000);
-    if (bBothVeryNegative)
-        return true;
-
-    const double dAbsDelta = fabs(b1 - b2);
-    if (dAbsDelta < 0.0001)
-        return true;
-
-    const double dMagnitude = fabs(b1) + fabs(b2);
-    return dAbsDelta/dMagnitude < 0.005;
-    }
-
-// Decodes the M-state predecessor stored in a packed traceback byte.
-// Returns 'M', 'D' or 'I'; calls Quit("Huh?") when the masked bits match
-// none of the known codes.
-static char Get_M_Char(char Bits)
-    {
-    const int iCode = Bits & BIT_xM;
-    if (BIT_MM == iCode)
-        return 'M';
-    if (BIT_DM == iCode)
-        return 'D';
-    if (BIT_IM == iCode)
-        return 'I';
-    Quit("Huh?");
-    return '?';
-    }
-
-// Decodes the D-state predecessor: 'M' when any BIT_xD bit is set, else 'D'.
-static char Get_D_Char(char Bits)
-    {
-    if (Bits & BIT_xD)
-        return 'M';
-    return 'D';
-    }
-
-// Decodes the I-state predecessor: 'M' when any BIT_xI bit is set, else 'I'.
-static char Get_I_Char(char Bits)
-    {
-    if (Bits & BIT_xI)
-        return 'M';
-    return 'I';
-    }
-
-static bool DPEq(char c, SCORE *g_DP, SCORE *DPD_, // Compare two DP matrices cell by cell with LocalEq; c names the matrix in log output.
- unsigned uPrefixCountA, unsigned uPrefixCountB)
- {
- SCORE *DPM_ = g_DP; // NOTE(review): no NULL check on g_DP here; presumably named DPM_/DPD_ so the DPM()/DPD() accessors pick them up -- TODO confirm
- for (unsigned i = 0; i < uPrefixCountA; ++i)
- for (unsigned j = 0; j < uPrefixCountB; ++j)
- if (!LocalEq(DPM(i, j), DPD(i, j)))
- {
- Log("***DPDIFF*** DP%c(%d, %d) Simple = %.2g, Fast = %.2g\n",
- c, i, j, DPM(i, j), DPD(i, j));
- return false; // stop at the first mismatching cell
- }
- return true;
- }
-
-static bool CompareTB(char **TB, char *TBM_, char *TBD_, char *TBI_, // Compare bit-packed TB against three char traceback matrices; returns true if no mismatch.
- unsigned uPrefixCountA, unsigned uPrefixCountB)
- {
- SCORE *DPM_ = g_DPM; // reference scores; cells at or below -100000 are not compared
- bool Eq = true;
- for (unsigned i = 0; i < uPrefixCountA; ++i)
- for (unsigned j = 0; j < uPrefixCountB; ++j)
- {
- char c1 = TBM(i, j);
- char c2 = Get_M_Char(TB[i][j]);
- if (c1 != '?' && c1 != c2 && DPM(i, j) > -100000) // '?' = cell never written in the char matrix
- {
- Log("TBM(%d, %d) Simple = %c, NW = %c\n", i, j, c1, c2);
- Eq = false;
- goto D; // report only the first mismatch per matrix, then move on to the next matrix
- }
- }
-
-D:
- SCORE *DPD_ = g_DPD;
- for (unsigned i = 0; i < uPrefixCountA; ++i)
- for (unsigned j = 0; j < uPrefixCountB; ++j)
- {
- char c1 = TBD(i, j);
- char c2 = Get_D_Char(TB[i][j]);
- if (c1 != '?' && c1 != c2 && DPD(i, j) > -100000)
- {
- Log("TBD(%d, %d) Simple = %c, NW = %c\n", i, j, c1, c2);
- Eq = false;
- goto I;
- }
- }
-I:
- SCORE *DPI_ = g_DPI;
- for (unsigned i = 0; i < uPrefixCountA; ++i)
- for (unsigned j = 0; j < uPrefixCountB; ++j)
- {
- char c1 = TBI(i, j);
- char c2 = Get_I_Char(TB[i][j]);
- if (c1 != '?' && c1 != c2 && DPI(i, j) > -100000)
- {
- Log("TBI(%d, %d) Simple = %c, NW = %c\n", i, j, c1, c2);
- Eq = false;
- goto Done;
- }
- }
-Done:
- if (Eq)
- Log("TB success\n");
- return Eq;
- }
-
-// Formats a score for the DP matrix logs.
-// Scores below -100000 are shown as " *"; all others are printed with
-// "%6.1f". The result points to a static buffer that is overwritten by the
-// next call, so it must be consumed before calling again.
-static const char *LocalScoreToStr(SCORE s)
-    {
-    static char str[16];
-    if (s < -100000)
-        return " *";
-    // Bounded write: a very large score no longer overruns the 16-byte buffer
-    // (it is truncated instead).
-    snprintf(str, sizeof(str), "%6.1f", s);
-    return str;
-    }
-
-static void LogDP(const SCORE *DPM_, const ProfPos *PA, const ProfPos *PB, // Log one DP score matrix as a table: columns are prefixes of B, rows are prefixes of A.
- unsigned uPrefixCountA, unsigned uPrefixCountB)
- {
- Log(" ");
- for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB) // header row
- {
- char c = ' ';
- if (uPrefixLengthB > 0)
- c = ConsensusChar(PB[uPrefixLengthB - 1]); // prefix length n is labelled with profile position n-1
- Log(" %4u:%c", uPrefixLengthB, c);
- }
- Log("\n");
- for (unsigned uPrefixLengthA = 0; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA) // one row per prefix of A
- {
- char c = ' ';
- if (uPrefixLengthA > 0)
- c = ConsensusChar(PA[uPrefixLengthA - 1]);
- Log("%4u:%c ", uPrefixLengthA, c);
- for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
- Log(" %s", LocalScoreToStr(DPM(uPrefixLengthA, uPrefixLengthB))); // DPM() presumably indexes the DPM_ parameter -- TODO confirm
- Log("\n");
- }
- }
-
-// Logs one section of the bit-packed traceback matrix: a "Bit TB<cName>:"
-// title line followed by one row per prefix of A, each cell decoded with
-// GetChar (one of the Get_x_Char functions).
-static void LogBitTBSection(char cName, char (*GetChar)(char), char **TB,
-  const ProfPos *PA, unsigned uPrefixCountA, unsigned uPrefixCountB)
-    {
-    Log("Bit TB%c:\n", cName);
-    for (unsigned uPrefixLengthA = 0; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
-        {
-        // Prefix length n is labelled with the consensus of profile position n-1.
-        char cRow = ' ';
-        if (uPrefixLengthA > 0)
-            cRow = ConsensusChar(PA[uPrefixLengthA - 1]);
-        Log("%4u:%c ", uPrefixLengthA, cRow);
-        for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
-            {
-            char cCell = GetChar(TB[uPrefixLengthA][uPrefixLengthB]);
-            Log(" %6c", cCell);
-            }
-        Log("\n");
-        }
-    }
-
-// Logs the bit-packed traceback matrix TB as three tables (M, D, I states),
-// preceded by a header row labelling the prefixes of B. Output is identical
-// to the previous hand-unrolled version; the three copy-pasted loops (which
-// also shadowed the row label `c`) now share LogBitTBSection.
-static void LogBitTB(char **TB, const ProfPos *PA, const ProfPos *PB,
-  unsigned uPrefixCountA, unsigned uPrefixCountB)
-    {
-    Log(" ");
-    for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
-        {
-        char c = ' ';
-        if (uPrefixLengthB > 0)
-            c = ConsensusChar(PB[uPrefixLengthB - 1]);
-        Log(" %4u:%c", uPrefixLengthB, c);
-        }
-    Log("\n");
-    LogBitTBSection('M', Get_M_Char, TB, PA, uPrefixCountA, uPrefixCountB);
-
-    Log("\n");
-    LogBitTBSection('D', Get_D_Char, TB, PA, uPrefixCountA, uPrefixCountB);
-
-    Log("\n");
-    LogBitTBSection('I', Get_I_Char, TB, PA, uPrefixCountA, uPrefixCountB);
-    }
-
-static void ListTB(char *TBM_, const ProfPos *PA, const ProfPos *PB, // Log one char traceback matrix as a table: columns are prefixes of B, rows are prefixes of A.
- unsigned uPrefixCountA, unsigned uPrefixCountB)
- {
- Log(" ");
- for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB) // header row
- {
- char c = ' ';
- if (uPrefixLengthB > 0)
- c = ConsensusChar(PB[uPrefixLengthB - 1]); // prefix length n is labelled with profile position n-1
- Log(" %4u:%c", uPrefixLengthB, c);
- }
- Log("\n");
- for (unsigned uPrefixLengthA = 0; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA) // one row per prefix of A
- {
- char c = ' ';
- if (uPrefixLengthA > 0)
- c = ConsensusChar(PA[uPrefixLengthA - 1]);
- Log("%4u:%c ", uPrefixLengthA, c);
- for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
- {
- char c = TBM(uPrefixLengthA, uPrefixLengthB); // shadows the row label c; TBM() presumably indexes the TBM_ parameter -- TODO confirm
- Log(" %6c", c);
- }
- Log("\n");
- }
- }
-
-static const char *BitsToStr(char Bits)
- {
- static char Str[9];
-
- sprintf(Str, "%cM %cD %cI",
- Get_M_Char(Bits),
- Get_D_Char(Bits),
- Get_I_Char(Bits));
- }
-#endif // TRACE
-
-static inline void SetBitTBM(char **TB, unsigned i, unsigned j, char c)
- {
- char Bit;
- switch (c)
- {
- case 'M':
- Bit = BIT_MM;
- break;
- case 'D':
- Bit = BIT_DM;
- break;
- case 'I':
- Bit = BIT_IM;
- break;
- default:
- Quit("Huh?!");
- }
- TB[i][j] &= ~BIT_xM;
- TB[i][j] |= Bit;
- }
-
-static inline void SetBitTBD(char **TB, unsigned i, unsigned j, char c)
- {
- char Bit;
- switch (c)
- {
- case 'M':
- Bit = BIT_MD;
- break;
- case 'D':
- Bit = BIT_DD;
- break;
- default:
- Quit("Huh?!");
- }
- TB[i][j] &= ~BIT_xD;
- TB[i][j] |= Bit;
- }
-
-static inline void SetBitTBI(char **TB, unsigned i, unsigned j, char c)
- {
- char Bit;
- switch (c)
- {
- case 'M':
- Bit = BIT_MI;
- break;
- case 'I':
- Bit = BIT_II;
- break;
- default:
- Quit("Huh?!");
- }
- TB[i][j] &= ~BIT_xI;
- TB[i][j] |= Bit;
- }
-
-#if TRACE
-#define LogMatrices() \
- { \
- Log("Bit DPM:\n"); \
- LogDP(DPM_, PA, PB, uPrefixCountA, uPrefixCountB); \
- Log("Bit DPD:\n"); \
- LogDP(DPD_, PA, PB, uPrefixCountA, uPrefixCountB); \
- Log("Bit DPI:\n"); \
- LogDP(DPI_, PA, PB, uPrefixCountA, uPrefixCountB); \
- Log("Bit TB:\n"); \
- LogBitTB(TB, PA, PB, uPrefixCountA, uPrefixCountB); \
- bool Same; \
- Same = DPEq('M', g_DPM, DPM_, uPrefixCountA, uPrefixCountB);\
- if (Same) \
- Log("DPM success\n"); \
- Same = DPEq('D', g_DPD, DPD_, uPrefixCountA, uPrefixCountB);\
- if (Same) \
- Log("DPD success\n"); \
- Same = DPEq('I', g_DPI, DPI_, uPrefixCountA, uPrefixCountB);\
- if (Same) \
- Log("DPI success\n"); \
- CompareTB(TB, g_TBM, g_TBD, g_TBI, uPrefixCountA, uPrefixCountB);\
- }
-#else
-#define LogMatrices() /* empty */
-#endif
-
-static unsigned uCachePrefixCountB;
-static unsigned uCachePrefixCountA;
-static SCORE *CacheMCurr;
-static SCORE *CacheMNext;
-static SCORE *CacheMPrev;
-static SCORE *CacheDRow;
-static char **CacheTB;
-
-static void AllocCache(unsigned uPrefixCountA, unsigned uPrefixCountB)
- {
- if (uPrefixCountA <= uCachePrefixCountA && uPrefixCountB <= uCachePrefixCountB)
- return;
-
- delete[] CacheMCurr;
- delete[] CacheMNext;
- delete[] CacheMPrev;
- delete[] CacheDRow;
- for (unsigned i = 0; i < uCachePrefixCountA; ++i)
- delete[] CacheTB[i];
- delete[] CacheTB;
-
- uCachePrefixCountA = uPrefixCountA + 1024;
- uCachePrefixCountB = uPrefixCountB + 1024;
-
- CacheMCurr = new SCORE[uCachePrefixCountB];
- CacheMNext = new SCORE[uCachePrefixCountB];
- CacheMPrev = new SCORE[uCachePrefixCountB];
- CacheDRow = new SCORE[uCachePrefixCountB];
-
- CacheTB = new char *[uCachePrefixCountA];
- for (unsigned i = 0; i < uCachePrefixCountA; ++i)
- CacheTB[i] = new char [uCachePrefixCountB];
- }
-
-SCORE NWSmall(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
- unsigned uLengthB, PWPath &Path)
- {
- if (0 == uLengthB || 0 == uLengthA )
- Quit("Internal error, NWSmall: length=0");
-
- SetTermGaps(PA, uLengthA);
- SetTermGaps(PB, uLengthB);
-
- const unsigned uPrefixCountA = uLengthA + 1;
- const unsigned uPrefixCountB = uLengthB + 1;
- const SCORE e = g_scoreGapExtend;
-
- ALLOC_TRACE()
-
- AllocCache(uPrefixCountA, uPrefixCountB);
-
- SCORE *MCurr = CacheMCurr;
- SCORE *MNext = CacheMNext;
- SCORE *MPrev = CacheMPrev;
- SCORE *DRow = CacheDRow;
-
- char **TB = CacheTB;
- for (unsigned i = 0; i < uPrefixCountA; ++i)
- memset(TB[i], 0, uPrefixCountB);
-
- SCORE Iij = MINUS_INFINITY;
- SetDPI(0, 0, Iij);
-
- Iij = PB[0].m_scoreGapOpen;
- SetDPI(0, 1, Iij);
-
- for (unsigned j = 2; j <= uLengthB; ++j)
- {
- Iij += e;
- SetDPI(0, j, Iij);
- SetTBI(0, j, 'I');
- }
-
- for (unsigned j = 0; j <= uLengthB; ++j)
- {
- DRow[j] = MINUS_INFINITY;
- SetDPD(0, j, DRow[j]);
- SetTBD(0, j, 'D');
- }
-
- MPrev[0] = 0;
- SetDPM(0, 0, MPrev[0]);
- for (unsigned j = 1; j <= uLengthB; ++j)
- {
- MPrev[j] = MINUS_INFINITY;
- SetDPM(0, j, MPrev[j]);
- }
-
- MCurr[0] = MINUS_INFINITY;
- SetDPM(1, 0, MCurr[0]);
-
- MCurr[1] = ScoreProfPos2(PA[0], PB[0]);
- SetDPM(1, 1, MCurr[1]);
- SetBitTBM(TB, 1, 1, 'M');
- SetTBM(1, 1, 'M');
-
- for (unsigned j = 2; j <= uLengthB; ++j)
- {
- MCurr[j] = ScoreProfPos2(PA[0], PB[j-1]) + PB[0].m_scoreGapOpen +
- (j - 2)*e + PB[j-2].m_scoreGapClose;
- SetDPM(1, j, MCurr[j]);
- SetBitTBM(TB, 1, j, 'I');
- SetTBM(1, j, 'I');
- }
-
-// Main DP loop
- for (unsigned i = 1; i < uLengthA; ++i)
- {
- char *TBRow = TB[i];
-
- Iij = MINUS_INFINITY;
- SetDPI(i, 0, Iij);
-
- DRow[0] = PA[0].m_scoreGapOpen + (i - 1)*e;
- SetDPD(i, 0, DRow[0]);
-
- MCurr[0] = MINUS_INFINITY;
- if (i == 1)
- {
- MCurr[1] = ScoreProfPos2(PA[0], PB[0]);
- SetBitTBM(TB, i, 1, 'M');
- SetTBM(i, 1, 'M');
- }
- else
- {
- MCurr[1] = ScoreProfPos2(PA[i-1], PB[0]) + PA[0].m_scoreGapOpen +
- (i - 2)*e + PA[i-2].m_scoreGapClose;
- SetBitTBM(TB, i, 1, 'D');
- SetTBM(i, 1, 'D');
- }
- SetDPM(i, 0, MCurr[0]);
- SetDPM(i, 1, MCurr[1]);
-
- for (unsigned j = 1; j < uLengthB; ++j)
- MNext[j+1] = ScoreProfPos2(PA[i], PB[j]);
-
- for (unsigned j = 1; j < uLengthB; ++j)
- {
- RECURSE_D(i, j)
- RECURSE_I(i, j)
- RECURSE_M(i, j)
- }
- // Special case for j=uLengthB
- RECURSE_D_BTerm(i)
- RECURSE_I_BTerm(i)
-
- // Prev := Curr, Curr := Next, Next := Prev
- Rotate(MPrev, MCurr, MNext);
- }
-
-// Special case for i=uLengthA
- char *TBRow = TB[uLengthA];
- MCurr[0] = MINUS_INFINITY;
- MCurr[1] = ScoreProfPos2(PA[uLengthA-1], PB[0]) + (uLengthA - 2)*e +
- PA[0].m_scoreGapOpen + PA[uLengthA-2].m_scoreGapClose;
- SetBitTBM(TB, uLengthA, 1, 'D');
- SetTBM(uLengthA, 1, 'D');
- SetDPM(uLengthA, 0, MCurr[0]);
- SetDPM(uLengthA, 1, MCurr[1]);
-
- DRow[0] = MINUS_INFINITY;
- SetDPD(uLengthA, 0, DRow[0]);
- for (unsigned j = 1; j <= uLengthB; ++j)
- RECURSE_D_ATerm(j);
-
- Iij = MINUS_INFINITY;
- for (unsigned j = 1; j <= uLengthB; ++j)
- RECURSE_I_ATerm(j)
-
- LogMatrices();
-
- SCORE MAB = MCurr[uLengthB];
- SCORE DAB = DRow[uLengthB];
- SCORE IAB = Iij;
-
- SCORE Score = MAB;
- char cEdgeType = 'M';
- if (DAB > Score)
- {
- Score = DAB;
- cEdgeType = 'D';
- }
- if (IAB > Score)
- {
- Score = IAB;
- cEdgeType = 'I';
- }
-
-#if TRACE
- Log(" Fast: MAB=%.4g DAB=%.4g IAB=%.4g best=%c\n",
- MAB, DAB, IAB, cEdgeType);
-#endif
-
- BitTraceBack(TB, uLengthA, uLengthB, cEdgeType, Path);
-
-#if DBEUG
- Path.Validate();
-#endif
-
- return 0;
- }
Deleted: trunk/packages/muscle/trunk/objscore.cpp
===================================================================
--- trunk/packages/muscle/trunk/objscore.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/objscore.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,113 +0,0 @@
-#include "muscle.h"
-#include "msa.h"
-#include "objscore.h"
-#include "profile.h"
-#include "timing.h"
-
-#if TIMING
-TICKS g_ticksObjScore = 0;
-#endif
-
-SCORE ObjScore(const MSA &msa, const unsigned SeqIndexes1[],
- unsigned uSeqCount1, const unsigned SeqIndexes2[], unsigned uSeqCount2)
- {
-#if TIMING
- TICKS t1 = GetClockTicks();
-#endif
- const unsigned uSeqCount = msa.GetSeqCount();
-
- OBJSCORE OS = g_ObjScore;
- if (g_ObjScore == OBJSCORE_SPM)
- {
- if (uSeqCount <= 100)
- OS = OBJSCORE_XP;
- else
- OS = OBJSCORE_SPF;
- }
-
- MSA msa1;
- MSA msa2;
-
- switch (OS)
- {
- case OBJSCORE_DP:
- case OBJSCORE_XP:
- MSAFromSeqSubset(msa, SeqIndexes1, uSeqCount1, msa1);
- MSAFromSeqSubset(msa, SeqIndexes2, uSeqCount2, msa2);
-
- SetMSAWeightsMuscle(msa1);
- SetMSAWeightsMuscle(msa2);
- break;
-
- case OBJSCORE_SP:
- case OBJSCORE_SPF:
- case OBJSCORE_PS:
- // Yuck -- casting away const (design flaw)
- SetMSAWeightsMuscle((MSA &) msa);
- break;
- }
-
- SCORE Score = 0;
- switch (OS)
- {
- case OBJSCORE_SP:
- Score = ObjScoreSP(msa);
- break;
-
- case OBJSCORE_DP:
- Score = ObjScoreDP(msa1, msa2);
- break;
-
- case OBJSCORE_XP:
- Score = ObjScoreXP(msa1, msa2);
- break;
-
- case OBJSCORE_PS:
- Score = ObjScorePS(msa);
- break;
-
- case OBJSCORE_SPF:
- Score = ObjScoreSPDimer(msa);
- break;
-
- default:
- Quit("Invalid g_ObjScore=%d", g_ObjScore);
- }
-#if TIMING
- TICKS t2 = GetClockTicks();
- g_ticksObjScore += (t2 - t1);
-#endif
- return Score;
- }
-
-SCORE ObjScoreIds(const MSA &msa, const unsigned Ids1[],
- unsigned uCount1, const unsigned Ids2[], unsigned uCount2)
- {
-#if TIMING
- TICKS t1 = GetClockTicks();
-#endif
- unsigned *SeqIndexes1 = new unsigned[uCount1];
- unsigned *SeqIndexes2 = new unsigned[uCount2];
-
- for (unsigned n = 0; n < uCount1; ++n)
- SeqIndexes1[n] = msa.GetSeqIndex(Ids1[n]);
-
- for (unsigned n = 0; n < uCount2; ++n)
- SeqIndexes2[n] = msa.GetSeqIndex(Ids2[n]);
-
-#if DOUBLE_AFFINE
- extern SCORE ObjScoreDA(const MSA &msa, SCORE *ptrLetters, SCORE *ptrGaps);
- SCORE Letters, Gaps;
- SCORE dObjScore = ObjScoreDA(msa, &Letters, &Gaps);
-
- delete[] SeqIndexes1;
- delete[] SeqIndexes2;
-#else
- SCORE dObjScore = ObjScore(msa, SeqIndexes1, uCount1, SeqIndexes2, uCount2);
-#endif
-#if TIMING
- TICKS t2 = GetClockTicks();
- g_ticksObjScore += (t2 - t1);
-#endif
- return dObjScore;
- }
Deleted: trunk/packages/muscle/trunk/objscore.h
===================================================================
--- trunk/packages/muscle/trunk/objscore.h 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/objscore.h 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,30 +0,0 @@
-#ifndef ObjScore_h
-#define ObjScore_h
-
-SCORE ScoreSeqPairGaps(const MSA &msa1, unsigned uSeqIndex1,
- const MSA &msa2, unsigned uSeqIndex2);
-SCORE ScoreSeqPairLetters(const MSA &msa1, unsigned uSeqIndex1,
- const MSA &msa2, unsigned uSeqIndex2);
-SCORE ScoreGaps(const MSA &msa, const unsigned Cols[], unsigned ColCount);
-
-SCORE ObjScore(const MSA &msa, const unsigned SeqIndexes1[],
- unsigned uSeqCount1, const unsigned SeqIndexes2[], unsigned uSeqCount2);
-
-SCORE ObjScoreIds(const MSA &msa, const unsigned Ids1[],
- unsigned uCount1, const unsigned Ids2[], unsigned uCount2);
-
-void GetLetterScores(const MSA &msa, SCORE LetterScores[]);
-
-SCORE ObjScoreDP(const MSA &msa1, const MSA &msa2, SCORE MatchScore[] = 0);
-SCORE ObjScorePS(const MSA &msa, SCORE MatchScore[] = 0);
-SCORE ObjScoreSP(const MSA &msa, SCORE MatchScore[] = 0);
-SCORE ObjScoreXP(const MSA &msa, const MSA &msa2);
-SCORE ObjScoreSPDimer(const MSA &msa);
-SCORE ObjScoreDP_Profs(const ProfPos *PA, const ProfPos *PB, unsigned uColCount,
- SCORE MatchScore[] = 0);
-
-SCORE DiffObjScore(
- const MSA &msa1, const PWPath &Path1, const unsigned Edges1[], unsigned uEdgeCount1,
- const MSA &msa2, const PWPath &Path2, const unsigned Edges2[], unsigned uEdgeCount2);
-
-#endif // ObjScore_h
Deleted: trunk/packages/muscle/trunk/objscore2.cpp
===================================================================
--- trunk/packages/muscle/trunk/objscore2.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/objscore2.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,522 +0,0 @@
-#include "muscle.h"
-#include "msa.h"
-#include "profile.h"
-#include "objscore.h"
-
-#define TRACE 0
-#define TRACE_SEQPAIR 0
-#define TEST_SPFAST 0
-
-extern SCOREMATRIX VTML_LA;
-extern SCOREMATRIX PAM200;
-extern SCOREMATRIX PAM200NoCenter;
-extern SCOREMATRIX VTML_SP;
-extern SCOREMATRIX VTML_SPNoCenter;
-extern SCOREMATRIX NUC_SP;
-
-SCORE g_SPScoreLetters;
-SCORE g_SPScoreGaps;
-
-static SCORE TermGapScore(bool Gap)
- {
- switch (g_TermGaps)
- {
- case TERMGAPS_Full:
- return 0;
-
- case TERMGAPS_Half:
- if (Gap)
- return g_scoreGapOpen/2;
- return 0;
-
- case TERMGAPS_Ext:
- if (Gap)
- return g_scoreGapExtend;
- return 0;
- }
- Quit("TermGapScore?!");
- return 0;
- }
-
-SCORE ScoreSeqPairLetters(const MSA &msa1, unsigned uSeqIndex1,
- const MSA &msa2, unsigned uSeqIndex2)
- {
- const unsigned uColCount = msa1.GetColCount();
- const unsigned uColCount2 = msa2.GetColCount();
- if (uColCount != uColCount2)
- Quit("ScoreSeqPairLetters, different lengths");
-
-#if TRACE_SEQPAIR
- {
- Log("\n");
- Log("ScoreSeqPairLetters\n");
- MSA msaTmp;
- msaTmp.SetSize(2, uColCount);
- msaTmp.CopySeq(0, msa1, uSeqIndex1);
- msaTmp.CopySeq(1, msa2, uSeqIndex2);
- msaTmp.LogMe();
- }
-#endif
-
- SCORE scoreLetters = 0;
- SCORE scoreGaps = 0;
- bool bGapping1 = false;
- bool bGapping2 = false;
-
- unsigned uColStart = 0;
- bool bLeftTermGap = false;
- for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
- {
- bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
- bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);
- if (!bGap1 || !bGap2)
- {
- if (bGap1 || bGap2)
- bLeftTermGap = true;
- uColStart = uColIndex;
- break;
- }
- }
-
- unsigned uColEnd = uColCount - 1;
- bool bRightTermGap = false;
- for (int iColIndex = (int) uColCount - 1; iColIndex >= 0; --iColIndex)
- {
- bool bGap1 = msa1.IsGap(uSeqIndex1, iColIndex);
- bool bGap2 = msa2.IsGap(uSeqIndex2, iColIndex);
- if (!bGap1 || !bGap2)
- {
- if (bGap1 || bGap2)
- bRightTermGap = true;
- uColEnd = (unsigned) iColIndex;
- break;
- }
- }
-
-#if TRACE_SEQPAIR
- Log("LeftTermGap=%d RightTermGap=%d\n", bLeftTermGap, bRightTermGap);
-#endif
-
- for (unsigned uColIndex = uColStart; uColIndex <= uColEnd; ++uColIndex)
- {
- unsigned uLetter1 = msa1.GetLetterEx(uSeqIndex1, uColIndex);
- if (uLetter1 >= g_AlphaSize)
- continue;
- unsigned uLetter2 = msa2.GetLetterEx(uSeqIndex2, uColIndex);
- if (uLetter2 >= g_AlphaSize)
- continue;
-
- SCORE scoreMatch = (*g_ptrScoreMatrix)[uLetter1][uLetter2];
- scoreLetters += scoreMatch;
- }
- return scoreLetters;
- }
-
-SCORE ScoreSeqPairGaps(const MSA &msa1, unsigned uSeqIndex1,
- const MSA &msa2, unsigned uSeqIndex2)
- {
- const unsigned uColCount = msa1.GetColCount();
- const unsigned uColCount2 = msa2.GetColCount();
- if (uColCount != uColCount2)
- Quit("ScoreSeqPairGaps, different lengths");
-
-#if TRACE_SEQPAIR
- {
- Log("\n");
- Log("ScoreSeqPairGaps\n");
- MSA msaTmp;
- msaTmp.SetSize(2, uColCount);
- msaTmp.CopySeq(0, msa1, uSeqIndex1);
- msaTmp.CopySeq(1, msa2, uSeqIndex2);
- msaTmp.LogMe();
- }
-#endif
-
- SCORE scoreGaps = 0;
- bool bGapping1 = false;
- bool bGapping2 = false;
-
- unsigned uColStart = 0;
- bool bLeftTermGap = false;
- for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
- {
- bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
- bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);
- if (!bGap1 || !bGap2)
- {
- if (bGap1 || bGap2)
- bLeftTermGap = true;
- uColStart = uColIndex;
- break;
- }
- }
-
- unsigned uColEnd = uColCount - 1;
- bool bRightTermGap = false;
- for (int iColIndex = (int) uColCount - 1; iColIndex >= 0; --iColIndex)
- {
- bool bGap1 = msa1.IsGap(uSeqIndex1, iColIndex);
- bool bGap2 = msa2.IsGap(uSeqIndex2, iColIndex);
- if (!bGap1 || !bGap2)
- {
- if (bGap1 || bGap2)
- bRightTermGap = true;
- uColEnd = (unsigned) iColIndex;
- break;
- }
- }
-
-#if TRACE_SEQPAIR
- Log("LeftTermGap=%d RightTermGap=%d\n", bLeftTermGap, bRightTermGap);
-#endif
-
- for (unsigned uColIndex = uColStart; uColIndex <= uColEnd; ++uColIndex)
- {
- bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
- bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);
-
- if (bGap1 && bGap2)
- continue;
-
- if (bGap1)
- {
- if (!bGapping1)
- {
-#if TRACE_SEQPAIR
- Log("Gap open seq 1 col %d\n", uColIndex);
-#endif
- if (uColIndex == uColStart)
- scoreGaps += TermGapScore(true);
- else
- scoreGaps += g_scoreGapOpen;
- bGapping1 = true;
- }
- else
- scoreGaps += g_scoreGapExtend;
- continue;
- }
-
- else if (bGap2)
- {
- if (!bGapping2)
- {
-#if TRACE_SEQPAIR
- Log("Gap open seq 2 col %d\n", uColIndex);
-#endif
- if (uColIndex == uColStart)
- scoreGaps += TermGapScore(true);
- else
- scoreGaps += g_scoreGapOpen;
- bGapping2 = true;
- }
- else
- scoreGaps += g_scoreGapExtend;
- continue;
- }
-
- bGapping1 = false;
- bGapping2 = false;
- }
-
- if (bGapping1 || bGapping2)
- {
- scoreGaps -= g_scoreGapOpen;
- scoreGaps += TermGapScore(true);
- }
- return scoreGaps;
- }
-
-// The usual sum-of-pairs objective score: sum the score
-// of the alignment of each pair of sequences.
-SCORE ObjScoreSP(const MSA &msa, SCORE MatchScore[])
- {
-#if TRACE
- Log("==================ObjScoreSP==============\n");
- Log("msa=\n");
- msa.LogMe();
-#endif
- g_SPScoreLetters = 0;
- g_SPScoreGaps = 0;
-
- if (0 != MatchScore)
- {
- const unsigned uColCount = msa.GetColCount();
- for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
- MatchScore[uColIndex] = 0;
- }
-
- const unsigned uSeqCount = msa.GetSeqCount();
- SCORE scoreTotal = 0;
- unsigned uPairCount = 0;
-#if TRACE
- Log("Seq1 Seq2 wt1 wt2 Letters Gaps Unwt.Score Wt.Score Total\n");
- Log("---- ---- ------ ------ ---------- ---------- ---------- ---------- ----------\n");
-#endif
- for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)
- {
- const WEIGHT w1 = msa.GetSeqWeight(uSeqIndex1);
- for (unsigned uSeqIndex2 = uSeqIndex1 + 1; uSeqIndex2 < uSeqCount; ++uSeqIndex2)
- {
- const WEIGHT w2 = msa.GetSeqWeight(uSeqIndex2);
- const WEIGHT w = w1*w2;
-
- SCORE scoreLetters = ScoreSeqPairLetters(msa, uSeqIndex1, msa, uSeqIndex2);
- SCORE scoreGaps = ScoreSeqPairGaps(msa, uSeqIndex1, msa, uSeqIndex2);
- SCORE scorePair = scoreLetters + scoreGaps;
- ++uPairCount;
-
- scoreTotal += w*scorePair;
-
- g_SPScoreLetters += w*scoreLetters;
- g_SPScoreGaps += w*scoreGaps;
-#if TRACE
- Log("%4d %4d %6.3f %6.3f %10.2f %10.2f %10.2f %10.2f %10.2f >%s >%s\n",
- uSeqIndex1,
- uSeqIndex2,
- w1,
- w2,
- scoreLetters,
- scoreGaps,
- scorePair,
- scorePair*w1*w2,
- scoreTotal,
- msa.GetSeqName(uSeqIndex1),
- msa.GetSeqName(uSeqIndex2));
-#endif
- }
- }
-#if TEST_SPFAST
- {
- SCORE f = ObjScoreSPFast(msa);
- Log("Fast = %.6g\n", f);
- Log("Brute = %.6g\n", scoreTotal);
- if (BTEq(f, scoreTotal))
- Log("Agree\n");
- else
- Log("** DISAGREE **\n");
- }
-#endif
-// return scoreTotal / uPairCount;
- return scoreTotal;
- }
-
-// Objective score defined as the dynamic programming score.
-// Input is two alignments, which must be of the same length.
-// Result is the same profile-profile score that is optimized
-// by dynamic programming.
-SCORE ObjScoreDP(const MSA &msa1, const MSA &msa2, SCORE MatchScore[])
- {
- const unsigned uColCount = msa1.GetColCount();
- if (msa2.GetColCount() != uColCount)
- Quit("ObjScoreDP, must be same length");
-
- const unsigned uColCount1 = msa1.GetColCount();
- const unsigned uColCount2 = msa2.GetColCount();
-
- const ProfPos *PA = ProfileFromMSA(msa1);
- const ProfPos *PB = ProfileFromMSA(msa2);
-
- return ObjScoreDP_Profs(PA, PB, uColCount1, MatchScore);
- }
-
-SCORE ObjScoreDP_Profs(const ProfPos *PA, const ProfPos *PB, unsigned uColCount,
- SCORE MatchScore[])
- {
-//#if TRACE
-// Log("Profile 1:\n");
-// ListProfile(PA, uColCount, &msa1);
-//
-// Log("Profile 2:\n");
-// ListProfile(PB, uColCount, &msa2);
-//#endif
-
- SCORE scoreTotal = 0;
-
- for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
- {
- const ProfPos &PPA = PA[uColIndex];
- const ProfPos &PPB = PB[uColIndex];
-
- SCORE scoreGap = 0;
- SCORE scoreMatch = 0;
- // If gapped column...
- if (PPA.m_bAllGaps && PPB.m_bAllGaps)
- scoreGap = 0;
- else if (PPA.m_bAllGaps)
- {
- if (uColCount - 1 == uColIndex || !PA[uColIndex+1].m_bAllGaps)
- scoreGap = PPB.m_scoreGapClose;
- if (0 == uColIndex || !PA[uColIndex-1].m_bAllGaps)
- scoreGap += PPB.m_scoreGapOpen;
- //if (0 == scoreGap)
- // scoreGap = PPB.m_scoreGapExtend;
- }
- else if (PPB.m_bAllGaps)
- {
- if (uColCount - 1 == uColIndex || !PB[uColIndex+1].m_bAllGaps)
- scoreGap = PPA.m_scoreGapClose;
- if (0 == uColIndex || !PB[uColIndex-1].m_bAllGaps)
- scoreGap += PPA.m_scoreGapOpen;
- //if (0 == scoreGap)
- // scoreGap = PPA.m_scoreGapExtend;
- }
- else
- scoreMatch = ScoreProfPos2(PPA, PPB);
-
- if (0 != MatchScore)
- MatchScore[uColIndex] = scoreMatch;
-
- scoreTotal += scoreMatch + scoreGap;
-
- extern bool g_bTracePPScore;
- extern MSA *g_ptrPPScoreMSA1;
- extern MSA *g_ptrPPScoreMSA2;
- if (g_bTracePPScore)
- {
- const MSA &msa1 = *g_ptrPPScoreMSA1;
- const MSA &msa2 = *g_ptrPPScoreMSA2;
- const unsigned uSeqCount1 = msa1.GetSeqCount();
- const unsigned uSeqCount2 = msa2.GetSeqCount();
-
- for (unsigned n = 0; n < uSeqCount1; ++n)
- Log("%c", msa1.GetChar(n, uColIndex));
- Log(" ");
- for (unsigned n = 0; n < uSeqCount2; ++n)
- Log("%c", msa2.GetChar(n, uColIndex));
- Log(" %10.3f", scoreMatch);
- if (scoreGap != 0)
- Log(" %10.3f", scoreGap);
- Log("\n");
- }
- }
-
- delete[] PA;
- delete[] PB;
-
- return scoreTotal;
- }
-
-// Objective score defined as the sum of profile-sequence
-// scores for each sequence in the alignment. The profile
-// is computed from the entire alignment, so this includes
-// the score of each sequence against itself. This is to
-// avoid recomputing the profile each time, so we reduce
-// complexity but introduce a questionable approximation.
-// The goal is to see if we can exploit the apparent
-// improvement in performance of log-expectation score
-// over the usual sum-of-pairs by optimizing this
-// objective score in the iterative refinement stage.
-SCORE ObjScorePS(const MSA &msa, SCORE MatchScore[])
- {
- if (g_PPScore != PPSCORE_LE)
- Quit("FastScoreMSA_LASimple: LA");
-
- const unsigned uSeqCount = msa.GetSeqCount();
- const unsigned uColCount = msa.GetColCount();
-
- const ProfPos *Prof = ProfileFromMSA(msa);
-
- if (0 != MatchScore)
- for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
- MatchScore[uColIndex] = 0;
-
- SCORE scoreTotal = 0;
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
- {
- const WEIGHT weightSeq = msa.GetSeqWeight(uSeqIndex);
- SCORE scoreSeq = 0;
- for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
- {
- const ProfPos &PP = Prof[uColIndex];
- if (msa.IsGap(uSeqIndex, uColIndex))
- {
- bool bOpen = (0 == uColIndex ||
- !msa.IsGap(uSeqIndex, uColIndex - 1));
- bool bClose = (uColCount - 1 == uColIndex ||
- !msa.IsGap(uSeqIndex, uColIndex + 1));
-
- if (bOpen)
- scoreSeq += PP.m_scoreGapOpen;
- if (bClose)
- scoreSeq += PP.m_scoreGapClose;
- //if (!bOpen && !bClose)
- // scoreSeq += PP.m_scoreGapExtend;
- }
- else if (msa.IsWildcard(uSeqIndex, uColIndex))
- continue;
- else
- {
- unsigned uLetter = msa.GetLetter(uSeqIndex, uColIndex);
- const SCORE scoreMatch = PP.m_AAScores[uLetter];
- if (0 != MatchScore)
- MatchScore[uColIndex] += weightSeq*scoreMatch;
- scoreSeq += scoreMatch;
- }
- }
- scoreTotal += weightSeq*scoreSeq;
- }
-
- delete[] Prof;
- return scoreTotal;
- }
-
-// The XP score is the sum of the score of each pair of
-// sequences between two profiles which are aligned to
-// each other. Notice that for two given profiles aligned
-// in different ways, the difference in XP score must be
-// the same as the difference in SP score because the
-// score of a pair of sequences in one profile doesn't
-// depend on the alignment.
-SCORE ObjScoreXP(const MSA &msa1, const MSA &msa2)
- {
- const unsigned uColCount1 = msa1.GetColCount();
- const unsigned uColCount2 = msa2.GetColCount();
- if (uColCount1 != uColCount2)
- Quit("ObjScoreXP, alignment lengths differ %u %u", uColCount1, uColCount2);
-
- const unsigned uSeqCount1 = msa1.GetSeqCount();
- const unsigned uSeqCount2 = msa2.GetSeqCount();
-
-#if TRACE
- Log(" Score Weight Weight Total\n");
- Log("---------- ------ ------ ----------\n");
-#endif
-
- SCORE scoreTotal = 0;
- unsigned uPairCount = 0;
- for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount1; ++uSeqIndex1)
- {
- const WEIGHT w1 = msa1.GetSeqWeight(uSeqIndex1);
- for (unsigned uSeqIndex2 = 0; uSeqIndex2 < uSeqCount2; ++uSeqIndex2)
- {
- const WEIGHT w2 = msa2.GetSeqWeight(uSeqIndex2);
- const WEIGHT w = w1*w2;
- SCORE scoreLetters = ScoreSeqPairLetters(msa1, uSeqIndex1, msa2, uSeqIndex2);
- SCORE scoreGaps = ScoreSeqPairGaps(msa1, uSeqIndex1, msa2, uSeqIndex2);
- SCORE scorePair = scoreLetters + scoreGaps;
- scoreTotal += w1*w2*scorePair;
- ++uPairCount;
-#if TRACE
- Log("%10.2f %6.3f %6.3f %10.2f >%s >%s\n",
- scorePair,
- w1,
- w2,
- scorePair*w1*w2,
- msa1.GetSeqName(uSeqIndex1),
- msa2.GetSeqName(uSeqIndex2));
-#endif
- }
- }
- if (0 == uPairCount)
- Quit("0 == uPairCount");
-
-#if TRACE
- Log("msa1=\n");
- msa1.LogMe();
- Log("msa2=\n");
- msa2.LogMe();
- Log("XP=%g\n", scoreTotal);
-#endif
-// return scoreTotal / uPairCount;
- return scoreTotal;
- }
Deleted: trunk/packages/muscle/trunk/objscoreda.cpp
===================================================================
--- trunk/packages/muscle/trunk/objscoreda.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/objscoreda.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,289 +0,0 @@
-#include "muscle.h"
-#include "msa.h"
-#include "profile.h"
-#include "objscore.h"
-
-#if DOUBLE_AFFINE
-
-#define TRACE 0
-#define TEST_SPFAST 0
-
-static SCORE GapPenalty(unsigned uLength, bool Term, SCORE g, SCORE e)
- {
- //if (Term)
- // {
- // switch (g_TermGap)
- // {
- // case TERMGAP_Full:
- // return g + (uLength - 1)*e;
-
- // case TERMGAP_Half:
- // return g/2 + (uLength - 1)*e;
-
- // case TERMGAP_Ext:
- // return uLength*e;
- // }
- // Quit("Bad termgap");
- // }
- //else
- // return g + (uLength - 1)*e;
- //return MINUS_INFINITY;
- return g + (uLength - 1)*e;
- }
-
-static SCORE GapPenalty(unsigned uLength, bool Term)
- {
- SCORE s1 = GapPenalty(uLength, Term, g_scoreGapOpen, g_scoreGapExtend);
-#if DOUBLE_AFFINE
- SCORE s2 = GapPenalty(uLength, Term, g_scoreGapOpen2, g_scoreGapExtend2);
- if (s1 > s2)
- return s1;
- return s2;
-#else
- return s1;
-#endif
- }
-
-static const MSA *g_ptrMSA1;
-static const MSA *g_ptrMSA2;
-static unsigned g_uSeqIndex1;
-static unsigned g_uSeqIndex2;
-
-static void LogGap(unsigned uStart, unsigned uEnd, unsigned uGapLength,
- bool bNTerm, bool bCTerm)
- {
- Log("%16.16s ", "");
- for (unsigned i = 0; i < uStart; ++i)
- Log(" ");
- unsigned uMyLength = 0;
- for (unsigned i = uStart; i <= uEnd; ++i)
- {
- bool bGap1 = g_ptrMSA1->IsGap(g_uSeqIndex1, i);
- bool bGap2 = g_ptrMSA2->IsGap(g_uSeqIndex2, i);
- if (!bGap1 && !bGap2)
- Quit("Error -- neither gapping");
- if (bGap1 && bGap2)
- Log(".");
- else
- {
- ++uMyLength;
- Log("-");
- }
- }
- SCORE s = GapPenalty(uGapLength, bNTerm || bCTerm);
- Log(" L=%d N%d C%d s=%.3g", uGapLength, bNTerm, bCTerm, s);
- Log("\n");
- if (uMyLength != uGapLength)
- Quit("Lengths differ");
-
- }
-
-static SCORE ScoreSeqPair(const MSA &msa1, unsigned uSeqIndex1,
- const MSA &msa2, unsigned uSeqIndex2, SCORE *ptrLetters, SCORE *ptrGaps)
- {
- g_ptrMSA1 = &msa1;
- g_ptrMSA2 = &msa2;
- g_uSeqIndex1 = uSeqIndex1;
- g_uSeqIndex2 = uSeqIndex2;
-
- const unsigned uColCount = msa1.GetColCount();
- const unsigned uColCount2 = msa2.GetColCount();
- if (uColCount != uColCount2)
- Quit("ScoreSeqPair, different lengths");
-
-#if TRACE
- Log("ScoreSeqPair\n");
- Log("%16.16s ", msa1.GetSeqName(uSeqIndex1));
- for (unsigned i = 0; i < uColCount; ++i)
- Log("%c", msa1.GetChar(uSeqIndex1, i));
- Log("\n");
- Log("%16.16s ", msa2.GetSeqName(uSeqIndex2));
- for (unsigned i = 0; i < uColCount; ++i)
- Log("%c", msa1.GetChar(uSeqIndex2, i));
- Log("\n");
-#endif
-
- SCORE scoreTotal = 0;
-
-// Substitution scores
- unsigned uFirstLetter1 = uInsane;
- unsigned uFirstLetter2 = uInsane;
- unsigned uLastLetter1 = uInsane;
- unsigned uLastLetter2 = uInsane;
- for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
- {
- bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
- bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);
- bool bWildcard1 = msa1.IsWildcard(uSeqIndex1, uColIndex);
- bool bWildcard2 = msa2.IsWildcard(uSeqIndex2, uColIndex);
-
- if (!bGap1)
- {
- if (uInsane == uFirstLetter1)
- uFirstLetter1 = uColIndex;
- uLastLetter1 = uColIndex;
- }
- if (!bGap2)
- {
- if (uInsane == uFirstLetter2)
- uFirstLetter2 = uColIndex;
- uLastLetter2 = uColIndex;
- }
-
- if (bGap1 || bGap2 || bWildcard1 || bWildcard2)
- continue;
-
- unsigned uLetter1 = msa1.GetLetter(uSeqIndex1, uColIndex);
- unsigned uLetter2 = msa2.GetLetter(uSeqIndex2, uColIndex);
-
- SCORE scoreMatch = (*g_ptrScoreMatrix)[uLetter1][uLetter2];
- scoreTotal += scoreMatch;
-#if TRACE
- Log("%c <-> %c = %7.1f %10.1f\n",
- msa1.GetChar(uSeqIndex1, uColIndex),
- msa2.GetChar(uSeqIndex2, uColIndex),
- scoreMatch,
- scoreTotal);
-#endif
- }
-
- *ptrLetters = scoreTotal;
-
-// Gap penalties
- unsigned uGapLength = uInsane;
- unsigned uGapStartCol = uInsane;
- bool bGapping1 = false;
- bool bGapping2 = false;
-
- for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
- {
- bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
- bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);
-
- if (bGap1 && bGap2)
- continue;
-
- if (bGapping1)
- {
- if (bGap1)
- ++uGapLength;
- else
- {
- bGapping1 = false;
- bool bNTerm = (uFirstLetter2 == uGapStartCol);
- bool bCTerm = (uLastLetter2 + 1 == uColIndex);
- SCORE scoreGap = GapPenalty(uGapLength, bNTerm || bCTerm);
- scoreTotal += scoreGap;
-#if TRACE
- LogGap(uGapStartCol, uColIndex - 1, uGapLength, bNTerm, bCTerm);
- Log("GAP %7.1f %10.1f\n",
- scoreGap,
- scoreTotal);
-#endif
- }
- continue;
- }
- else
- {
- if (bGap1)
- {
- uGapStartCol = uColIndex;
- bGapping1 = true;
- uGapLength = 1;
- continue;
- }
- }
-
- if (bGapping2)
- {
- if (bGap2)
- ++uGapLength;
- else
- {
- bGapping2 = false;
- bool bNTerm = (uFirstLetter1 == uGapStartCol);
- bool bCTerm = (uLastLetter1 + 1 == uColIndex);
- SCORE scoreGap = GapPenalty(uGapLength, bNTerm || bCTerm);
- scoreTotal += scoreGap;
-#if TRACE
- LogGap(uGapStartCol, uColIndex - 1, uGapLength, bNTerm, bCTerm);
- Log("GAP %7.1f %10.1f\n",
- scoreGap,
- scoreTotal);
-#endif
- }
- }
- else
- {
- if (bGap2)
- {
- uGapStartCol = uColIndex;
- bGapping2 = true;
- uGapLength = 1;
- }
- }
- }
-
- if (bGapping1 || bGapping2)
- {
- SCORE scoreGap = GapPenalty(uGapLength, true);
- scoreTotal += scoreGap;
-#if TRACE
- LogGap(uGapStartCol, uColCount - 1, uGapLength, false, true);
- Log("GAP %7.1f %10.1f\n",
- scoreGap,
- scoreTotal);
-#endif
- }
- *ptrGaps = scoreTotal - *ptrLetters;
- return scoreTotal;
- }
-
-// The usual sum-of-pairs objective score: sum the score
-// of the alignment of each pair of sequences.
-SCORE ObjScoreDA(const MSA &msa, SCORE *ptrLetters, SCORE *ptrGaps)
- {
- const unsigned uSeqCount = msa.GetSeqCount();
- SCORE scoreTotal = 0;
- unsigned uPairCount = 0;
-#if TRACE
- msa.LogMe();
- Log(" Score Weight Weight Total\n");
- Log("---------- ------ ------ ----------\n");
-#endif
- SCORE TotalLetters = 0;
- SCORE TotalGaps = 0;
- for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)
- {
- const WEIGHT w1 = msa.GetSeqWeight(uSeqIndex1);
- for (unsigned uSeqIndex2 = uSeqIndex1 + 1; uSeqIndex2 < uSeqCount; ++uSeqIndex2)
- {
- const WEIGHT w2 = msa.GetSeqWeight(uSeqIndex2);
- const WEIGHT w = w1*w2;
- SCORE Letters;
- SCORE Gaps;
- SCORE scorePair = ScoreSeqPair(msa, uSeqIndex1, msa, uSeqIndex2,
- &Letters, &Gaps);
- scoreTotal += w1*w2*scorePair;
- TotalLetters += w1*w2*Letters;
- TotalGaps += w1*w2*Gaps;
- ++uPairCount;
-#if TRACE
- Log("%10.2f %6.3f %6.3f %10.2f %d=%s %d=%s\n",
- scorePair,
- w1,
- w2,
- scorePair*w1*w2,
- uSeqIndex1,
- msa.GetSeqName(uSeqIndex1),
- uSeqIndex2,
- msa.GetSeqName(uSeqIndex2));
-#endif
- }
- }
- *ptrLetters = TotalLetters;
- *ptrGaps = TotalGaps;
- return scoreTotal;
- }
-
-#endif // DOUBLE_AFFINE
Deleted: trunk/packages/muscle/trunk/onexception.cpp
===================================================================
--- trunk/packages/muscle/trunk/onexception.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/onexception.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,15 +0,0 @@
-#include "muscle.h"
-#include <stdio.h>
-
-static char szOnExceptionMessage[] =
- {
- "\nFatal error, exception caught.\n"
- };
-
-void OnException()
- {
- fprintf(stderr, szOnExceptionMessage);
- Log(szOnExceptionMessage);
- Log("Finished %s\n", GetTimeAsStr());
- exit(EXIT_Except);
- }
Deleted: trunk/packages/muscle/trunk/options.cpp
===================================================================
--- trunk/packages/muscle/trunk/options.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/options.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,233 +0,0 @@
-#include "muscle.h"
-#include <stdio.h>
-
-struct VALUE_OPT
- {
- const char *m_pstrName;
- const char *m_pstrValue;
- };
-
-struct FLAG_OPT
- {
- const char *m_pstrName;
- bool m_bSet;
- };
-
-static VALUE_OPT ValueOpts[] =
- {
- "in", 0,
- "in1", 0,
- "in2", 0,
- "out", 0,
- "MaxIters", 0,
- "MaxHours", 0,
- "GapOpen", 0,
- "GapOpen2", 0,
- "GapExtend", 0,
- "GapExtend2", 0,
- "GapAmbig", 0,
- "Center", 0,
- "SmoothScoreCeil", 0,
- "MinBestColScore", 0,
- "MinSmoothScore", 0,
- "ObjScore", 0,
- "SmoothWindow", 0,
- "RefineWindow", 0,
- "FromWindow", 0,
- "ToWindow", 0,
- "SaveWindow", 0,
- "WindowOffset", 0,
- "FirstWindow", 0,
- "AnchorSpacing", 0,
- "Log", 0,
- "LogA", 0,
- "MaxTrees", 0,
- "SUEFF", 0,
- "Distance1", 0,
- "Distance2", 0,
- "Weight1", 0,
- "Weight2", 0,
- "Cluster1", 0,
- "Cluster2", 0,
- "Root1", 0,
- "Root2", 0,
- "Tree1", 0,
- "Tree2", 0,
- "UseTree", 0,
- "UseTree_NoWarn", 0,
- "DiagLength", 0,
- "DiagMargin", 0,
- "DiagBreak", 0,
- "Hydro", 0,
- "HydroFactor", 0,
- "SPScore", 0,
- "SeqType", 0,
- "MaxMB", 0,
- "ComputeWeights", 0,
- "MaxSubFam", 0,
- "ScoreFile", 0,
- "TermGaps", 0,
- "FASTAOut", 0,
- "CLWOut", 0,
- "CLWStrictOut", 0,
- "HTMLOut", 0,
- "MSFOut", 0,
- "PHYIOut", 0,
- "PHYSOut", 0,
- "Matrix", 0,
- };
-static int ValueOptCount = sizeof(ValueOpts)/sizeof(ValueOpts[0]);
-
-static FLAG_OPT FlagOpts[] =
- {
- "LE", false,
- "SP", false,
- "SV", false,
- "SPN", false,
- "Core", false,
- "NoCore", false,
- "Diags1", false,
- "Diags2", false,
- "Diags", false,
- "Quiet", false,
- "MSF", false,
- "Verbose", false,
- "Anchors", false,
- "NoAnchors", false,
- "Refine", false,
- "RefineW", false,
- "SW", false,
- "Profile", false,
- "PPScore", false,
- "Cluster", false,
- "Brenner", false,
- "Dimer", false,
- "clw", false,
- "clwstrict", false,
- "HTML", false,
- "Version", false,
- "Stable", false,
- "Group", false,
- "FASTA", false,
- "ProfDB", false,
- "PAS", false,
- "PHYI", false,
- "PHYS", false,
- };
-static int FlagOptCount = sizeof(FlagOpts)/sizeof(FlagOpts[0]);
-
-static bool TestSetFlagOpt(const char *Arg)
- {
- for (int i = 0; i < FlagOptCount; ++i)
- if (!stricmp(Arg, FlagOpts[i].m_pstrName))
- {
- FlagOpts[i].m_bSet = true;
- return true;
- }
- return false;
- }
-
-static bool TestSetValueOpt(const char *Arg, const char *Value)
- {
- for (int i = 0; i < ValueOptCount; ++i)
- if (!stricmp(Arg, ValueOpts[i].m_pstrName))
- {
- if (0 == Value)
- {
- fprintf(stderr, "Option -%s must have value\n", Arg);
- exit(EXIT_NotStarted);
- }
- ValueOpts[i].m_pstrValue = strsave(Value);
- return true;
- }
- return false;
- }
-
-bool FlagOpt(const char *Name)
- {
- for (int i = 0; i < FlagOptCount; ++i)
- if (!stricmp(Name, FlagOpts[i].m_pstrName))
- return FlagOpts[i].m_bSet;
- Quit("FlagOpt(%s) invalid", Name);
- return false;
- }
-
-const char *ValueOpt(const char *Name)
- {
- for (int i = 0; i < ValueOptCount; ++i)
- if (!stricmp(Name, ValueOpts[i].m_pstrName))
- return ValueOpts[i].m_pstrValue;
- Quit("ValueOpt(%s) invalid", Name);
- return 0;
- }
-
-void ProcessArgVect(int argc, char *argv[])
- {
- for (int iArgIndex = 0; iArgIndex < argc; )
- {
- const char *Arg = argv[iArgIndex];
- if (Arg[0] != '-')
- {
- fprintf(stderr, "Command-line option \"%s\" must start with '-'\n", Arg);
- exit(EXIT_NotStarted);
- }
- const char *ArgName = Arg + 1;
- if (TestSetFlagOpt(ArgName))
- {
- ++iArgIndex;
- continue;
- }
-
- char *Value = 0;
- if (iArgIndex < argc - 1)
- Value = argv[iArgIndex+1];
- if (TestSetValueOpt(ArgName, Value))
- {
- iArgIndex += 2;
- continue;
- }
- fprintf(stderr, "Invalid command line option \"%s\"\n", ArgName);
- Usage();
- exit(EXIT_NotStarted);
- }
- }
-
-void ProcessArgStr(const char *ArgStr)
- {
- const int MAX_ARGS = 64;
- char *argv[MAX_ARGS];
-
- if (0 == ArgStr)
- return;
-
-// Modifiable copy
- char *StrCopy = strsave(ArgStr);
-
- int argc = 0;
- bool bInArg = false;
- char *Str = StrCopy;
- while (char c = *Str)
- {
- if (isspace(c))
- {
- *Str = 0;
- bInArg = false;
- }
- else if (!bInArg)
- {
- bInArg = true;
- if (argc >= MAX_ARGS)
- Quit("Too many args in MUSCLE_CMDLINE");
- argv[argc++] = Str;
- }
- Str++;
- }
- ProcessArgVect(argc, argv);
- free(StrCopy);
- }
-
-void ListFlagOpts()
- {
- for (int i = 0; i < FlagOptCount; ++i)
- Log("%s %d\n", FlagOpts[i].m_pstrName, FlagOpts[i].m_bSet);
- }
Deleted: trunk/packages/muscle/trunk/outweights.cpp
===================================================================
--- trunk/packages/muscle/trunk/outweights.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/outweights.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,17 +0,0 @@
-#include "muscle.h"
-#include "msa.h"
-
-void OutWeights(const char *FileName, const MSA &msa)
- {
- FILE *f = fopen(FileName, "w");
- if (0 == f)
- Quit("Cannot open '%s'", FileName);
- const unsigned uSeqCount = msa.GetSeqCount();
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
- {
- const char *Id = msa.GetSeqName(uSeqIndex);
- const WEIGHT w = msa.GetSeqWeight(uSeqIndex);
- fprintf(f, "%s\t%.3g\n", Id, w);
- }
- fclose(f);
- }
Deleted: trunk/packages/muscle/trunk/pam200mafft.cpp
===================================================================
--- trunk/packages/muscle/trunk/pam200mafft.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/pam200mafft.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,32 +0,0 @@
-#include "muscle.h"
-
-// Adjusted PAM200 scoring matrix as used by default in MAFFT.
-// Katoh, Misawa, Kuma and Miyata (2002), NAR 30(14), 3059-3066.
-
-static float PAM200[23][23] =
- {
-// A C D E F G H I K L M N P Q R S T V W Y B Z X
- 408, 20, 54, 52, -182, 179, -68, 109, -35, -47, 39, 106, 206, -14, -12, 257, 293, 191, -306, -219, 0, 0, 0, // A
- 20, 1190, -228, -295, 94, 6, 63, -131, -184, -176, -112, -29, -122, -195, 49, 185, 13, -49, 199, 333, 0, 0, 0, // C
- 54, -228, 645, 516, -399, 168, 98, -225, 75, -341, -235, 352, -149, 142, -44, 65, 7, -147, -418, -128, 0, 0, 0, // D
- 52, -295, 516, 630, -460, 145, 45, -225, 195, -307, -222, 186, -121, 299, 54, -10, -36, -130, -366, -285, 0, 0, 0, // E
- -182, 94, -399, -460, 908, -387, 82, 100, -423, 340, 87, -216, -160, -274, -307, -31, -153, 51, 19, 604, 0, 0, 0, // F
- 179, 6, 168, 145, -387, 682, -94, -196, -14, -304, -226, 99, -57, -48, 117, 175, 41, -73, -38, -329, 0, 0, 0, // G
- -68, 63, 98, 45, 82, -94, 786, -185, 164, -72, -132, 258, 86, 388, 277, 55, -15, -197, -181, 488, 0, 0, 0, // H
- 109, -131, -225, -225, 100, -196, -185, 574, -204, 308, 411, -94, -95, -202, -188, 1, 182, 489, -254, -133, 0, 0, 0, // I
- -35, -184, 75, 195, -423, -14, 164, -204, 652, -229, -98, 206, -66, 335, 486, 22, 39, -207, -196, -244, 0, 0, 0, // K
- -47, -176, -341, -307, 340, -304, -72, 308, -229, 611, 389, -203, 73, -66, -150, -49, -21, 259, -46, -9, 0, 0, 0, // L
- 39, -112, -235, -222, 87, -226, -132, 411, -98, 389, 776, -111, -78, -104, -109, -29, 149, 351, -209, -162, 0, 0, 0, // M
- 106, -29, 352, 186, -216, 99, 258, -94, 206, -203, -111, 536, -1, 108, 93, 260, 188, -98, -359, 12, 0, 0, 0, // N
- 206, -122, -149, -121, -160, -57, 86, -95, -66, 73, -78, -1, 756, 142, 25, 241, 159, -55, -353, -206, 0, 0, 0, // P
- -14, -195, 142, 299, -274, -48, 388, -202, 335, -66, -104, 108, 142, 655, 321, 7, -15, -175, -223, -53, 0, 0, 0, // Q
- -12, 49, -44, 54, -307, 117, 277, -188, 486, -150, -109, 93, 25, 321, 626, 48, 16, -181, 124, -113, 0, 0, 0, // R
- 257, 185, 65, -10, -31, 175, 55, 1, 22, -49, -29, 260, 241, 7, 48, 373, 279, 28, -193, -35, 0, 0, 0, // S
- 293, 13, 7, -36, -153, 41, -15, 182, 39, -21, 149, 188, 159, -15, 16, 279, 442, 163, -323, -170, 0, 0, 0, // T
- 191, -49, -147, -130, 51, -73, -197, 489, -207, 259, 351, -98, -55, -175, -181, 28, 163, 525, -225, -177, 0, 0, 0, // V
- -306, 199, -418, -366, 19, -38, -181, -254, -196, -46, -209, -359, -353, -223, 124, -193, -323, -225, 1495, 83, 0, 0, 0, // W
- -219, 333, -128, -285, 604, -329, 488, -133, -244, -9, -162, 12, -206, -53, -113, -35, -170, -177, 83, 999, 0, 0, 0, // Y
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // B
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Z
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // X
- };
Deleted: trunk/packages/muscle/trunk/params.cpp
===================================================================
--- trunk/packages/muscle/trunk/params.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/params.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,608 +0,0 @@
-#include "muscle.h"
-#include "objscore.h"
-#include "profile.h"
-#include "enumopts.h"
-
-const double DEFAULT_MAX_MB_FRACT = 0.8;
-
-SCORE g_scoreCenter = 0;
-SCORE g_scoreGapExtend = 0;
-SCORE g_scoreGapOpen2 = MINUS_INFINITY;
-SCORE g_scoreGapExtend2 = MINUS_INFINITY;
-SCORE g_scoreGapAmbig = 0;
-SCORE g_scoreAmbigFactor = 0;
-
-extern SCOREMATRIX VTML_LA;
-extern SCOREMATRIX PAM200;
-extern SCOREMATRIX PAM200NoCenter;
-extern SCOREMATRIX VTML_SP;
-extern SCOREMATRIX VTML_SPNoCenter;
-extern SCOREMATRIX NUC_SP;
-
-PTR_SCOREMATRIX g_ptrScoreMatrix;
-
-const char *g_pstrInFileName = "-";
-const char *g_pstrOutFileName = "-";
-const char *g_pstrFASTAOutFileName = 0;
-const char *g_pstrMSFOutFileName = 0;
-const char *g_pstrClwOutFileName = 0;
-const char *g_pstrClwStrictOutFileName = 0;
-const char *g_pstrHTMLOutFileName = 0;
-const char *g_pstrPHYIOutFileName = 0;
-const char *g_pstrPHYSOutFileName = 0;
-
-const char *g_pstrFileName1 = 0;
-const char *g_pstrFileName2 = 0;
-
-const char *g_pstrSPFileName = 0;
-const char *g_pstrMatrixFileName = 0;
-
-const char *g_pstrUseTreeFileName = 0;
-bool g_bUseTreeNoWarn = false;
-
-const char *g_pstrComputeWeightsFileName;
-const char *g_pstrScoreFileName;
-
-const char *g_pstrProf1FileName = 0;
-const char *g_pstrProf2FileName = 0;
-
-unsigned g_uSmoothWindowLength = 7;
-unsigned g_uAnchorSpacing = 32;
-unsigned g_uMaxTreeRefineIters = 1;
-
-unsigned g_uRefineWindow = 200;
-unsigned g_uWindowFrom = 0;
-unsigned g_uWindowTo = 0;
-unsigned g_uSaveWindow = uInsane;
-unsigned g_uWindowOffset = 0;
-
-unsigned g_uMaxSubFamCount = 5;
-
-unsigned g_uHydrophobicRunLength = 5;
-float g_dHydroFactor = (float) 1.2;
-
-unsigned g_uMinDiagLength = 24; // TODO alpha -- should depend on alphabet?
-unsigned g_uMaxDiagBreak = 1;
-unsigned g_uDiagMargin = 5;
-
-float g_dSUEFF = (float) 0.1;
-
-bool g_bPrecompiledCenter = true;
-bool g_bNormalizeCounts = false;
-bool g_bDiags1 = false;
-bool g_bDiags2 = false;
-bool g_bAnchors = true;
-bool g_bQuiet = false;
-bool g_bVerbose = false;
-bool g_bRefine = false;
-bool g_bRefineW = false;
-bool g_bProfDB = false;
-bool g_bLow = false;
-bool g_bSW = false;
-bool g_bCluster = false;
-bool g_bProfile = false;
-bool g_bPPScore = false;
-bool g_bBrenner = false;
-bool g_bDimer = false;
-bool g_bVersion = false;
-bool g_bStable = false;
-bool g_bFASTA = false;
-bool g_bPAS = false;
-
-#if DEBUG
-bool g_bCatchExceptions = false;
-#else
-bool g_bCatchExceptions = true;
-#endif
-
-bool g_bMSF = false;
-bool g_bAln = false;
-bool g_bClwStrict = false;
-bool g_bHTML = false;
-bool g_bPHYI = false;
-bool g_bPHYS = false;
-
-unsigned g_uMaxIters = 8;
-unsigned long g_ulMaxSecs = 0;
-unsigned g_uMaxMB = 500;
-
-PPSCORE g_PPScore = PPSCORE_LE;
-OBJSCORE g_ObjScore = OBJSCORE_SPM;
-
-SEQWEIGHT g_SeqWeight1 = SEQWEIGHT_ClustalW;
-SEQWEIGHT g_SeqWeight2 = SEQWEIGHT_ClustalW;
-
-DISTANCE g_Distance1 = DISTANCE_Kmer6_6;
-DISTANCE g_Distance2 = DISTANCE_PctIdKimura;
-
-CLUSTER g_Cluster1 = CLUSTER_UPGMB;
-CLUSTER g_Cluster2 = CLUSTER_UPGMB;
-
-ROOT g_Root1 = ROOT_Pseudo;
-ROOT g_Root2 = ROOT_Pseudo;
-
-bool g_bDiags;
-
-SEQTYPE g_SeqType = SEQTYPE_Auto;
-
-TERMGAPS g_TermGaps = TERMGAPS_Half;
-
-//------------------------------------------------------
-// These parameters depending on the chosen prof-prof
-// score (g_PPScore), initialized to "Undefined".
-float g_dSmoothScoreCeil = fInsane;
-float g_dMinBestColScore = fInsane;
-float g_dMinSmoothScore = fInsane;
-SCORE g_scoreGapOpen = fInsane;
-//------------------------------------------------------
-
-static unsigned atou(const char *s)
- {
- return (unsigned) atoi(s);
- }
-
-const char *MaxSecsToStr()
- {
- if (0 == g_ulMaxSecs)
- return "(No limit)";
- return SecsToStr(g_ulMaxSecs);
- }
-
-void ListParams()
- {
- Log("\n");
- Log("%s\n", MUSCLE_LONG_VERSION);
- Log("http://www.drive5.com/muscle\n");
- Log("\n");
- Log("Profile-profile score %s\n", PPSCOREToStr(g_PPScore));
- Log("Max iterations %u\n", g_uMaxIters);
- Log("Max trees %u\n", g_uMaxTreeRefineIters);
- Log("Max time %s\n", MaxSecsToStr());
- Log("Max MB %u\n", g_uMaxMB);
- Log("Gap open %g\n", g_scoreGapOpen);
- Log("Gap extend (dimer) %g\n", g_scoreGapExtend);
- Log("Gap ambig factor %g\n", g_scoreAmbigFactor);
- Log("Gap ambig penalty %g\n", g_scoreGapAmbig);
- Log("Center (LE) %g\n", g_scoreCenter);
- Log("Term gaps %s\n", TERMGAPSToStr(g_TermGaps));
-
- Log("Smooth window length %u\n", g_uSmoothWindowLength);
- Log("Refine window length %u\n", g_uRefineWindow);
- Log("Min anchor spacing %u\n", g_uAnchorSpacing);
- Log("Min diag length (lambda) %u\n", g_uMinDiagLength);
- Log("Diag margin (mu) %u\n", g_uDiagMargin);
- Log("Min diag break %u\n", g_uMaxDiagBreak);
- Log("Hydrophobic window %u\n", g_uHydrophobicRunLength);
-
- Log("Hydrophobic gap factor %g\n", g_dHydroFactor);
- Log("Smooth score ceiling %g\n", g_dSmoothScoreCeil);
- Log("Min best col score %g\n", g_dMinBestColScore);
- Log("Min anchor score %g\n", g_dMinSmoothScore);
- Log("SUEFF %g\n", g_dSUEFF);
-
- Log("Brenner root MSA %s\n", BoolToStr(g_bBrenner));
- Log("Normalize counts %s\n", BoolToStr(g_bNormalizeCounts));
- Log("Diagonals (1) %s\n", BoolToStr(g_bDiags1));
- Log("Diagonals (2) %s\n", BoolToStr(g_bDiags2));
- Log("Anchors %s\n", BoolToStr(g_bAnchors));
- Log("MSF output format %s\n", BoolToStr(g_bMSF));
- Log("Phylip interleaved %s\n", BoolToStr(g_bPHYI));
- Log("Phylip sequential %s\n", BoolToStr(g_bPHYS));
- Log("ClustalW output format %s\n", BoolToStr(g_bAln));
- Log("Catch exceptions %s\n", BoolToStr(g_bCatchExceptions));
- Log("Quiet %s\n", BoolToStr(g_bQuiet));
- Log("Refine %s\n", BoolToStr(g_bRefine));
- Log("ProdfDB %s\n", BoolToStr(g_bProfDB));
- Log("Low complexity profiles %s\n", BoolToStr(g_bLow));
-
- Log("Objective score %s\n", OBJSCOREToStr(g_ObjScore));
-
- Log("Distance method (1) %s\n", DISTANCEToStr(g_Distance1));
- Log("Clustering method (1) %s\n", CLUSTERToStr(g_Cluster1));
- Log("Root method (1) %s\n", ROOTToStr(g_Root1));
- Log("Sequence weighting (1) %s\n", SEQWEIGHTToStr(g_SeqWeight1));
-
- Log("Distance method (2) %s\n", DISTANCEToStr(g_Distance2));
- Log("Clustering method (2) %s\n", CLUSTERToStr(g_Cluster2));
- Log("Root method (2) %s\n", ROOTToStr(g_Root2));
- Log("Sequence weighting (2) %s\n", SEQWEIGHTToStr(g_SeqWeight2));
-
- Log("\n");
- }
-
-static void SetDefaultsLE()
- {
- g_ptrScoreMatrix = &VTML_LA;
-
- //g_scoreGapOpen = (SCORE) -3.00;
- //g_scoreCenter = (SCORE) -0.55;
- g_scoreGapOpen = (SCORE) -2.9;
- g_scoreCenter = (SCORE) -0.52;
-
- g_bNormalizeCounts = true;
-
- //g_dSmoothScoreCeil = 5.0;
- //g_dMinBestColScore = 4.0;
- //g_dMinSmoothScore = 2.0;
- g_dSmoothScoreCeil = 3.0;
- g_dMinBestColScore = 2.0;
- g_dMinSmoothScore = 1.0;
-
- g_Distance1 = DISTANCE_Kmer6_6;
- g_Distance2 = DISTANCE_PctIdKimura;
- }
-
-static void SetDefaultsSP()
- {
- g_ptrScoreMatrix = &PAM200;
-
- g_scoreGapOpen = -1439;
- g_scoreCenter = 0.0; // center pre-added into score mx
-
- g_bNormalizeCounts = false;
-
- g_dSmoothScoreCeil = 200.0;
- g_dMinBestColScore = 300.0;
- g_dMinSmoothScore = 125.0;
-
- g_Distance1 = DISTANCE_Kmer6_6;
- g_Distance2 = DISTANCE_PctIdKimura;
- }
-
-static void SetDefaultsSV()
- {
- g_ptrScoreMatrix = &VTML_SP;
-
- g_scoreGapOpen = -300;
- g_scoreCenter = 0.0; // center pre-added into score mx
-
- g_bNormalizeCounts = false;
-
- g_dSmoothScoreCeil = 90.0;
- g_dMinBestColScore = 130.0;
- g_dMinSmoothScore = 40.0;
-
- g_Distance1 = DISTANCE_Kmer6_6;
- g_Distance2 = DISTANCE_PctIdKimura;
- }
-
-//static void SetDefaultsSPN()
-// {
-// g_ptrScoreMatrix = &NUC_SP;
-//
-// g_scoreGapOpen = -400;
-// g_scoreCenter = 0.0; // center pre-added into score mx
-//
-// g_bNormalizeCounts = false;
-//
-// g_dSmoothScoreCeil = 999.0; // disable
-// g_dMinBestColScore = 90;
-// g_dMinSmoothScore = 90;
-//
-// g_Distance1 = DISTANCE_Kmer4_6;
-// g_Distance2 = DISTANCE_PctIdKimura;
-// }
-
-static void SetDefaultsSPN_DNA()
- {
- g_ptrScoreMatrix = &NUC_SP;
-
- g_scoreGapOpen = -400;
- g_scoreCenter = 0.0; // center pre-added into score mx
- g_scoreGapExtend = 0.0;
-
- g_bNormalizeCounts = false;
-
- g_dSmoothScoreCeil = 999.0; // disable
- g_dMinBestColScore = 90;
- g_dMinSmoothScore = 90;
-
- g_Distance1 = DISTANCE_Kmer4_6;
- g_Distance2 = DISTANCE_PctIdKimura;
- }
-
-static void SetDefaultsSPN_RNA()
- {
- g_ptrScoreMatrix = &NUC_SP;
-
- g_scoreGapOpen = -420;
- g_scoreCenter = -300; // total center = NUC_EXTEND - 300
- g_scoreGapExtend = 0.0;
-
- g_bNormalizeCounts = false;
-
- g_dSmoothScoreCeil = 999.0; // disable
- g_dMinBestColScore = 90;
- g_dMinSmoothScore = 90;
-
- g_Distance1 = DISTANCE_Kmer4_6;
- g_Distance2 = DISTANCE_PctIdKimura;
- }
-
-static void FlagParam(const char *OptName, bool *ptrParam, bool bValueIfFlagSet)
- {
- bool bIsSet = FlagOpt(OptName);
- if (bIsSet)
- *ptrParam = bValueIfFlagSet;
- }
-
-static void StrParam(const char *OptName, const char **ptrptrParam)
- {
- const char *opt = ValueOpt(OptName);
- if (0 != opt)
- *ptrptrParam = opt;
- }
-
-static void FloatParam(const char *OptName, float *ptrParam)
- {
- const char *opt = ValueOpt(OptName);
- if (0 != opt)
- *ptrParam = (float) atof(opt);
- }
-
-static void UintParam(const char *OptName, unsigned *ptrParam)
- {
- const char *opt = ValueOpt(OptName);
- if (0 != opt)
- *ptrParam = atou(opt);
- }
-
-static void EnumParam(const char *OptName, EnumOpt *Opts, int *Param)
- {
- const char *Value = ValueOpt(OptName);
- if (0 == Value)
- return;
-
- for (;;)
- {
- if (0 == Opts->pstrOpt)
- Quit("Invalid parameter -%s %s", OptName, Value);
- if (0 == stricmp(Value, Opts->pstrOpt))
- {
- *Param = Opts->iValue;
- return;
- }
- ++Opts;
- }
- }
-
-static void SetPPDefaultParams()
- {
- switch (g_PPScore)
- {
- case PPSCORE_SP:
- SetDefaultsSP();
- break;
-
- case PPSCORE_LE:
- SetDefaultsLE();
- break;
-
- case PPSCORE_SV:
- SetDefaultsSV();
- break;
-
- case PPSCORE_SPN:
- switch (g_Alpha)
- {
- case ALPHA_DNA:
- SetDefaultsSPN_DNA();
- break;
- case ALPHA_RNA:
- SetDefaultsSPN_RNA();
- break;
- default:
- Quit("Invalid alpha %d", g_Alpha);
- }
- break;
-
- default:
- Quit("Invalid g_PPScore");
- }
- }
-
-static void SetPPCommandLineParams()
- {
- FloatParam("GapOpen", &g_scoreGapOpen);
- FloatParam("GapOpen2", &g_scoreGapOpen2);
- FloatParam("GapExtend", &g_scoreGapExtend);
- FloatParam("GapExtend2", &g_scoreGapExtend2);
- FloatParam("GapAmbig", &g_scoreAmbigFactor);
- FloatParam("Center", &g_scoreCenter);
- FloatParam("SmoothScoreCeil", &g_dSmoothScoreCeil);
- FloatParam("MinBestColScore", &g_dMinBestColScore);
- FloatParam("MinSmoothScore", &g_dMinSmoothScore);
-
- EnumParam("Distance1", DISTANCE_Opts, (int *) &g_Distance1);
- EnumParam("Distance2", DISTANCE_Opts, (int *) &g_Distance2);
- }
-
-void SetPPScore(bool bRespectFlagOpts)
- {
- if (bRespectFlagOpts)
- {
- if (FlagOpt("SP"))
- g_PPScore = PPSCORE_SP;
- else if (FlagOpt("LE"))
- g_PPScore = PPSCORE_LE;
- else if (FlagOpt("SV"))
- g_PPScore = PPSCORE_SV;
- else if (FlagOpt("SPN"))
- g_PPScore = PPSCORE_SPN;
- }
-
- switch (g_PPScore)
- {
- case PPSCORE_LE:
- case PPSCORE_SP:
- case PPSCORE_SV:
- if (ALPHA_RNA == g_Alpha || ALPHA_DNA == g_Alpha)
- g_PPScore = PPSCORE_SPN;
- break;
- case PPSCORE_SPN:
- if (ALPHA_Amino == g_Alpha)
- g_PPScore = PPSCORE_LE;
- break;
- }
-
- SetPPDefaultParams();
- SetPPCommandLineParams();
-
- if (g_bVerbose)
- ListParams();
- }
-
-void SetPPScore(PPSCORE p)
- {
- g_PPScore = p;
- SetPPScore(true);
- }
-
-static void SetMaxSecs()
- {
- float fMaxHours = 0.0;
- FloatParam("MaxHours", &fMaxHours);
- if (0.0 == fMaxHours)
- return;
- g_ulMaxSecs = (unsigned long) (fMaxHours*60*60);
- }
-
-static bool CanDoLowComplexity()
- {
- if (g_SeqWeight1 != SEQWEIGHT_ClustalW)
- return false;
- if (1 == g_uMaxIters)
- return true;
- return g_SeqWeight2 == SEQWEIGHT_ClustalW;
- }
-
-bool MissingCommand()
- {
- if (strcmp(g_pstrInFileName, "-"))
- return false;
- if (0 != g_pstrFileName1)
- return false;
- if (0 != g_pstrSPFileName)
- return false;
- return true;
- }
-
-void SetParams()
- {
- SetMaxSecs();
-
- StrParam("in", &g_pstrInFileName);
- StrParam("out", &g_pstrOutFileName);
-
- StrParam("FASTAOut", &g_pstrFASTAOutFileName);
- StrParam("ClwOut", &g_pstrClwOutFileName);
- StrParam("ClwStrictOut", &g_pstrClwStrictOutFileName);
- StrParam("HTMLOut", &g_pstrHTMLOutFileName);
- StrParam("PHYIOut", &g_pstrPHYIOutFileName);
- StrParam("PHYSOut", &g_pstrPHYSOutFileName);
- StrParam("MSFOut", &g_pstrMSFOutFileName);
-
- StrParam("in1", &g_pstrFileName1);
- StrParam("in2", &g_pstrFileName2);
-
- StrParam("Matrix", &g_pstrMatrixFileName);
- StrParam("SPScore", &g_pstrSPFileName);
-
- StrParam("UseTree_NoWarn", &g_pstrUseTreeFileName);
- if (0 != g_pstrUseTreeFileName)
- g_bUseTreeNoWarn = true;
-
- StrParam("UseTree", &g_pstrUseTreeFileName);
- StrParam("ComputeWeights", &g_pstrComputeWeightsFileName);
- StrParam("ScoreFile", &g_pstrScoreFileName);
-
- FlagParam("Core", &g_bCatchExceptions, false);
- FlagParam("NoCore", &g_bCatchExceptions, true);
-
- FlagParam("Diags1", &g_bDiags1, true);
- FlagParam("Diags2", &g_bDiags2, true);
-
- bool Diags = false;
- FlagParam("Diags", &Diags, true);
- if (Diags)
- {
- g_bDiags1 = true;
- g_bDiags2 = true;
- }
-
- FlagParam("Anchors", &g_bAnchors, true);
- FlagParam("NoAnchors", &g_bAnchors, false);
-
- FlagParam("Quiet", &g_bQuiet, true);
- FlagParam("Verbose", &g_bVerbose, true);
- FlagParam("Version", &g_bVersion, true);
- FlagParam("Stable", &g_bStable, true);
- FlagParam("Group", &g_bStable, false);
- FlagParam("Refine", &g_bRefine, true);
- FlagParam("RefineW", &g_bRefineW, true);
- FlagParam("ProfDB", &g_bProfDB, true);
- FlagParam("SW", &g_bSW, true);
- FlagParam("Cluster", &g_bCluster, true);
- FlagParam("Profile", &g_bProfile, true);
- FlagParam("PPScore", &g_bPPScore, true);
- FlagParam("Brenner", &g_bBrenner, true);
- FlagParam("Dimer", &g_bDimer, true);
-
- FlagParam("MSF", &g_bMSF, true);
- FlagParam("PHYI", &g_bPHYI, true);
- FlagParam("PHYS", &g_bPHYS, true);
- FlagParam("clw", &g_bAln, true);
- FlagParam("HTML", &g_bHTML, true);
- FlagParam("FASTA", &g_bFASTA, true);
- FlagParam("PAS", &g_bPAS, true);
-
- bool b = false;
- FlagParam("clwstrict", &b, true);
- if (b)
- {
- g_bAln = true;
- g_bClwStrict = true;
- }
-
- UintParam("MaxIters", &g_uMaxIters);
- UintParam("MaxTrees", &g_uMaxTreeRefineIters);
- UintParam("SmoothWindow", &g_uSmoothWindowLength);
- UintParam("RefineWindow", &g_uRefineWindow);
- UintParam("FromWindow", &g_uWindowFrom);
- UintParam("ToWindow", &g_uWindowTo);
- UintParam("SaveWindow", &g_uSaveWindow);
- UintParam("WindowOffset", &g_uWindowOffset);
- UintParam("AnchorSpacing", &g_uAnchorSpacing);
- UintParam("DiagLength", &g_uMinDiagLength);
- UintParam("DiagMargin", &g_uDiagMargin);
- UintParam("DiagBreak", &g_uMaxDiagBreak);
- UintParam("Hydro", &g_uHydrophobicRunLength);
- UintParam("MaxSubFam", &g_uMaxSubFamCount);
-
- FloatParam("SUEFF", &g_dSUEFF);
- FloatParam("HydroFactor", &g_dHydroFactor);
-
- EnumParam("ObjScore", OBJSCORE_Opts, (int *) &g_ObjScore);
- EnumParam("TermGaps", TERMGAPS_Opts, (int *) &g_TermGaps);
-
- EnumParam("Weight1", SEQWEIGHT_Opts, (int *) &g_SeqWeight1);
- EnumParam("Weight2", SEQWEIGHT_Opts, (int *) &g_SeqWeight2);
-
- EnumParam("Cluster1", CLUSTER_Opts, (int *) &g_Cluster1);
- EnumParam("Cluster2", CLUSTER_Opts, (int *) &g_Cluster2);
-
- EnumParam("Root1", ROOT_Opts, (int *) &g_Root1);
- EnumParam("Root2", ROOT_Opts, (int *) &g_Root2);
-
- EnumParam("SeqType", SEQTYPE_Opts, (int *) &g_SeqType);
-
- g_scoreGapAmbig = g_scoreGapOpen*g_scoreAmbigFactor;
- g_bLow = CanDoLowComplexity();
-
- if (g_bDimer)
- g_bPrecompiledCenter = false;
-
- UintParam("MaxMB", &g_uMaxMB);
- if (0 == ValueOpt("MaxMB"))
- g_uMaxMB = (unsigned) (GetRAMSizeMB()*DEFAULT_MAX_MB_FRACT);
- }
Deleted: trunk/packages/muscle/trunk/params.h
===================================================================
--- trunk/packages/muscle/trunk/params.h 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/params.h 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,114 +0,0 @@
-#ifndef params_h
-#define params_h
-
-extern const char *g_pstrInFileName;
-extern const char *g_pstrOutFileName;
-
-extern const char *g_pstrFASTAOutFileName;
-extern const char *g_pstrMSFOutFileName;
-extern const char *g_pstrClwOutFileName;
-extern const char *g_pstrClwStrictOutFileName;
-extern const char *g_pstrHTMLOutFileName;
-extern const char *g_pstrPHYIOutFileName;
-extern const char *g_pstrPHYSOutFileName;
-
-extern const char *g_pstrFileName1;
-extern const char *g_pstrFileName2;
-
-extern const char *g_pstrSPFileName;
-extern const char *g_pstrMatrixFileName;
-
-extern const char *g_pstrUseTreeFileName;
-extern bool g_bUseTreeNoWarn;
-
-extern const char *g_pstrComputeWeightsFileName;
-extern const char *g_pstrScoreFileName;
-
-extern SCORE g_scoreGapOpen;
-extern SCORE g_scoreCenter;
-extern SCORE g_scoreGapExtend;
-extern SCORE g_scoreGapAmbig;
-
-#if DOUBLE_AFFINE
-extern SCORE g_scoreGapOpen2;
-extern SCORE g_scoreGapExtend2;
-#endif
-
-extern unsigned g_uSmoothWindowLength;
-extern unsigned g_uAnchorSpacing;
-extern unsigned g_uMaxTreeRefineIters;
-
-extern unsigned g_uMinDiagLength;
-extern unsigned g_uMaxDiagBreak;
-extern unsigned g_uDiagMargin;
-
-extern unsigned g_uRefineWindow;
-extern unsigned g_uWindowFrom;
-extern unsigned g_uWindowTo;
-extern unsigned g_uSaveWindow;
-extern unsigned g_uWindowOffset;
-
-extern unsigned g_uMaxSubFamCount;
-
-extern unsigned g_uHydrophobicRunLength;
-extern float g_dHydroFactor;
-
-extern float g_dSmoothScoreCeil;
-extern float g_dMinBestColScore;
-extern float g_dMinSmoothScore;
-extern float g_dSUEFF;
-
-extern bool g_bPrecompiledCenter;
-extern bool g_bNormalizeCounts;
-extern bool g_bDiags1;
-extern bool g_bDiags2;
-extern bool g_bDiags;
-extern bool g_bAnchors;
-extern bool g_bCatchExceptions;
-
-extern bool g_bMSF;
-extern bool g_bAln;
-extern bool g_bClwStrict;
-extern bool g_bHTML;
-extern bool g_bPHYI;
-extern bool g_bPHYS;
-
-extern bool g_bQuiet;
-extern bool g_bVerbose;
-extern bool g_bRefine;
-extern bool g_bRefineW;
-extern bool g_bRefineX;
-extern bool g_bLow;
-extern bool g_bSW;
-extern bool g_bCluster;
-extern bool g_bProfile;
-extern bool g_bProfDB;
-extern bool g_bPPScore;
-extern bool g_bBrenner;
-extern bool g_bDimer;
-extern bool g_bVersion;
-extern bool g_bStable;
-extern bool g_bFASTA;
-extern bool g_bPAS;
-
-extern PPSCORE g_PPScore;
-extern OBJSCORE g_ObjScore;
-
-extern DISTANCE g_Distance1;
-extern CLUSTER g_Cluster1;
-extern ROOT g_Root1;
-extern SEQWEIGHT g_SeqWeight1;
-
-extern DISTANCE g_Distance2;
-extern CLUSTER g_Cluster2;
-extern ROOT g_Root2;
-extern SEQWEIGHT g_SeqWeight2;
-
-extern unsigned g_uMaxIters;
-extern unsigned long g_ulMaxSecs;
-extern unsigned g_uMaxMB;
-
-extern SEQTYPE g_SeqType;
-extern TERMGAPS g_TermGaps;
-
-#endif // params_h
Deleted: trunk/packages/muscle/trunk/phy.cpp
===================================================================
--- trunk/packages/muscle/trunk/phy.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/phy.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,1069 +0,0 @@
-#include "muscle.h"
-#include "tree.h"
-#include <math.h>
-
-#define TRACE 0
-
-/***
-Node has 0 to 3 neighbors:
- 0 neighbors: singleton root
- 1 neighbor: leaf, neighbor is parent
- 2 neighbors: non-singleton root
- 3 neighbors: internal node (other than root)
-
-Minimal rooted tree is single node.
-Minimal unrooted tree is single edge.
-Leaf node always has nulls in neighbors 2 and 3, neighbor 1 is parent.
-When tree is rooted, neighbor 1=parent, 2=left, 3=right.
-***/
-
-// Checks that two nodes list each other as neighbors and, when the edge
-// has a length, that both ends store the same value.
-// Any inconsistency is reported through Quit().
-void Tree::AssertAreNeighbors(unsigned uNodeIndex1, unsigned uNodeIndex2) const
-    {
-    const bool bBothInRange =
-      uNodeIndex1 < m_uNodeCount && uNodeIndex2 < m_uNodeCount;
-    if (!bBothInRange)
-        Quit("AssertAreNeighbors(%u,%u), are %u nodes",
-          uNodeIndex1, uNodeIndex2, m_uNodeCount);
-
-    // Node 1 must hold node 2 in one of its three neighbor slots.
-    const bool bForward =
-      m_uNeighbor1[uNodeIndex1] == uNodeIndex2 ||
-      m_uNeighbor2[uNodeIndex1] == uNodeIndex2 ||
-      m_uNeighbor3[uNodeIndex1] == uNodeIndex2;
-    if (!bForward)
-        {
-        LogMe();
-        Quit("AssertAreNeighbors(%u,%u) failed", uNodeIndex1, uNodeIndex2);
-        }
-
-    // ... and node 2 must hold node 1.
-    const bool bBackward =
-      m_uNeighbor1[uNodeIndex2] == uNodeIndex1 ||
-      m_uNeighbor2[uNodeIndex2] == uNodeIndex1 ||
-      m_uNeighbor3[uNodeIndex2] == uNodeIndex1;
-    if (!bBackward)
-        {
-        LogMe();
-        Quit("AssertAreNeighbors(%u,%u) failed", uNodeIndex1, uNodeIndex2);
-        }
-
-    // The length is stored once per end; the two copies must agree.
-    if (HasEdgeLength(uNodeIndex1, uNodeIndex2))
-        {
-        const double dForward = GetEdgeLength(uNodeIndex1, uNodeIndex2);
-        const double dBackward = GetEdgeLength(uNodeIndex2, uNodeIndex1);
-        if (dForward != dBackward)
-            {
-            LogMe();
-            Quit("Tree::AssertAreNeighbors, Edge length disagrees %u, %u",
-              uNodeIndex1, uNodeIndex2);
-            }
-        }
-    }
-
-// Checks the structural invariants of one node and calls Quit() when one
-// of them is violated:
-//  - a node with exactly two neighbors must be the root of a rooted tree;
-//  - neighbor slots 2 and 3 are either both null or both set;
-//  - every non-null neighbor must list this node as its neighbor too;
-//  - no two non-null neighbor slots may name the same node;
-//  - in a rooted tree, a node without parent must be the root, any other
-//    node must be the left or right child of its parent.
-void Tree::ValidateNode(unsigned uNodeIndex) const
-    {
-    if (uNodeIndex >= m_uNodeCount)
-        Quit("ValidateNode(%u), %u nodes", uNodeIndex, m_uNodeCount);
-
-    const unsigned uNeighborCount = GetNeighborCount(uNodeIndex);
-
-    if (2 == uNeighborCount)
-        {
-        if (!m_bRooted)
-            {
-            LogMe();
-            Quit("Tree::ValidateNode: Node %u has two neighbors, tree is not rooted",
-              uNodeIndex);
-            }
-        if (uNodeIndex != m_uRootNodeIndex)
-            {
-            LogMe();
-            Quit("Tree::ValidateNode: Node %u has two neighbors, but not root node=%u",
-              uNodeIndex, m_uRootNodeIndex);
-            }
-        }
-
-    const unsigned n1 = m_uNeighbor1[uNodeIndex];
-    const unsigned n2 = m_uNeighbor2[uNodeIndex];
-    const unsigned n3 = m_uNeighbor3[uNodeIndex];
-
-    // The messages below now carry a %u for the node index; previously the
-    // index was passed to Quit() without a matching format specifier.
-    if (NULL_NEIGHBOR == n2 && NULL_NEIGHBOR != n3)
-        {
-        LogMe();
-        Quit("Tree::ValidateNode(%u), n2=null, n3!=null", uNodeIndex);
-        }
-    if (NULL_NEIGHBOR == n3 && NULL_NEIGHBOR != n2)
-        {
-        LogMe();
-        Quit("Tree::ValidateNode(%u), n3=null, n2!=null", uNodeIndex);
-        }
-
-    if (n1 != NULL_NEIGHBOR)
-        AssertAreNeighbors(uNodeIndex, n1);
-    if (n2 != NULL_NEIGHBOR)
-        AssertAreNeighbors(uNodeIndex, n2);
-    if (n3 != NULL_NEIGHBOR)
-        AssertAreNeighbors(uNodeIndex, n3);
-
-    // One test covers every pair: a pair involving n1 is caught by the first
-    // clause, the remaining pair (n2, n3) by the second.
-    const bool bDuplicate =
-      (n1 != NULL_NEIGHBOR && (n1 == n2 || n1 == n3)) ||
-      (n2 != NULL_NEIGHBOR && n2 == n3);
-    if (bDuplicate)
-        {
-        LogMe();
-        Quit("Tree::ValidateNode, duplicate neighbors in node %u", uNodeIndex);
-        }
-
-    if (IsRooted())
-        {
-        const unsigned uParent = GetParent(uNodeIndex);
-        if (NULL_NEIGHBOR == uParent)
-            {
-            if (uNodeIndex != m_uRootNodeIndex)
-                {
-                LogMe();
-                Quit("Tree::ValidateNode(%u), no parent", uNodeIndex);
-                }
-            }
-        else if (GetLeft(uParent) != uNodeIndex &&
-          GetRight(uParent) != uNodeIndex)
-            {
-            LogMe();
-            Quit("Tree::ValidateNode(%u), parent / child mismatch", uNodeIndex);
-            }
-        }
-    }
-
-// Runs ValidateNode() on every node of the tree, in index order.
-void Tree::Validate() const
-    {
-    unsigned uNext = 0;
-    while (uNext < m_uNodeCount)
-        {
-        ValidateNode(uNext);
-        ++uNext;
-        }
-    }
-
-// Returns true when uNodeIndex2 occupies one of the three neighbor
-// slots of uNodeIndex1. Only node 1's slots are inspected.
-bool Tree::IsEdge(unsigned uNodeIndex1, unsigned uNodeIndex2) const
-    {
-    assert(uNodeIndex1 < m_uNodeCount && uNodeIndex2 < m_uNodeCount);
-
-    if (m_uNeighbor1[uNodeIndex1] == uNodeIndex2)
-        return true;
-    if (m_uNeighbor2[uNodeIndex1] == uNodeIndex2)
-        return true;
-    return m_uNeighbor3[uNodeIndex1] == uNodeIndex2;
-    }
-
-// Returns the length stored at node 1 for its edge to node 2.
-// The edge must exist and have a length (checked by assertions only).
-double Tree::GetEdgeLength(unsigned uNodeIndex1, unsigned uNodeIndex2) const
-    {
-    assert(uNodeIndex1 < m_uNodeCount && uNodeIndex2 < m_uNodeCount);
-    assert(HasEdgeLength(uNodeIndex1, uNodeIndex2));
-
-    // Select the length array that belongs to the slot holding node 2;
-    // slot 3 is the fallback when neither slot 1 nor slot 2 matches.
-    const double *ptrLengths = m_dEdgeLength3;
-    if (m_uNeighbor1[uNodeIndex1] == uNodeIndex2)
-        ptrLengths = m_dEdgeLength1;
-    else if (m_uNeighbor2[uNodeIndex1] == uNodeIndex2)
-        ptrLengths = m_dEdgeLength2;
-    else
-        {
-        assert(m_uNeighbor3[uNodeIndex1] == uNodeIndex2);
-        }
-    return ptrLengths[uNodeIndex1];
-    }
-
-// Grows every per-node array by a fixed increment of 100 entries and
-// copies the existing entries across.
-// New id entries are filled with 0xff bytes and new name entries are null;
-// the remaining new entries are left uninitialized.
-void Tree::ExpandCache()
-    {
-    const unsigned uNodeCount = 100;
-    unsigned uNewCacheCount = m_uCacheCount + uNodeCount;
-    unsigned *uNewNeighbor1 = new unsigned[uNewCacheCount];
-    unsigned *uNewNeighbor2 = new unsigned[uNewCacheCount];
-    unsigned *uNewNeighbor3 = new unsigned[uNewCacheCount];
-
-    unsigned *uNewIds = new unsigned[uNewCacheCount];
-    memset(uNewIds, 0xff, uNewCacheCount*sizeof(unsigned));
-
-    double *dNewEdgeLength1 = new double[uNewCacheCount];
-    double *dNewEdgeLength2 = new double[uNewCacheCount];
-    double *dNewEdgeLength3 = new double[uNewCacheCount];
-    double *dNewHeight = new double[uNewCacheCount];
-
-    bool *bNewHasEdgeLength1 = new bool[uNewCacheCount];
-    bool *bNewHasEdgeLength2 = new bool[uNewCacheCount];
-    bool *bNewHasEdgeLength3 = new bool[uNewCacheCount];
-    bool *bNewHasHeight = new bool[uNewCacheCount];
-
-    char **ptrNewName = new char *[uNewCacheCount];
-    memset(ptrNewName, 0, uNewCacheCount*sizeof(char *));
-
-    if (m_uCacheCount > 0)
-        {
-        const unsigned uUnsignedBytes = m_uCacheCount*sizeof(unsigned);
-        memcpy(uNewNeighbor1, m_uNeighbor1, uUnsignedBytes);
-        memcpy(uNewNeighbor2, m_uNeighbor2, uUnsignedBytes);
-        memcpy(uNewNeighbor3, m_uNeighbor3, uUnsignedBytes);
-
-        memcpy(uNewIds, m_Ids, uUnsignedBytes);
-
-        const unsigned uEdgeBytes = m_uCacheCount*sizeof(double);
-        memcpy(dNewEdgeLength1, m_dEdgeLength1, uEdgeBytes);
-        memcpy(dNewEdgeLength2, m_dEdgeLength2, uEdgeBytes);
-        memcpy(dNewEdgeLength3, m_dEdgeLength3, uEdgeBytes);
-        memcpy(dNewHeight, m_dHeight, uEdgeBytes);
-
-        // Each flag array is copied from its own source; copying all three
-        // from m_bHasEdgeLength1 would overwrite the flags of slots 2 and 3.
-        const unsigned uBoolBytes = m_uCacheCount*sizeof(bool);
-        memcpy(bNewHasEdgeLength1, m_bHasEdgeLength1, uBoolBytes);
-        memcpy(bNewHasEdgeLength2, m_bHasEdgeLength2, uBoolBytes);
-        memcpy(bNewHasEdgeLength3, m_bHasEdgeLength3, uBoolBytes);
-        memcpy(bNewHasHeight, m_bHasHeight, uBoolBytes);
-
-        const unsigned uNameBytes = m_uCacheCount*sizeof(char *);
-        memcpy(ptrNewName, m_ptrName, uNameBytes);
-
-        delete[] m_uNeighbor1;
-        delete[] m_uNeighbor2;
-        delete[] m_uNeighbor3;
-
-        delete[] m_Ids;
-
-        delete[] m_dEdgeLength1;
-        delete[] m_dEdgeLength2;
-        delete[] m_dEdgeLength3;
-        // The old height array must be released like all the others.
-        delete[] m_dHeight;
-
-        delete[] m_bHasEdgeLength1;
-        delete[] m_bHasEdgeLength2;
-        delete[] m_bHasEdgeLength3;
-        delete[] m_bHasHeight;
-
-        // Only the pointer table is released; the name strings themselves
-        // are now referenced from ptrNewName.
-        delete[] m_ptrName;
-        }
-    m_uCacheCount = uNewCacheCount;
-    m_uNeighbor1 = uNewNeighbor1;
-    m_uNeighbor2 = uNewNeighbor2;
-    m_uNeighbor3 = uNewNeighbor3;
-    m_Ids = uNewIds;
-    m_dEdgeLength1 = dNewEdgeLength1;
-    m_dEdgeLength2 = dNewEdgeLength2;
-    m_dEdgeLength3 = dNewEdgeLength3;
-    m_dHeight = dNewHeight;
-    m_bHasEdgeLength1 = bNewHasEdgeLength1;
-    m_bHasEdgeLength2 = bNewHasEdgeLength2;
-    m_bHasEdgeLength3 = bNewHasEdgeLength3;
-    m_bHasHeight = bNewHasHeight;
-    m_ptrName = ptrNewName;
-    }
-
-// Resets the tree to a rooted tree made of one node and no edges.
-// That node has index 0 and is the root.
-void Tree::CreateRooted()
-    {
-    Clear();
-    ExpandCache();
-
-    const unsigned uRoot = 0;
-    m_uNodeCount = 1;
-    m_uRootNodeIndex = uRoot;
-    m_bRooted = true;
-
-    // No neighbors, no edge lengths, no cached height.
-    m_uNeighbor1[uRoot] = NULL_NEIGHBOR;
-    m_bHasEdgeLength1[uRoot] = false;
-
-    m_uNeighbor2[uRoot] = NULL_NEIGHBOR;
-    m_bHasEdgeLength2[uRoot] = false;
-
-    m_uNeighbor3[uRoot] = NULL_NEIGHBOR;
-    m_bHasEdgeLength3[uRoot] = false;
-
-    m_bHasHeight[uRoot] = false;
-
-#if DEBUG
-    Validate();
-#endif
-    }
-
-// Creates unrooted tree with single edge of length dEdgeLength.
-// Nodes for that edge are always 0 and 1; each is the other's neighbor 1.
-void Tree::CreateUnrooted(double dEdgeLength)
-    {
-    Clear();
-    ExpandCache();
-
-    // CreateRooted() sets the node count explicitly after the same
-    // Clear()/ExpandCache() sequence, so the count has to be set here as
-    // well; otherwise the two nodes written below are not part of the tree.
-    m_uNodeCount = 2;
-
-    m_uNeighbor1[0] = 1;
-    m_uNeighbor2[0] = NULL_NEIGHBOR;
-    m_uNeighbor3[0] = NULL_NEIGHBOR;
-
-    m_uNeighbor1[1] = 0;
-    m_uNeighbor2[1] = NULL_NEIGHBOR;
-    m_uNeighbor3[1] = NULL_NEIGHBOR;
-
-    m_dEdgeLength1[0] = dEdgeLength;
-    m_dEdgeLength1[1] = dEdgeLength;
-
-    m_bHasEdgeLength1[0] = true;
-    m_bHasEdgeLength1[1] = true;
-
-    // ExpandCache() leaves the flag arrays uninitialized; give the unused
-    // slots and the height flags a defined value.
-    m_bHasEdgeLength2[0] = false;
-    m_bHasEdgeLength3[0] = false;
-    m_bHasEdgeLength2[1] = false;
-    m_bHasEdgeLength3[1] = false;
-    m_bHasHeight[0] = false;
-    m_bHasHeight[1] = false;
-
-    m_bRooted = false;
-
-#if DEBUG
-    Validate();
-#endif
-    }
-
-// Replaces the name of a leaf: the previous string is released with
-// free() and a copy of ptrName made by strsave() is stored instead.
-void Tree::SetLeafName(unsigned uNodeIndex, const char *ptrName)
-    {
-    assert(uNodeIndex < m_uNodeCount);
-    assert(IsLeaf(uNodeIndex));
-
-    char *&ptrSlot = m_ptrName[uNodeIndex];
-    free(ptrSlot);
-    ptrSlot = strsave(ptrName);
-    }
-
-// Stores uId as the id of the given leaf node.
-void Tree::SetLeafId(unsigned uNodeIndex, unsigned uId)
-    {
-    assert(uNodeIndex < m_uNodeCount);
-    assert(IsLeaf(uNodeIndex));
-
-    unsigned &uSlot = m_Ids[uNodeIndex];
-    uSlot = uId;
-    }
-
-// Returns the stored name pointer of a leaf node. The pointer refers to
-// the tree's own copy of the string and may be null if no name was set.
-const char *Tree::GetLeafName(unsigned uNodeIndex) const
-    {
-    assert(uNodeIndex < m_uNodeCount);
-    assert(IsLeaf(uNodeIndex));
-
-    const char *ptrStored = m_ptrName[uNodeIndex];
-    return ptrStored;
-    }
-
-// Returns the id stored for a leaf node.
-unsigned Tree::GetLeafId(unsigned uNodeIndex) const
-    {
-    assert(uNodeIndex < m_uNodeCount);
-    assert(IsLeaf(uNodeIndex));
-
-    const unsigned uStored = m_Ids[uNodeIndex];
-    return uStored;
-    }
-
-// Appends a branch below an existing leaf.
-// Two new leaf nodes are created and become neighbors 2 and 3 of
-// uExistingLeafIndex, which is their neighbor 1. All edge lengths touched
-// here are set to 0 and the new nodes' length/height flags are cleared.
-// Returns k, the index of the first new node; the second one is k+1.
-unsigned Tree::AppendBranch(unsigned uExistingLeafIndex)
-    {
-    if (0 == m_uNodeCount)
-        Quit("Tree::AppendBranch: tree has not been created");
-
-#if DEBUG
-    assert(uExistingLeafIndex < m_uNodeCount);
-    if (!IsLeaf(uExistingLeafIndex))
-        {
-        LogMe();
-        Quit("AppendBranch(%u): not leaf", uExistingLeafIndex);
-        }
-#endif
-
-    // Grow the arrays before the two new entries are written.
-    if (m_uNodeCount >= m_uCacheCount - 2)
-        ExpandCache();
-
-    const unsigned uFirstNew = m_uNodeCount;
-    m_uNodeCount += 2;
-
-    assert(m_uNeighbor2[uExistingLeafIndex] == NULL_NEIGHBOR);
-    assert(m_uNeighbor3[uExistingLeafIndex] == NULL_NEIGHBOR);
-
-    // Parent side of the two new edges.
-    m_uNeighbor2[uExistingLeafIndex] = uFirstNew;
-    m_uNeighbor3[uExistingLeafIndex] = uFirstNew + 1;
-    m_dEdgeLength2[uExistingLeafIndex] = 0;
-    m_dEdgeLength3[uExistingLeafIndex] = 0;
-
-    // Child side: both new nodes get the same initial state.
-    for (unsigned uLeaf = uFirstNew; uLeaf < m_uNodeCount; ++uLeaf)
-        {
-        m_uNeighbor1[uLeaf] = uExistingLeafIndex;
-        m_uNeighbor2[uLeaf] = NULL_NEIGHBOR;
-        m_uNeighbor3[uLeaf] = NULL_NEIGHBOR;
-
-        m_dEdgeLength1[uLeaf] = 0;
-        m_dEdgeLength2[uLeaf] = 0;
-        m_dEdgeLength3[uLeaf] = 0;
-
-        m_bHasEdgeLength1[uLeaf] = false;
-        m_bHasEdgeLength2[uLeaf] = false;
-        m_bHasEdgeLength3[uLeaf] = false;
-
-        m_bHasHeight[uLeaf] = false;
-        }
-
-    return uFirstNew;
-    }
-
-// Writes a table of all nodes to the log: one row per node with its three
-// neighbor slots and their stored edge lengths, a [ROOT] marker for the
-// root of a rooted tree, and the node name when one is set.
-void Tree::LogMe() const
-    {
-    Log("Tree::LogMe %u nodes, ", m_uNodeCount);
-
-    if (!IsRooted())
-        {
-        Log("unrooted.\n");
-        Log("\n");
-        Log("Index Nbr_1 Length1 Nbr_2 Length2 Nbr_3 Length3 Name\n");
-        Log("----- ----- ------- ----- ------- ----- ------- ----\n");
-        }
-    else
-        {
-        Log("rooted.\n");
-        Log("\n");
-        Log("Index Parnt LengthP Left LengthL Right LengthR Name\n");
-        Log("----- ----- ------- ---- ------- ----- ------- ----\n");
-        }
-
-    for (unsigned uRow = 0; uRow < m_uNodeCount; ++uRow)
-        {
-        Log("%5u ", uRow);
-
-        // Gather the three slots so they can be printed by one loop.
-        const unsigned uSlotNeighbor[3] =
-          { m_uNeighbor1[uRow], m_uNeighbor2[uRow], m_uNeighbor3[uRow] };
-        const double *ptrSlotLengths[3] =
-          { m_dEdgeLength1, m_dEdgeLength2, m_dEdgeLength3 };
-
-        for (unsigned uSlot = 0; uSlot < 3; ++uSlot)
-            {
-            if (NULL_NEIGHBOR == uSlotNeighbor[uSlot])
-                Log(" ");
-            else
-                Log("%5u %7.3g ", uSlotNeighbor[uSlot],
-                  ptrSlotLengths[uSlot][uRow]);
-            }
-
-        if (m_bRooted && uRow == m_uRootNodeIndex)
-            Log("[ROOT] ");
-
-        const char *ptrName = m_ptrName[uRow];
-        if (0 != ptrName)
-            Log("%s", ptrName);
-        Log("\n");
-        }
-    }
-
-// Stores dLength for the edge between the two nodes and marks the length
-// as present. The value is written at both ends of the edge, in whichever
-// neighbor slot each node uses for the other.
-void Tree::SetEdgeLength(unsigned uNodeIndex1, unsigned uNodeIndex2,
-  double dLength)
-    {
-    assert(uNodeIndex1 < m_uNodeCount && uNodeIndex2 < m_uNodeCount);
-    assert(IsEdge(uNodeIndex1, uNodeIndex2));
-
-    // End 1: pick the arrays of the slot that holds node 2 (slot 3 is the
-    // fallback when slots 1 and 2 do not match).
-    double *ptrLengths = m_dEdgeLength3;
-    bool *ptrFlags = m_bHasEdgeLength3;
-    if (m_uNeighbor1[uNodeIndex1] == uNodeIndex2)
-        {
-        ptrLengths = m_dEdgeLength1;
-        ptrFlags = m_bHasEdgeLength1;
-        }
-    else if (m_uNeighbor2[uNodeIndex1] == uNodeIndex2)
-        {
-        ptrLengths = m_dEdgeLength2;
-        ptrFlags = m_bHasEdgeLength2;
-        }
-    else
-        {
-        assert(m_uNeighbor3[uNodeIndex1] == uNodeIndex2);
-        }
-    ptrLengths[uNodeIndex1] = dLength;
-    ptrFlags[uNodeIndex1] = true;
-
-    // End 2: same selection with the roles of the nodes exchanged.
-    ptrLengths = m_dEdgeLength3;
-    ptrFlags = m_bHasEdgeLength3;
-    if (m_uNeighbor1[uNodeIndex2] == uNodeIndex1)
-        {
-        ptrLengths = m_dEdgeLength1;
-        ptrFlags = m_bHasEdgeLength1;
-        }
-    else if (m_uNeighbor2[uNodeIndex2] == uNodeIndex1)
-        {
-        ptrLengths = m_dEdgeLength2;
-        ptrFlags = m_bHasEdgeLength2;
-        }
-    else
-        {
-        assert(m_uNeighbor3[uNodeIndex2] == uNodeIndex1);
-        }
-    ptrLengths[uNodeIndex2] = dLength;
-    ptrFlags[uNodeIndex2] = true;
-    }
-
-// Converts a rooted tree into an unrooted one by attaching one new leaf
-// to the root (node 0) through a zero-length edge, so that the former
-// root has three neighbors. Returns the index of the new node.
-unsigned Tree::UnrootFromFile()
-    {
-#if TRACE
-    Log("Before unroot:\n");
-    LogMe();
-#endif
-
-    if (!m_bRooted)
-        Quit("Tree::Unroot, not rooted");
-
-// Convention: root node is always node zero
-    assert(IsRoot(0));
-    assert(NULL_NEIGHBOR == m_uNeighbor1[0]);
-
-    // Make room for the extra node before writing its entries, as
-    // RootUnrootedTree() does before it adds a node.
-    if (m_uNodeCount == m_uCacheCount)
-        ExpandCache();
-
-    const unsigned uThirdNode = m_uNodeCount++;
-
-    m_uNeighbor1[0] = uThirdNode;
-    m_uNeighbor1[uThirdNode] = 0;
-
-    m_uNeighbor2[uThirdNode] = NULL_NEIGHBOR;
-    m_uNeighbor3[uThirdNode] = NULL_NEIGHBOR;
-
-    // The length flag is set at both ends of the new edge; with only one
-    // end flagged, GetEdgeLength(0, uThirdNode) would trip its own
-    // HasEdgeLength assertion.
-    m_dEdgeLength1[0] = 0;
-    m_dEdgeLength1[uThirdNode] = 0;
-    m_bHasEdgeLength1[0] = true;
-    m_bHasEdgeLength1[uThirdNode] = true;
-
-    // Give the new node's remaining flags a defined value.
-    m_bHasEdgeLength2[uThirdNode] = false;
-    m_bHasEdgeLength3[uThirdNode] = false;
-    m_bHasHeight[uThirdNode] = false;
-
-    m_bRooted = false;
-
-#if TRACE
-    Log("After unroot:\n");
-    LogMe();
-#endif
-
-    return uThirdNode;
-    }
-
-// GetFirstNeighbor / GetSecondNeighbor are the unrooted-tree counterpart
-// of GetLeft / GetRight: given a node and one of its known neighbors, they
-// return the node's other neighbors. Which one counts as "first" or
-// "second" is arbitrary (slot order).
-// GetFirstNeighbor returns the first non-null neighbor slot of uNodeIndex
-// that is not uNeighborIndex, or NULL_NEIGHBOR when there is none (e.g.
-// for a leaf).
-unsigned Tree::GetFirstNeighbor(unsigned uNodeIndex, unsigned uNeighborIndex) const
-    {
-    assert(uNodeIndex < m_uNodeCount);
-    assert(uNeighborIndex < m_uNodeCount);
-    assert(IsEdge(uNodeIndex, uNeighborIndex));
-
-    unsigned uSlot = 0;
-    while (uSlot < 3)
-        {
-        const unsigned uCandidate = GetNeighbor(uNodeIndex, uSlot);
-        ++uSlot;
-        if (NULL_NEIGHBOR == uCandidate || uNeighborIndex == uCandidate)
-            continue;
-        return uCandidate;
-        }
-    return NULL_NEIGHBOR;
-    }
-
-// Returns the second non-null neighbor slot of uNodeIndex that is not
-// uNeighborIndex, or NULL_NEIGHBOR when fewer than two such slots exist.
-unsigned Tree::GetSecondNeighbor(unsigned uNodeIndex, unsigned uNeighborIndex) const
-    {
-    assert(uNodeIndex < m_uNodeCount);
-    assert(uNeighborIndex < m_uNodeCount);
-    assert(IsEdge(uNodeIndex, uNeighborIndex));
-
-    unsigned uOthersSeen = 0;
-    for (unsigned uSlot = 0; uSlot < 3; ++uSlot)
-        {
-        const unsigned uCandidate = GetNeighbor(uNodeIndex, uSlot);
-        if (NULL_NEIGHBOR == uCandidate || uNeighborIndex == uCandidate)
-            continue;
-        ++uOthersSeen;
-        if (2 == uOthersSeen)
-            return uCandidate;
-        }
-    return NULL_NEIGHBOR;
-    }
-
-// Counts the leaves of the sub-tree obtained by cutting an unrooted tree
-// at the edge (uNodeIndex1, uNodeIndex2) and treating uNodeIndex2 as the
-// root of that sub-tree.
-// *ptrdTotalDistance receives, for a leaf, the length of the edge leading
-// to it; for an internal node, the sum of the values reported for its two
-// sub-trees (the length of the cut edge itself is not added).
-unsigned Tree::GetLeafCountUnrooted(unsigned uNodeIndex1, unsigned uNodeIndex2,
-  double *ptrdTotalDistance) const
-    {
-    assert(!IsRooted());
-
-    if (!IsLeaf(uNodeIndex2))
-        {
-        // The two neighbors of uNodeIndex2 other than uNodeIndex1 play the
-        // role of left and right child.
-        const unsigned uChildA = GetFirstNeighbor(uNodeIndex2, uNodeIndex1);
-        const unsigned uChildB = GetSecondNeighbor(uNodeIndex2, uNodeIndex1);
-
-        double dDistanceA;
-        const unsigned uCountA = GetLeafCountUnrooted(uNodeIndex2, uChildA,
-          &dDistanceA);
-
-        double dDistanceB;
-        const unsigned uCountB = GetLeafCountUnrooted(uNodeIndex2, uChildB,
-          &dDistanceB);
-
-        *ptrdTotalDistance = dDistanceA + dDistanceB;
-        return uCountA + uCountB;
-        }
-
-    *ptrdTotalDistance = GetEdgeLength(uNodeIndex1, uNodeIndex2);
-    return 1;
-    }
-
-// Roots an unrooted tree. FindRoot() chooses an edge (uNode1, uNode2) and
-// the two lengths into which it is split; a new root node is appended and
-// inserted on that edge, every node is re-oriented so that neighbor 1 is
-// its parent, and the tree is validated.
-void Tree::RootUnrootedTree(ROOT Method)
-    {
-    assert(!IsRooted());
-#if TRACE
-    Log("Tree::RootUnrootedTree, before:");
-    LogMe();
-#endif
-
-    unsigned uNode1;
-    unsigned uNode2;
-    double dLength1;
-    double dLength2;
-    FindRoot(*this, &uNode1, &uNode2, &dLength1, &dLength2, Method);
-
-    if (m_uNodeCount == m_uCacheCount)
-        ExpandCache();
-    m_uRootNodeIndex = m_uNodeCount++;
-    const unsigned uRoot = m_uRootNodeIndex;
-
-    // The value is not used below; the call is retained as in the original.
-    double dEdgeLength = GetEdgeLength(uNode1, uNode2);
-
-    m_uNeighbor1[uRoot] = NULL_NEIGHBOR;
-    m_uNeighbor2[uRoot] = uNode1;
-    m_uNeighbor3[uRoot] = uNode2;
-
-    // In uNode1, replace the reference to uNode2 by the new root.
-    unsigned *ptrSlots = m_uNeighbor3;
-    if (m_uNeighbor1[uNode1] == uNode2)
-        ptrSlots = m_uNeighbor1;
-    else if (m_uNeighbor2[uNode1] == uNode2)
-        ptrSlots = m_uNeighbor2;
-    else
-        {
-        assert(m_uNeighbor3[uNode1] == uNode2);
-        }
-    ptrSlots[uNode1] = uRoot;
-
-    // In uNode2, replace the reference to uNode1 by the new root.
-    ptrSlots = m_uNeighbor3;
-    if (m_uNeighbor1[uNode2] == uNode1)
-        ptrSlots = m_uNeighbor1;
-    else if (m_uNeighbor2[uNode2] == uNode1)
-        ptrSlots = m_uNeighbor2;
-    else
-        {
-        assert(m_uNeighbor3[uNode2] == uNode1);
-        }
-    ptrSlots[uNode2] = uRoot;
-
-    OrientParent(uNode1, uRoot);
-    OrientParent(uNode2, uRoot);
-
-    SetEdgeLength(uRoot, uNode1, dLength1);
-    SetEdgeLength(uRoot, uNode2, dLength2);
-
-    m_bHasHeight[uRoot] = false;
-
-    m_ptrName[uRoot] = 0;
-
-    m_bRooted = true;
-
-#if TRACE
-    Log("\nPhy::RootUnrootedTree, after:");
-    LogMe();
-#endif
-
-    Validate();
-    }
-
-// Returns the "has length" flag stored at node 1 for its edge to node 2.
-bool Tree::HasEdgeLength(unsigned uNodeIndex1, unsigned uNodeIndex2) const
-    {
-    assert(uNodeIndex1 < m_uNodeCount);
-    assert(uNodeIndex2 < m_uNodeCount);
-    assert(IsEdge(uNodeIndex1, uNodeIndex2));
-
-    // Select the flag array of the slot that holds node 2; slot 3 is the
-    // fallback when neither slot 1 nor slot 2 matches.
-    const bool *ptrFlags = m_bHasEdgeLength3;
-    if (m_uNeighbor1[uNodeIndex1] == uNodeIndex2)
-        ptrFlags = m_bHasEdgeLength1;
-    else if (m_uNeighbor2[uNodeIndex1] == uNodeIndex2)
-        ptrFlags = m_bHasEdgeLength2;
-    else
-        {
-        assert(m_uNeighbor3[uNodeIndex1] == uNodeIndex2);
-        }
-    return ptrFlags[uNodeIndex1];
-    }
-
-// Re-orients the sub-tree below uNodeIndex so that, in every node,
-// neighbor slot 1 holds the parent. When the parent is found in slot 2 or
-// 3, the complete contents of that slot (neighbor, edge length and
-// has-length flag) are exchanged with slot 1; then the nodes in slots 2 and
-// 3 are processed recursively. A NULL_NEIGHBOR node index ends the recursion.
-void Tree::OrientParent(unsigned uNodeIndex, unsigned uParentNodeIndex)
-    {
-    if (NULL_NEIGHBOR == uNodeIndex)
-        return;
-
-    if (m_uNeighbor1[uNodeIndex] != uParentNodeIndex)
-        {
-        // Locate the slot that currently holds the parent.
-        unsigned *ptrNeighbors = m_uNeighbor3;
-        double *ptrLengths = m_dEdgeLength3;
-        bool *ptrFlags = m_bHasEdgeLength3;
-        if (m_uNeighbor2[uNodeIndex] == uParentNodeIndex)
-            {
-            ptrNeighbors = m_uNeighbor2;
-            ptrLengths = m_dEdgeLength2;
-            ptrFlags = m_bHasEdgeLength2;
-            }
-        else
-            {
-            assert(m_uNeighbor3[uNodeIndex] == uParentNodeIndex);
-            }
-
-        ptrNeighbors[uNodeIndex] = m_uNeighbor1[uNodeIndex];
-        m_uNeighbor1[uNodeIndex] = uParentNodeIndex;
-
-        const double dParentLength = ptrLengths[uNodeIndex];
-        ptrLengths[uNodeIndex] = m_dEdgeLength1[uNodeIndex];
-        m_dEdgeLength1[uNodeIndex] = dParentLength;
-
-        // The flag has to travel with its length; otherwise a length could
-        // end up marked present in one slot while it now lives in the other.
-        const bool bParentHasLength = ptrFlags[uNodeIndex];
-        ptrFlags[uNodeIndex] = m_bHasEdgeLength1[uNodeIndex];
-        m_bHasEdgeLength1[uNodeIndex] = bParentHasLength;
-        }
-
-    OrientParent(m_uNeighbor2[uNodeIndex], uNodeIndex);
-    OrientParent(m_uNeighbor3[uNodeIndex], uNodeIndex);
-    }
-
-// Returns the starting node of the depth-first traversal: the leaf reached
-// from the root by always following the left branch.
-unsigned Tree::FirstDepthFirstNode() const
-    {
-    assert(IsRooted());
-
-    unsigned uCurrent;
-    for (uCurrent = m_uRootNodeIndex; !IsLeaf(uCurrent);
-      uCurrent = GetLeft(uCurrent))
-        continue;
-    return uCurrent;
-    }
-
-// Mirror image of FirstDepthFirstNode: returns the leaf reached from the
-// root by always following the right branch.
-unsigned Tree::FirstDepthFirstNodeR() const
-    {
-    assert(IsRooted());
-
-    unsigned uCurrent;
-    for (uCurrent = m_uRootNodeIndex; !IsLeaf(uCurrent);
-      uCurrent = GetRight(uCurrent))
-        continue;
-    return uCurrent;
-    }
-
-// Returns the node that follows uNodeIndex in the depth-first traversal
-// started by FirstDepthFirstNode (children before their parent, left
-// before right), or NULL_NEIGHBOR when uNodeIndex is the root.
-unsigned Tree::NextDepthFirstNode(unsigned uNodeIndex) const
-    {
-#if TRACE
-    Log("NextDepthFirstNode(%3u) ", uNodeIndex);
-#endif
-
-    assert(IsRooted());
-    assert(uNodeIndex < m_uNodeCount);
-
-    if (IsRoot(uNodeIndex))
-        {
-#if TRACE
-        Log(">> Node %u is root, end of traversal\n", uNodeIndex);
-#endif
-        return NULL_NEIGHBOR;
-        }
-
-    const unsigned uParent = GetParent(uNodeIndex);
-    const unsigned uSibling = GetRight(uParent);
-    if (uSibling == uNodeIndex)
-        {
-        // Both children are done, so the parent is next.
-#if TRACE
-        Log(">> Is right branch, return parent=%u\n", uParent);
-#endif
-        return uParent;
-        }
-
-    // Coming from the left child: continue with the left-most leaf of
-    // the right sibling's sub-tree.
-    unsigned uNext = uSibling;
-#if TRACE
-    Log(">> Descend left from right sibling=%u ... ", uNext);
-#endif
-    for (; !IsLeaf(uNext); uNext = GetLeft(uNext))
-        continue;
-
-#if TRACE
-    Log("bottom out at leaf=%u\n", uNext);
-#endif
-    return uNext;
-    }
-
-// Mirror image of NextDepthFirstNode for the traversal started by
-// FirstDepthFirstNodeR (children before their parent, right before left).
-// Returns NULL_NEIGHBOR when uNodeIndex is the root.
-unsigned Tree::NextDepthFirstNodeR(unsigned uNodeIndex) const
-    {
-#if TRACE
-    Log("NextDepthFirstNode(%3u) ", uNodeIndex);
-#endif
-
-    assert(IsRooted());
-    assert(uNodeIndex < m_uNodeCount);
-
-    if (IsRoot(uNodeIndex))
-        {
-#if TRACE
-        Log(">> Node %u is root, end of traversal\n", uNodeIndex);
-#endif
-        return NULL_NEIGHBOR;
-        }
-
-    const unsigned uParent = GetParent(uNodeIndex);
-    const unsigned uSibling = GetLeft(uParent);
-    if (uSibling == uNodeIndex)
-        {
-        // Both children are done, so the parent is next.
-#if TRACE
-        Log(">> Is left branch, return parent=%u\n", uParent);
-#endif
-        return uParent;
-        }
-
-    // Coming from the right child: continue with the right-most leaf of
-    // the left sibling's sub-tree.
-    unsigned uNext = uSibling;
-#if TRACE
-    Log(">> Descend right from left sibling=%u ... ", uNext);
-#endif
-    for (; !IsLeaf(uNext); uNext = GetRight(uNext))
-        continue;
-
-#if TRACE
-    Log("bottom out at leaf=%u\n", uNext);
-#endif
-    return uNext;
-    }
-
-// Converts a rooted tree into an unrooted one by removing the root node:
-// its two children become direct neighbors (each in the other's slot 1)
-// and, when both root edges have a length, the joined edge gets their sum.
-// The root's entry is then removed from every per-node array, which lowers
-// the index of all nodes stored after it by one; neighbor references are
-// adjusted accordingly. The resulting tree is validated.
-void Tree::UnrootByDeletingRoot()
-    {
-    assert(IsRooted());
-    assert(m_uNodeCount >= 3);
-
-    const unsigned uRoot = m_uRootNodeIndex;
-    const unsigned uLeft = GetLeft(uRoot);
-    const unsigned uRight = GetRight(uRoot);
-
-    m_uNeighbor1[uLeft] = uRight;
-    m_uNeighbor1[uRight] = uLeft;
-
-    const bool bHasEdgeLength = HasEdgeLength(uRoot, uLeft) &&
-      HasEdgeLength(uRoot, uRight);
-    if (bHasEdgeLength)
-        {
-        const double dEdgeLength = GetEdgeLength(uRoot, uLeft) +
-          GetEdgeLength(uRoot, uRight);
-        m_dEdgeLength1[uLeft] = dEdgeLength;
-        m_dEdgeLength1[uRight] = dEdgeLength;
-        }
-    // Both ends of the joined edge must agree on whether it has a length.
-    m_bHasEdgeLength1[uLeft] = bHasEdgeLength;
-    m_bHasEdgeLength1[uRight] = bHasEdgeLength;
-
-// Remove root node entry from arrays.
-    // Only the entries stored after the root are moved; counting one more
-    // would read past the last node (and past the arrays when they are full).
-    const unsigned uMoveCount = m_uNodeCount - uRoot - 1;
-
-    const size_t uUnsBytes = uMoveCount*sizeof(unsigned);
-    memmove(m_uNeighbor1 + uRoot, m_uNeighbor1 + uRoot + 1, uUnsBytes);
-    memmove(m_uNeighbor2 + uRoot, m_uNeighbor2 + uRoot + 1, uUnsBytes);
-    memmove(m_uNeighbor3 + uRoot, m_uNeighbor3 + uRoot + 1, uUnsBytes);
-    // Ids are per-node data as well and must stay aligned with their node.
-    memmove(m_Ids + uRoot, m_Ids + uRoot + 1, uUnsBytes);
-
-    const size_t uDoubleBytes = uMoveCount*sizeof(double);
-    memmove(m_dEdgeLength1 + uRoot, m_dEdgeLength1 + uRoot + 1, uDoubleBytes);
-    memmove(m_dEdgeLength2 + uRoot, m_dEdgeLength2 + uRoot + 1, uDoubleBytes);
-    memmove(m_dEdgeLength3 + uRoot, m_dEdgeLength3 + uRoot + 1, uDoubleBytes);
-    memmove(m_dHeight + uRoot, m_dHeight + uRoot + 1, uDoubleBytes);
-
-    const size_t uBoolBytes = uMoveCount*sizeof(bool);
-    memmove(m_bHasEdgeLength1 + uRoot, m_bHasEdgeLength1 + uRoot + 1, uBoolBytes);
-    memmove(m_bHasEdgeLength2 + uRoot, m_bHasEdgeLength2 + uRoot + 1, uBoolBytes);
-    memmove(m_bHasEdgeLength3 + uRoot, m_bHasEdgeLength3 + uRoot + 1, uBoolBytes);
-    memmove(m_bHasHeight + uRoot, m_bHasHeight + uRoot + 1, uBoolBytes);
-
-    const size_t uPtrBytes = uMoveCount*sizeof(char *);
-    memmove(m_ptrName + uRoot, m_ptrName + uRoot + 1, uPtrBytes);
-
-    --m_uNodeCount;
-    m_bRooted = false;
-
-    // The vacated last slot still holds a copy of a name pointer that now
-    // belongs to the entry before it; clear it so the string has one owner.
-    m_ptrName[m_uNodeCount] = 0;
-
-// Fix up table entries: references to nodes stored after the old root
-// position now have to point one entry lower.
-    unsigned *ptrNeighborArrays[3] = { m_uNeighbor1, m_uNeighbor2, m_uNeighbor3 };
-    for (unsigned uNodeIndex = 0; uNodeIndex < m_uNodeCount; ++uNodeIndex)
-        {
-        for (unsigned uSlot = 0; uSlot < 3; ++uSlot)
-            {
-            unsigned &uRef = ptrNeighborArrays[uSlot][uNodeIndex];
-            if (uRef != NULL_NEIGHBOR && uRef > uRoot)
-                --uRef;
-            }
-        }
-
-    Validate();
-    }
-
-// Returns the node a leaf is attached to. In a rooted tree this is
-// GetParent(); in an unrooted tree it is the first non-null neighbor slot
-// (slot 3 is returned as is when slots 1 and 2 are null).
-unsigned Tree::GetLeafParent(unsigned uNodeIndex) const
-    {
-    assert(IsLeaf(uNodeIndex));
-
-    if (IsRooted())
-        return GetParent(uNodeIndex);
-
-    const unsigned uSlot1 = m_uNeighbor1[uNodeIndex];
-    if (NULL_NEIGHBOR != uSlot1)
-        return uSlot1;
-
-    const unsigned uSlot2 = m_uNeighbor2[uNodeIndex];
-    return (NULL_NEIGHBOR != uSlot2) ? uSlot2 : m_uNeighbor3[uNodeIndex];
-    }
-
-// Returns the height of a node in a rooted tree (Quit() otherwise).
-// A leaf has height 0. For an internal node the height is the mean of
-// (edge length + child height) over its two children, where negative edge
-// lengths count as 0. The result is stored in m_dHeight / m_bHasHeight and
-// returned directly on later calls.
-double Tree::GetNodeHeight(unsigned uNodeIndex) const
-    {
-    if (!IsRooted())
-        Quit("Tree::GetNodeHeight: undefined unless rooted tree");
-
-    if (IsLeaf(uNodeIndex))
-        return 0.0;
-
-    if (m_bHasHeight[uNodeIndex])
-        return m_dHeight[uNodeIndex];
-
-    const unsigned uLeft = GetLeft(uNodeIndex);
-    const unsigned uRight = GetRight(uNodeIndex);
-
-    // Read both lengths first, then clamp negative values to zero.
-    double dToLeft = GetEdgeLength(uNodeIndex, uLeft);
-    double dToRight = GetEdgeLength(uNodeIndex, uRight);
-    dToLeft = (dToLeft < 0) ? 0 : dToLeft;
-    dToRight = (dToRight < 0) ? 0 : dToRight;
-
-    const double dViaLeft = dToLeft + GetNodeHeight(uLeft);
-    const double dViaRight = dToRight + GetNodeHeight(uRight);
-    const double dMean = (dViaLeft + dViaRight)/2;
-
-    // Remember the result for this node.
-    m_bHasHeight[uNodeIndex] = true;
-    m_dHeight[uNodeIndex] = dMean;
-    return dMean;
-    }
-
-// Returns the subscript (0, 1 or 2) of the neighbor slot of uNodeIndex
-// that holds uNeighborIndex, or NULL_NEIGHBOR when no slot does.
-unsigned Tree::GetNeighborSubscript(unsigned uNodeIndex, unsigned uNeighborIndex) const
-    {
-    assert(uNodeIndex < m_uNodeCount);
-    assert(uNeighborIndex < m_uNodeCount);
-
-    const unsigned uSlotValue[3] =
-      {
-      m_uNeighbor1[uNodeIndex],
-      m_uNeighbor2[uNodeIndex],
-      m_uNeighbor3[uNodeIndex]
-      };
-    for (unsigned uSub = 0; uSub < 3; ++uSub)
-        {
-        if (uSlotValue[uSub] == uNeighborIndex)
-            return uSub;
-        }
-    return NULL_NEIGHBOR;
-    }
-
-// Returns the content of neighbor slot 1, 2 or 3 of a node for subscript
-// 0, 1 or 2 respectively. Any other subscript is reported through Quit().
-unsigned Tree::GetNeighbor(unsigned uNodeIndex, unsigned uNeighborSubscript) const
-    {
-    if (0 == uNeighborSubscript)
-        return m_uNeighbor1[uNodeIndex];
-    if (1 == uNeighborSubscript)
-        return m_uNeighbor2[uNodeIndex];
-    if (2 == uNeighborSubscript)
-        return m_uNeighbor3[uNodeIndex];
-
-    Quit("Tree::GetNeighbor, sub=%u", uNeighborSubscript);
-    return NULL_NEIGHBOR;
-    }
-
-// Maps a leaf index (position among the leaves, counted in node-index
-// order, starting at 0) to the corresponding node index.
-// Calls Quit() when the tree has fewer leaves than uLeafIndex + 1.
-// TODO: check if this is a performance issue, could cache a lookup table
-unsigned Tree::LeafIndexToNodeIndex(unsigned uLeafIndex) const
-    {
-    const unsigned uNodeCount = GetNodeCount();
-
-    // Number of leaves that still have to be passed over.
-    unsigned uLeavesToSkip = uLeafIndex;
-    for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
-        {
-        if (!IsLeaf(uNodeIndex))
-            continue;
-        if (0 == uLeavesToSkip)
-            return uNodeIndex;
-        --uLeavesToSkip;
-        }
-    Quit("LeafIndexToNodeIndex: out of range");
-    return 0;
-    }
-
-// Returns the node index of the first leaf (in node-index order) whose
-// name equals ptrName. Calls Quit() when no leaf has that name.
-unsigned Tree::GetLeafNodeIndex(const char *ptrName) const
-    {
-    const unsigned uNodeCount = GetNodeCount();
-    for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
-        {
-        if (!IsLeaf(uNodeIndex))
-            continue;
-        const char *ptrLeafName = GetLeafName(uNodeIndex);
-        // A leaf may have no name yet (name slots start out null);
-        // passing a null pointer to strcmp is undefined, so skip it.
-        if (0 == ptrLeafName)
-            continue;
-        if (0 == strcmp(ptrName, ptrLeafName))
-            return uNodeIndex;
-        }
-    Quit("Tree::GetLeafNodeIndex, name not found");
-    return 0;
-    }
-
-// Makes this tree a copy of 'tree': the arrays are sized with InitCache()
-// for the source's node count, all per-node arrays are copied, and each
-// leaf name is duplicated with strsave() (non-leaf names are set to null).
-void Tree::Copy(const Tree &tree)
-    {
-    const unsigned uNodeCount = tree.GetNodeCount();
-    InitCache(uNodeCount);
-
-    m_uNodeCount = uNodeCount;
-
-    // Unsigned-valued arrays.
-    const size_t UnsignedBytes = uNodeCount*sizeof(unsigned);
-    memcpy(m_uNeighbor1, tree.m_uNeighbor1, UnsignedBytes);
-    memcpy(m_uNeighbor2, tree.m_uNeighbor2, UnsignedBytes);
-    memcpy(m_uNeighbor3, tree.m_uNeighbor3, UnsignedBytes);
-    memcpy(m_Ids, tree.m_Ids, UnsignedBytes);
-
-    // Double-valued arrays.
-    const size_t DoubleBytes = uNodeCount*sizeof(double);
-    memcpy(m_dEdgeLength1, tree.m_dEdgeLength1, DoubleBytes);
-    memcpy(m_dEdgeLength2, tree.m_dEdgeLength2, DoubleBytes);
-    memcpy(m_dEdgeLength3, tree.m_dEdgeLength3, DoubleBytes);
-    memcpy(m_dHeight, tree.m_dHeight, DoubleBytes);
-
-    // Flag arrays.
-    const size_t BoolBytes = uNodeCount*sizeof(bool);
-    memcpy(m_bHasEdgeLength1, tree.m_bHasEdgeLength1, BoolBytes);
-    memcpy(m_bHasEdgeLength2, tree.m_bHasEdgeLength2, BoolBytes);
-    memcpy(m_bHasEdgeLength3, tree.m_bHasEdgeLength3, BoolBytes);
-    memcpy(m_bHasHeight, tree.m_bHasHeight, BoolBytes);
-
-    m_uRootNodeIndex = tree.m_uRootNodeIndex;
-    m_bRooted = tree.m_bRooted;
-
-    // Names are owned per tree, so leaves get their own copy.
-    for (unsigned uNodeIndex = 0; uNodeIndex < m_uNodeCount; ++uNodeIndex)
-        {
-        if (!tree.IsLeaf(uNodeIndex))
-            {
-            m_ptrName[uNodeIndex] = 0;
-            continue;
-            }
-        const char *ptrName = tree.GetLeafName(uNodeIndex);
-        m_ptrName[uNodeIndex] = strsave(ptrName);
-        }
-
-#if DEBUG
-    Validate();
-#endif
-    }
-
-// Builds a rooted tree from a vector description.
-// With N = uLeafCount, node indexes 0..N-1 are the leaves and N..2N-2 the
-// internal nodes. The Left/Right/LeftLength/RightLength vectors have one
-// entry per internal node, subscripted by (node index - N); the values of
-// Left and Right are node indexes in 0..2N-2. Example: N=6 and Left[2]=1
-// means that the third internal node (node index 8) has the second leaf
-// (node index 1) as its left child.
-// uRoot is the vector subscript of the root, i.e. its node index is
-// uRoot + N. LeafIds and LeafNames have one entry per leaf; the names are
-// copied with strsave().
-void Tree::Create(unsigned uLeafCount, unsigned uRoot, const unsigned Left[],
-  const unsigned Right[], const float LeftLength[], const float RightLength[],
-  const unsigned LeafIds[], char **LeafNames)
-    {
-    Clear();
-
-    m_uNodeCount = 2*uLeafCount - 1;
-    InitCache(m_uNodeCount);
-
-    // Leaves: id and name only; their parent link is filled in below.
-    for (unsigned uLeaf = 0; uLeaf < uLeafCount; ++uLeaf)
-        {
-        m_Ids[uLeaf] = LeafIds[uLeaf];
-        m_ptrName[uLeaf] = strsave(LeafNames[uLeaf]);
-        }
-
-    // Internal nodes: slots 2 and 3 hold the children, and each child's
-    // slot 1 is pointed back at the internal node.
-    for (unsigned uSub = 0; uSub + uLeafCount < m_uNodeCount; ++uSub)
-        {
-        const unsigned uParent = uSub + uLeafCount;
-        const unsigned uLeft = Left[uSub];
-        const unsigned uRight = Right[uSub];
-        const float fLeft = LeftLength[uSub];
-        const float fRight = RightLength[uSub];
-
-        // Left edge, parent side then child side.
-        m_uNeighbor2[uParent] = uLeft;
-        m_bHasEdgeLength2[uParent] = true;
-        m_dEdgeLength2[uParent] = fLeft;
-
-        // Right edge, parent side.
-        m_uNeighbor3[uParent] = uRight;
-        m_bHasEdgeLength3[uParent] = true;
-        m_dEdgeLength3[uParent] = fRight;
-
-        // Child sides; left is written before right as in the original.
-        m_uNeighbor1[uLeft] = uParent;
-        m_uNeighbor1[uRight] = uParent;
-
-        m_dEdgeLength1[uLeft] = fLeft;
-        m_dEdgeLength1[uRight] = fRight;
-
-        m_bHasEdgeLength1[uLeft] = true;
-        m_bHasEdgeLength1[uRight] = true;
-        }
-
-    m_bRooted = true;
-    m_uRootNodeIndex = uRoot + uLeafCount;
-
-    Validate();
-    }
Deleted: trunk/packages/muscle/trunk/phy2.cpp
===================================================================
--- trunk/packages/muscle/trunk/phy2.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/phy2.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,282 +0,0 @@
-#include "muscle.h"
-#include "tree.h"
-
-#define TRACE 0
-
-// Step the edge enumeration held in ES to the next edge of the tree.
-// Nodes are visited with FirstDepthFirstNode / NextDepthFirstNode and
-// each edge is reported as (node, parent of node) in ES.m_uNodeIndex1
-// and ES.m_uNodeIndex2. The root of a rooted tree is skipped.
-// Returns false when there are no more edges.
-bool PhyEnumEdges(const Tree &tree, PhyEnumEdgeState &ES)
-    {
-    unsigned uChild = uInsane;
-
-    if (ES.m_bInit)
-        {
-        uChild = tree.NextDepthFirstNode(ES.m_uNodeIndex1);
-
-    // Step over the root of a rooted tree.
-        if (NULL_NEIGHBOR != uChild && tree.IsRooted() && tree.IsRoot(uChild))
-            uChild = tree.NextDepthFirstNode(uChild);
-
-        if (NULL_NEIGHBOR == uChild)
-            return false;
-        }
-    else
-        {
-    // First call: a tree with at most one node has no edges.
-        if (tree.GetNodeCount() <= 1)
-            {
-            ES.m_uNodeIndex1 = NULL_NEIGHBOR;
-            ES.m_uNodeIndex2 = NULL_NEIGHBOR;
-            return false;
-            }
-        uChild = tree.FirstDepthFirstNode();
-        ES.m_bInit = true;
-        }
-
-    const unsigned uParent = tree.GetParent(uChild);
-    ES.m_uNodeIndex1 = uChild;
-    ES.m_uNodeIndex2 = uParent;
-    return true;
-    }
-
-// Same as PhyEnumEdges, but nodes are visited with the "R" iterators
-// FirstDepthFirstNodeR / NextDepthFirstNodeR. Each edge is reported as
-// (node, parent of node) in ES.m_uNodeIndex1 and ES.m_uNodeIndex2, and
-// the root of a rooted tree is skipped.
-// Returns false when there are no more edges.
-bool PhyEnumEdgesR(const Tree &tree, PhyEnumEdgeState &ES)
-    {
-    unsigned uNode1 = uInsane;
-
-    if (!ES.m_bInit)
-        {
-        if (tree.GetNodeCount() <= 1)
-            {
-            ES.m_uNodeIndex1 = NULL_NEIGHBOR;
-            ES.m_uNodeIndex2 = NULL_NEIGHBOR;
-            return false;
-            }
-        uNode1 = tree.FirstDepthFirstNodeR();
-        ES.m_bInit = true;
-        }
-    else
-        {
-        uNode1 = tree.NextDepthFirstNodeR(ES.m_uNodeIndex1);
-        if (NULL_NEIGHBOR == uNode1)
-            return false;
-        if (tree.IsRooted() && tree.IsRoot(uNode1))
-            {
-        // Skip the root using the same (R) iterator as the rest of
-        // this traversal; stepping with the forward iterator here
-        // would mix two different visiting orders.
-            uNode1 = tree.NextDepthFirstNodeR(uNode1);
-            if (NULL_NEIGHBOR == uNode1)
-                return false;
-            }
-        }
-    unsigned uNode2 = tree.GetParent(uNode1);
-
-    ES.m_uNodeIndex1 = uNode1;
-    ES.m_uNodeIndex2 = uNode2;
-    return true;
-    }
-
-// Append to Leaves every leaf reachable from uNodeIndex1 without going
-// back through uNodeIndex2, advancing *ptruCount for each leaf stored.
-static void GetLeavesSubtree(const Tree &tree, unsigned uNodeIndex1,
-  const unsigned uNodeIndex2, unsigned Leaves[], unsigned *ptruCount)
-    {
-    if (!tree.IsLeaf(uNodeIndex1))
-        {
-    // Look up both onward neighbors before descending into either.
-        const unsigned uFirst = tree.GetFirstNeighbor(uNodeIndex1, uNodeIndex2);
-        const unsigned uSecond = tree.GetSecondNeighbor(uNodeIndex1, uNodeIndex2);
-        if (uFirst != NULL_NEIGHBOR)
-            GetLeavesSubtree(tree, uFirst, uNodeIndex1, Leaves, ptruCount);
-        if (uSecond != NULL_NEIGHBOR)
-            GetLeavesSubtree(tree, uSecond, uNodeIndex1, Leaves, ptruCount);
-        return;
-        }
-
-    const unsigned uSlot = *ptruCount;
-    Leaves[uSlot] = uNodeIndex1;
-    *ptruCount = uSlot + 1;
-    }
-
-// Collect into Leaves the leaves on the uNodeIndex1 side of the edge
-// uNodeIndex1--uNodeIndex2; *ptruCount receives how many were stored.
-static void PhyGetLeaves(const Tree &tree, unsigned uNodeIndex1, unsigned uNodeIndex2,
-  unsigned Leaves[], unsigned *ptruCount)
-    {
-// Start from an empty list; the recursive helper only appends.
-    ptruCount[0] = 0;
-    GetLeavesSubtree(tree, uNodeIndex1, uNodeIndex2, Leaves, ptruCount);
-    }
-
-// Step the enumeration in ES to the next bipartition of the leaves.
-// Each edge returned by PhyEnumEdges splits the leaves in two sets:
-// those on the m_uNodeIndex1 side go to Leaves1 (*ptruCount1 entries)
-// and those on the m_uNodeIndex2 side to Leaves2 (*ptruCount2 entries).
-// Returns false, with both counts set to zero, when the enumeration is
-// finished. Quits if the two sets do not add up to the leaf count.
-bool PhyEnumBiParts(const Tree &tree, PhyEnumEdgeState &ES,
-  unsigned Leaves1[], unsigned *ptruCount1,
-  unsigned Leaves2[], unsigned *ptruCount2)
-    {
-    bool bOk = PhyEnumEdges(tree, ES);
-
-// Special case: in a rooted tree, both edges from the root
-// give the same bipartition, so skip one of them.
-    if (bOk && tree.IsRooted() && tree.IsRoot(ES.m_uNodeIndex2)
-      && tree.GetRight(ES.m_uNodeIndex2) == ES.m_uNodeIndex1)
-        bOk = PhyEnumEdges(tree, ES);
-
-// Every "done" path reports empty partitions, including the one where
-// the skipped root edge was the last edge.
-    if (!bOk)
-        {
-        *ptruCount1 = 0;
-        *ptruCount2 = 0;
-        return false;
-        }
-
-    PhyGetLeaves(tree, ES.m_uNodeIndex1, ES.m_uNodeIndex2, Leaves1, ptruCount1);
-    PhyGetLeaves(tree, ES.m_uNodeIndex2, ES.m_uNodeIndex1, Leaves2, ptruCount2);
-
-    if (*ptruCount1 + *ptruCount2 != tree.GetLeafCount())
-        Quit("PhyEnumBiParts %u + %u != %u",
-          *ptruCount1, *ptruCount2, tree.GetLeafCount());
-#if DEBUG
-    {
-// Both lists must hold leaves only and must not share a leaf.
-    for (unsigned i = 0; i < *ptruCount1; ++i)
-        {
-        if (!tree.IsLeaf(Leaves1[i]))
-            Quit("PhyEnumBiParts: not leaf");
-        for (unsigned j = 0; j < *ptruCount2; ++j)
-            {
-            if (!tree.IsLeaf(Leaves2[j]))
-                Quit("PhyEnumBiParts: not leaf");
-            if (Leaves1[i] == Leaves2[j])
-                Quit("PhyEnumBiParts: dupe");
-            }
-        }
-    }
-#endif
-
-    return true;
-    }
-
-#if 0
-// Disabled manual test driver for PhyEnumBiParts: reads a tree from a
-// hard-coded file, then logs the enumeration state and both leaf lists
-// for every bipartition until PhyEnumBiParts returns false.
-// NOTE(review): Leaves1 and Leaves2 are never deleted and bDone is never
-// read; harmless while this code stays compiled out.
-void TestBiPart()
- {
- SetListFileName("c:\\tmp\\lobster.log", false);
- Tree tree;
- TextFile fileIn("c:\\tmp\\test.phy");
- tree.FromFile(fileIn);
- tree.LogMe();
-
-// One slot per node is more than enough to hold any set of leaves.
- const unsigned uNodeCount = tree.GetNodeCount();
- unsigned *Leaves1 = new unsigned[uNodeCount];
- unsigned *Leaves2 = new unsigned[uNodeCount];
-
- PhyEnumEdgeState ES;
- bool bDone = false;
- for (;;)
- {
- unsigned uCount1 = uInsane;
- unsigned uCount2 = uInsane;
- bool bOk = PhyEnumBiParts(tree, ES, Leaves1, &uCount1, Leaves2, &uCount2);
- Log("PEBP=%d ES.Init=%d ES.ni1=%d ES.ni2=%d\n",
- bOk,
- ES.m_bInit,
- ES.m_uNodeIndex1,
- ES.m_uNodeIndex2);
- if (!bOk)
- break;
- Log("\n");
- Log("Part1: ");
- for (unsigned n = 0; n < uCount1; ++n)
- Log(" %d(%s)", Leaves1[n], tree.GetLeafName(Leaves1[n]));
- Log("\n");
- Log("Part2: ");
- for (unsigned n = 0; n < uCount2; ++n)
- Log(" %d(%s)", Leaves2[n], tree.GetLeafName(Leaves2[n]));
- Log("\n");
- }
- }
-#endif
-
-// Append to Leaves every leaf found below uNodeIndex (following GetLeft
-// and GetRight), leaving out the whole subtree rooted at uExclude.
-// *ptruCount is advanced for each leaf stored.
-static void GetLeavesSubtreeExcluding(const Tree &tree, unsigned uNodeIndex,
-  unsigned uExclude, unsigned Leaves[], unsigned *ptruCount)
-    {
-    if (uExclude == uNodeIndex)
-        return;
-
-    if (!tree.IsLeaf(uNodeIndex))
-        {
-    // Fetch both children before descending into either of them.
-        const unsigned uLeftChild = tree.GetLeft(uNodeIndex);
-        const unsigned uRightChild = tree.GetRight(uNodeIndex);
-        if (uLeftChild != NULL_NEIGHBOR)
-            GetLeavesSubtreeExcluding(tree, uLeftChild, uExclude, Leaves, ptruCount);
-        if (uRightChild != NULL_NEIGHBOR)
-            GetLeavesSubtreeExcluding(tree, uRightChild, uExclude, Leaves, ptruCount);
-        return;
-        }
-
-    const unsigned uSlot = *ptruCount;
-    Leaves[uSlot] = uNodeIndex;
-    *ptruCount = uSlot + 1;
-    }
-
-// Collect into Leaves the leaves below uNodeIndex that are not in the
-// subtree rooted at uExclude; *ptruCount receives how many were stored.
-void GetLeavesExcluding(const Tree &tree, unsigned uNodeIndex,
-  unsigned uExclude, unsigned Leaves[], unsigned *ptruCount)
-    {
-// Start from an empty list; the recursive helper only appends.
-    ptruCount[0] = 0;
-    GetLeavesSubtreeExcluding(tree, uNodeIndex, uExclude, Leaves, ptruCount);
-    }
-
-// Fill NodeIndexes with the indexes of all non-leaf nodes of the tree,
-// sorted by increasing node height (as given by Tree::GetNodeHeight).
-// The number of entries written is (node count - 1)/2, so the caller's
-// array must be at least that long. Quits if the tree has fewer than
-// three nodes or if the number of non-leaf nodes found differs from
-// that expected count.
-void GetInternalNodesInHeightOrder(const Tree &tree, unsigned NodeIndexes[])
-    {
-    const unsigned uNodeCount = tree.GetNodeCount();
-    if (uNodeCount < 3)
-        Quit("GetInternalNodesInHeightOrder: %u nodes, none are internal",
-          uNodeCount);
-    const unsigned uInternalNodeCount = (uNodeCount - 1)/2;
-    double *Heights = new double[uInternalNodeCount];
-
-    unsigned uIndex = 0;
-    for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
-        {
-        if (tree.IsLeaf(uNodeIndex))
-            continue;
-    // Check before storing, so that an unexpected extra internal node
-    // is reported instead of being written past the end of Heights.
-        if (uIndex >= uInternalNodeCount)
-            Quit("Internal error: GetInternalNodesInHeightOrder");
-        NodeIndexes[uIndex] = uNodeIndex;
-        Heights[uIndex] = tree.GetNodeHeight(uNodeIndex);
-        ++uIndex;
-        }
-    if (uIndex != uInternalNodeCount)
-        Quit("Internal error: GetInternalNodesInHeightOrder");
-
-// Simple but slow bubble sort (probably don't care about speed here).
-// Heights and NodeIndexes are swapped together to stay in step.
-    bool bDone = false;
-    while (!bDone)
-        {
-        bDone = true;
-        for (unsigned i = 0; i < uInternalNodeCount - 1; ++i)
-            {
-            if (Heights[i] > Heights[i+1])
-                {
-                double dTmp = Heights[i];
-                Heights[i] = Heights[i+1];
-                Heights[i+1] = dTmp;
-
-                unsigned uTmp = NodeIndexes[i];
-                NodeIndexes[i] = NodeIndexes[i+1];
-                NodeIndexes[i+1] = uTmp;
-                bDone = false;
-                }
-            }
-        }
-#if TRACE
-    Log("Internal node index Height\n");
-    Log("------------------- --------\n");
-    // 1234567890123456789 123456789
-    for (unsigned n = 0; n < uInternalNodeCount; ++n)
-        Log("%19u %9.3f\n", NodeIndexes[n], Heights[n]);
-#endif
-    delete[] Heights;
-    }
-
-// Raise every edge length in the tree that is below dMinEdgeLength up
-// to dMinEdgeLength. Edges without a length are left alone.
-void ApplyMinEdgeLength(Tree &tree, double dMinEdgeLength)
-    {
-    const unsigned uTotalNodes = tree.GetNodeCount();
-    for (unsigned uFrom = 0; uFrom < uTotalNodes; ++uFrom)
-        {
-        const unsigned uDegree = tree.GetNeighborCount(uFrom);
-        for (unsigned uSub = 0; uSub < uDegree; ++uSub)
-            {
-            const unsigned uTo = tree.GetNeighbor(uFrom, uSub);
-            if (tree.HasEdgeLength(uFrom, uTo) &&
-              tree.GetEdgeLength(uFrom, uTo) < dMinEdgeLength)
-                tree.SetEdgeLength(uFrom, uTo, dMinEdgeLength);
-            }
-        }
-    }
Deleted: trunk/packages/muscle/trunk/phy3.cpp
===================================================================
--- trunk/packages/muscle/trunk/phy3.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/phy3.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,469 +0,0 @@
-#include "muscle.h"
-#include "tree.h"
-#include "edgelist.h"
-
-#define TRACE 0
-
-// Per-edge summary of the part of the tree that lies beyond Node2 when
-// the edge is followed from Node1 to Node2.
-struct EdgeInfo
-    {
-// Start with every member in a defined state, so that code which looks
-// at a field before testing m_bSet never reads an indeterminate value.
-    EdgeInfo()
-        {
-        m_bSet = false;
-        m_uNode1 = NULL_NEIGHBOR;
-        m_uNode2 = NULL_NEIGHBOR;
-        m_dMaxDistToLeaf = 0.0;
-        m_dTotalDistToLeaves = 0.0;
-        m_uMaxStep = NULL_NEIGHBOR;
-        m_uMostDistantLeaf = NULL_NEIGHBOR;
-        m_uLeafCount = 0;
-        }
-// Is data in this structure valid (i.e, has been set)?
-    bool m_bSet;
-
-// Node at start of this edge
-    unsigned m_uNode1;
-
-// Node at end of this edge
-    unsigned m_uNode2;
-
-// Maximum distance from Node2 to a leaf
-    double m_dMaxDistToLeaf;
-
-// Sum of distances from Node2 to all leaves under Node2
-    double m_dTotalDistToLeaves;
-
-// Next node on path from Node2 to most distant leaf
-    unsigned m_uMaxStep;
-
-// Most distant leaf from Node2 (used for debugging only)
-    unsigned m_uMostDistantLeaf;
-
-// Number of leaves under Node2
-    unsigned m_uLeafCount;
-    };
-
-static void RootByMidLongestSpan(const Tree &tree, EdgeInfo **EIs,
- unsigned *ptruNode1, unsigned *ptruNode2,
- double *ptrdLength1, double *ptrdLength2);
-static void RootByMinAvgLeafDist(const Tree &tree, EdgeInfo **EIs,
- unsigned *ptruNode1, unsigned *ptruNode2,
- double *ptrdLength1, double *ptrdLength2);
-
-// Write to the log one line for every EdgeInfo entry that has been set,
-// scanning three neighbor slots for each of the uNodeCount nodes.
-static void ListEIs(EdgeInfo **EIs, unsigned uNodeCount)
-    {
-    Log("Node1 Node2 MaxDist TotDist MostDist LeafCount Step\n");
-    Log("----- ----- ------- ------- -------- --------- ----\n");
-
-    for (unsigned uFrom = 0; uFrom < uNodeCount; ++uFrom)
-        {
-        const EdgeInfo *Row = EIs[uFrom];
-        for (unsigned uSlot = 0; uSlot < 3; ++uSlot)
-            {
-            const EdgeInfo &Info = Row[uSlot];
-            if (Info.m_bSet)
-                {
-                Log("%5u %5u %7.3g %7.3g %8u %9u",
-                  Info.m_uNode1,
-                  Info.m_uNode2,
-                  Info.m_dMaxDistToLeaf,
-                  Info.m_dTotalDistToLeaves,
-                  Info.m_uMostDistantLeaf,
-                  Info.m_uLeafCount);
-            // The step column is printed only when there is a next node.
-                if (Info.m_uMaxStep != NULL_NEIGHBOR)
-                    Log(" %4u", Info.m_uMaxStep);
-                Log("\n");
-                }
-            }
-        }
-    }
-
-// Fill in the EdgeInfo for the directed edge uNode1 -> uNode2, stored
-// at EIs[uNode1][s] where s is the neighbor subscript of uNode2 within
-// uNode1. If uNode2 is a leaf the entry describes that single leaf.
-// Otherwise it is combined from the entries EIs[uNode2][...] of every
-// neighbor of uNode2 other than uNode1; those must already be set,
-// or the function Quits.
-static void CalcInfo(const Tree &tree, unsigned uNode1, unsigned uNode2, EdgeInfo **EIs)
- {
- const unsigned uNeighborIndex = tree.GetNeighborSubscript(uNode1, uNode2);
- EdgeInfo &EI = EIs[uNode1][uNeighborIndex];
- EI.m_uNode1 = uNode1;
- EI.m_uNode2 = uNode2;
-
-// Base case: a leaf is at distance zero from itself.
- if (tree.IsLeaf(uNode2))
- {
- EI.m_dMaxDistToLeaf = 0;
- EI.m_dTotalDistToLeaves = 0;
- EI.m_uMaxStep = NULL_NEIGHBOR;
- EI.m_uMostDistantLeaf = uNode2;
- EI.m_uLeafCount = 1;
- EI.m_bSet = true;
- return;
- }
-
- double dMaxDistToLeaf = -1e29;
- double dTotalDistToLeaves = 0.0;
- unsigned uLeafCount = 0;
- unsigned uMostDistantLeaf = NULL_NEIGHBOR;
- unsigned uMaxStep = NULL_NEIGHBOR;
-
-// Accumulate over the edges leading away from uNode2, skipping the
-// edge back to uNode1.
- const unsigned uNeighborCount = tree.GetNeighborCount(uNode2);
- for (unsigned uSub = 0; uSub < uNeighborCount; ++uSub)
- {
- const unsigned uNode3 = tree.GetNeighbor(uNode2, uSub);
- if (uNode3 == uNode1)
- continue;
- const EdgeInfo &EINext = EIs[uNode2][uSub];
- if (!EINext.m_bSet)
- Quit("CalcInfo: internal error, dist %u->%u not known",
- uNode2, uNode3);
-
-
- uLeafCount += EINext.m_uLeafCount;
-
-// Each leaf beyond uNode3 is one edge length further from uNode2
-// than it is from uNode3.
- const double dEdgeLength = tree.GetEdgeLength(uNode2, uNode3);
- const double dTotalDist = EINext.m_dTotalDistToLeaves +
- EINext.m_uLeafCount*dEdgeLength;
- dTotalDistToLeaves += dTotalDist;
-
-// Keep track of the direction of the most distant leaf.
- const double dDist = EINext.m_dMaxDistToLeaf + dEdgeLength;
- if (dDist > dMaxDistToLeaf)
- {
- dMaxDistToLeaf = dDist;
- uMostDistantLeaf = EINext.m_uMostDistantLeaf;
- uMaxStep = uNode3;
- }
- }
- if (NULL_NEIGHBOR == uMaxStep || NULL_NEIGHBOR == uMostDistantLeaf ||
- 0 == uLeafCount)
- Quit("CalcInfo: internal error 2");
-
-// NOTE(review): dThisDist is never used below; the stored distances are
-// measured from uNode2 and do not include the edge uNode1--uNode2.
- const double dThisDist = tree.GetEdgeLength(uNode1, uNode2);
- EI.m_dMaxDistToLeaf = dMaxDistToLeaf;
- EI.m_dTotalDistToLeaves = dTotalDistToLeaves;
- EI.m_uMaxStep = uMaxStep;
- EI.m_uMostDistantLeaf = uMostDistantLeaf;
- EI.m_uLeafCount = uLeafCount;
- EI.m_bSet = true;
- }
-
-// Has the EdgeInfo for the directed edge uNodeFrom -> uNodeTo been set?
-static bool Known(const Tree &tree, EdgeInfo **EIs, unsigned uNodeFrom,
-  unsigned uNodeTo)
-    {
-    const unsigned uSlot = tree.GetNeighborSubscript(uNodeFrom, uNodeTo);
-    const EdgeInfo &Info = EIs[uNodeFrom][uSlot];
-    return Info.m_bSet;
-    }
-
-// True when the EdgeInfo of every edge leaving uNodeTo, other than the
-// one leading back to uNodeFrom, has been set.
-static bool AllKnownOut(const Tree &tree, EdgeInfo **EIs, unsigned uNodeFrom,
-  unsigned uNodeTo)
-    {
-    const unsigned uDegree = tree.GetNeighborCount(uNodeTo);
-    for (unsigned uSlot = 0; uSlot < uDegree; ++uSlot)
-        {
-        const unsigned uOther = tree.GetNeighbor(uNodeTo, uSlot);
-        if (uOther != uNodeFrom && !EIs[uNodeTo][uSlot].m_bSet)
-            return false;
-        }
-    return true;
-    }
-
-// Choose where to place the root of an unrooted tree.
-// On return the root belongs on the edge *ptruNode1 -- *ptruNode2, with
-// *ptrdLength1 and *ptrdLength2 the two lengths computed for that edge
-// by the method selected with RootMethod.
-// Quits if the tree is already rooted, has fewer than two nodes, or
-// RootMethod is not one of the supported methods.
-void FindRoot(const Tree &tree, unsigned *ptruNode1, unsigned *ptruNode2,
-  double *ptrdLength1, double *ptrdLength2,
-  ROOT RootMethod)
-    {
-#if TRACE
-    tree.LogMe();
-#endif
-    if (tree.IsRooted())
-        Quit("FindRoot: tree already rooted");
-
-    const unsigned uNodeCount = tree.GetNodeCount();
-
-    if (uNodeCount < 2)
-        Quit("Root: don't support trees with < 2 edges");
-
-// One EdgeInfo per (node, neighbor slot); a node has up to 3 neighbors.
-    EdgeInfo **EIs = new EdgeInfo *[uNodeCount];
-    for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
-        EIs[uNodeIndex] = new EdgeInfo[3];
-
-// Seed the work list with the edges that point at leaves.
-    EdgeList Edges;
-    for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
-        if (tree.IsLeaf(uNodeIndex))
-            {
-            unsigned uParent = tree.GetNeighbor1(uNodeIndex);
-            Edges.Add(uParent, uNodeIndex);
-            }
-
-#if TRACE
-    Log("Edges: ");
-    Edges.LogMe();
-#endif
-
-// Main loop: iterate until all distances known
-    for (;;)
-        {
-        EdgeList NextEdges;
-
-#if TRACE
-        Log("\nTop of main loop\n");
-        Log("Edges: ");
-        Edges.LogMe();
-        Log("MDs:\n");
-        ListEIs(EIs, uNodeCount);
-#endif
-
-        // For all edges
-        const unsigned uEdgeCount = Edges.GetCount();
-        if (0 == uEdgeCount)
-            break;
-        for (unsigned n = 0; n < uEdgeCount; ++n)
-            {
-            unsigned uNodeFrom;
-            unsigned uNodeTo;
-            Edges.GetEdge(n, &uNodeFrom, &uNodeTo);
-
-            CalcInfo(tree, uNodeFrom, uNodeTo, EIs);
-#if TRACE
-            Log("Edge %u -> %u\n", uNodeFrom, uNodeTo);
-#endif
-        // Queue each edge pointing at uNodeFrom that is still unknown
-        // but whose onward edges have all been computed.
-            const unsigned uNeighborCount = tree.GetNeighborCount(uNodeFrom);
-            for (unsigned i = 0; i < uNeighborCount; ++i)
-                {
-                const unsigned uNeighborIndex = tree.GetNeighbor(uNodeFrom, i);
-                if (!Known(tree, EIs, uNeighborIndex, uNodeFrom) &&
-                  AllKnownOut(tree, EIs, uNeighborIndex, uNodeFrom))
-                    NextEdges.Add(uNeighborIndex, uNodeFrom);
-                }
-            }
-        Edges.Copy(NextEdges);
-        }
-
-#if TRACE
-    ListEIs(EIs, uNodeCount);
-#endif
-
-    switch (RootMethod)
-        {
-    case ROOT_MidLongestSpan:
-        RootByMidLongestSpan(tree, EIs, ptruNode1, ptruNode2,
-          ptrdLength1, ptrdLength2);
-        break;
-
-    case ROOT_MinAvgLeafDist:
-        RootByMinAvgLeafDist(tree, EIs, ptruNode1, ptruNode2,
-          ptrdLength1, ptrdLength2);
-        break;
-
-    default:
-        Quit("Invalid RootMethod=%d", RootMethod);
-        }
-
-    for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
-        delete[] EIs[uNodeIndex];
-    delete[] EIs;
-    }
-
-// Place the root at the middle of the longest leaf-to-leaf span.
-// First finds the leaf whose span (edge to its neighbor plus the
-// maximum distance from that neighbor to a leaf) is largest, then walks
-// from that leaf along the m_uMaxStep links until half of the span
-// length has been covered. The edge on which the midpoint falls is
-// returned in *ptruNode1 / *ptruNode2, with *ptrdLength1 the distance
-// from node 1 to the midpoint and *ptrdLength2 the rest of the edge.
-static void RootByMidLongestSpan(const Tree &tree, EdgeInfo **EIs,
- unsigned *ptruNode1, unsigned *ptruNode2,
- double *ptrdLength1, double *ptrdLength2)
- {
- const unsigned uNodeCount = tree.GetNodeCount();
-
-// Find the leaf at one end of the longest span.
- unsigned uLeaf1 = NULL_NEIGHBOR;
- unsigned uMostDistantLeaf = NULL_NEIGHBOR;
- double dMaxDist = -VERY_LARGE_DOUBLE;
- for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
- {
- if (!tree.IsLeaf(uNodeIndex))
- continue;
-
- const unsigned uNode2 = tree.GetNeighbor1(uNodeIndex);
- if (NULL_NEIGHBOR == uNode2)
- Quit("RootByMidLongestSpan: internal error 0");
- const double dEdgeLength = tree.GetEdgeLength(uNodeIndex, uNode2);
-// Slot 0 is taken to hold the leaf's edge to its neighbor 1; the
-// "internal error 2" check below verifies that this is the case.
- const EdgeInfo &EI = EIs[uNodeIndex][0];
- if (!EI.m_bSet)
- Quit("RootByMidLongestSpan: internal error 1");
- if (EI.m_uNode1 != uNodeIndex || EI.m_uNode2 != uNode2)
- Quit("RootByMidLongestSpan: internal error 2");
- const double dSpanLength = dEdgeLength + EI.m_dMaxDistToLeaf;
- if (dSpanLength > dMaxDist)
- {
- dMaxDist = dSpanLength;
- uLeaf1 = uNodeIndex;
- uMostDistantLeaf = EI.m_uMostDistantLeaf;
- }
- }
-
- if (NULL_NEIGHBOR == uLeaf1)
- Quit("RootByMidLongestSpan: internal error 3");
-
-// The midpoint lies at half the span length from either end.
- const double dTreeHeight = dMaxDist/2.0;
- unsigned uNode1 = uLeaf1;
- unsigned uNode2 = tree.GetNeighbor1(uLeaf1);
- double dAccumSpanLength = 0;
-
-#if TRACE
- Log("RootByMidLongestSpan: span=%u", uLeaf1);
-#endif
-
-// Walk along the span until the edge containing the midpoint is found.
- for (;;)
- {
- const double dEdgeLength = tree.GetEdgeLength(uNode1, uNode2);
-#if TRACE
- Log("->%u(%g;%g)", uNode2, dEdgeLength, dAccumSpanLength);
-#endif
- if (dAccumSpanLength + dEdgeLength >= dTreeHeight)
- {
- *ptruNode1 = uNode1;
- *ptruNode2 = uNode2;
- *ptrdLength1 = dTreeHeight - dAccumSpanLength;
- *ptrdLength2 = dEdgeLength - *ptrdLength1;
-#if TRACE
- {
- const EdgeInfo &EI = EIs[uLeaf1][0];
- Log("...\n");
- Log("Midpoint: Leaf1=%u Leaf2=%u Node1=%u Node2=%u Length1=%g Length2=%g\n",
- uLeaf1, EI.m_uMostDistantLeaf, *ptruNode1, *ptruNode2, *ptrdLength1, *ptrdLength2);
- }
-#endif
- return;
- }
-
-// Reaching the far leaf without passing the midpoint should not happen.
- if (tree.IsLeaf(uNode2))
- Quit("RootByMidLongestSpan: internal error 4");
-
- dAccumSpanLength += dEdgeLength;
- const unsigned uSub = tree.GetNeighborSubscript(uNode1, uNode2);
- const EdgeInfo &EI = EIs[uNode1][uSub];
- if (!EI.m_bSet)
- Quit("RootByMidLongestSpan: internal error 5");
-
-// Advance one edge towards the most distant leaf.
- uNode1 = uNode2;
- uNode2 = EI.m_uMaxStep;
- }
- }
-
-/***
-Root by balancing average distance to leaves.
-The root is a point p such that the average
-distance to the leaves to the left of p is the
-same as the average distance to those to the right.
-
-This is the method used by CLUSTALW, which
-was originally used in PROFILEWEIGHT:
-
- Thompson et al. (1994) CABIOS (10) 1, 19-29.
-***/
-
-// Place the root at a point where the average distance to the leaves
-// on one side equals the average distance to the leaves on the other.
-// Every edge is examined once; among the edges that can hold such a
-// point, the one giving the smallest height (largest distance from the
-// point to a leaf) is chosen. The edge is returned in *ptruNode1 /
-// *ptruNode2, with *ptrdLength1 and *ptrdLength2 the two parts into
-// which the point divides it. Quits if no edge qualifies.
-static void RootByMinAvgLeafDist(const Tree &tree, EdgeInfo **EIs,
-  unsigned *ptruNode1, unsigned *ptruNode2,
-  double *ptrdLength1, double *ptrdLength2)
-    {
-    const unsigned uNodeCount = tree.GetNodeCount();
-    const unsigned uLeafCount = tree.GetLeafCount();
-    unsigned uNode1 = NULL_NEIGHBOR;
-    unsigned uNode2 = NULL_NEIGHBOR;
-    double dMinHeight = VERY_LARGE_DOUBLE;
-    double dBestLength1 = VERY_LARGE_DOUBLE;
-    double dBestLength2 = VERY_LARGE_DOUBLE;
-
-    for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
-        {
-        const unsigned uNeighborCount = tree.GetNeighborCount(uNodeIndex);
-        for (unsigned uSub = 0; uSub < uNeighborCount; ++uSub)
-            {
-            const unsigned uNeighborIndex = tree.GetNeighbor(uNodeIndex, uSub);
-
-            // Avoid visiting same edge a second time in reversed order.
-            if (uNeighborIndex < uNodeIndex)
-                continue;
-
-            const unsigned uSubRev = tree.GetNeighborSubscript(uNeighborIndex, uNodeIndex);
-            if (NULL_NEIGHBOR == uSubRev)
-                Quit("RootByMinAvgLeafDist, internal error 1");
-
-            // Get info for edges Node1->Node2 and Node2->Node1 (reversed)
-            const EdgeInfo &EI = EIs[uNodeIndex][uSub];
-            const EdgeInfo &EIRev = EIs[uNeighborIndex][uSubRev];
-
-            // Both directions must have been computed before any of
-            // their other fields can be trusted.
-            if (!EI.m_bSet || !EIRev.m_bSet)
-                Quit("RootByMinAvgLeafDist, internal error 3");
-            if (EI.m_uNode1 != uNodeIndex || EI.m_uNode2 != uNeighborIndex ||
-              EIRev.m_uNode1 != uNeighborIndex || EIRev.m_uNode2 != uNodeIndex)
-                Quit("RootByMinAvgLeafDist, internal error 2");
-            if (uLeafCount != EI.m_uLeafCount + EIRev.m_uLeafCount)
-                Quit("RootByMinAvgLeafDist, internal error 4");
-
-            const double dEdgeLength = tree.GetEdgeLength(uNodeIndex, uNeighborIndex);
-            if (dEdgeLength != tree.GetEdgeLength(uNeighborIndex, uNodeIndex))
-                Quit("RootByMinAvgLeafDist, internal error 5");
-
-            // Consider point p on edge 12 in tree (1=Node, 2=Neighbor).
-            //
-            // -----         ----
-            //      |       |
-            //      1----p--2
-            //      |       |
-            // -----         ----
-            //
-            // Define:
-            //    ADLp = average distance to leaves to left of point p.
-            //    ADRp = average distance to leaves to right of point p.
-            //    L = edge length = distance 12
-            //    x = distance 1p
-            // So distance p2 = L - x.
-            // Average distance from p to leaves on left of p is:
-            //    ADLp = ADL1 + x
-            // Average distance from p to leaves on right of p is:
-            //    ADRp = ADR2 + (L - x)
-            // To be a root, we require these two distances to be equal,
-            //    ADLp = ADRp
-            //    ADL1 + x = ADR2 + (L - x)
-            // Solving for x,
-            //    x = (ADR2 - ADL1 + L)/2
-            // If 0 <= x <= L, we can place the root on edge 12.
-
-            const double ADL1 = EI.m_dTotalDistToLeaves / EI.m_uLeafCount;
-            const double ADR2 = EIRev.m_dTotalDistToLeaves / EIRev.m_uLeafCount;
-
-            const double x = (ADR2 - ADL1 + dEdgeLength)/2.0;
-            if (x >= 0 && x <= dEdgeLength)
-                {
-                const double dLength1 = x;
-                const double dLength2 = dEdgeLength - x;
-                const double dHeight1 = EI.m_dMaxDistToLeaf + dLength1;
-                const double dHeight2 = EIRev.m_dMaxDistToLeaf + dLength2;
-                const double dHeight = dHeight1 >= dHeight2 ? dHeight1 : dHeight2;
-#if TRACE
-                Log("Candidate root Node1=%u Node2=%u Height=%g\n",
-                  uNodeIndex, uNeighborIndex, dHeight);
-#endif
-                if (dHeight < dMinHeight)
-                    {
-                    uNode1 = uNodeIndex;
-                    uNode2 = uNeighborIndex;
-                    dBestLength1 = dLength1;
-                    dBestLength2 = dLength2;
-                    dMinHeight = dHeight;
-                    }
-                }
-            }
-        }
-
-    if (NULL_NEIGHBOR == uNode1 || NULL_NEIGHBOR == uNode2)
-        Quit("RootByMinAvgLeafDist, internal error 6");
-
-#if TRACE
-    Log("Best root Node1=%u Node2=%u Length1=%g Length2=%g Height=%g\n",
-      uNode1, uNode2, dBestLength1, dBestLength2, dMinHeight);
-#endif
-
-    *ptruNode1 = uNode1;
-    *ptruNode2 = uNode2;
-    *ptrdLength1 = dBestLength1;
-    *ptrdLength2 = dBestLength2;
-    }
-
-// Re-root a rooted tree with the given method. With ROOT_Pseudo the
-// existing root is kept; otherwise the root is deleted and the tree is
-// rooted again by RootUnrootedTree. Quits if the tree is not rooted.
-void FixRoot(Tree &tree, ROOT Method)
-    {
-    if (!tree.IsRooted())
-        Quit("FixRoot: expecting rooted tree");
-
-// Anything other than the pseudo-root (the root assigned by
-// clustering) means the root has to be recomputed.
-    if (Method != ROOT_Pseudo)
-        {
-        tree.UnrootByDeletingRoot();
-        tree.RootUnrootedTree(Method);
-        }
-    }
Deleted: trunk/packages/muscle/trunk/phy4.cpp
===================================================================
--- trunk/packages/muscle/trunk/phy4.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/phy4.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,295 +0,0 @@
-#include "muscle.h"
-#include "tree.h"
-#include <stdio.h>
-
-#define TRACE 0
-
-// Cut a rooted tree at height dMaxHeight. Every non-root node whose own
-// height is at most dMaxHeight while its parent's height is above it is
-// stored in Subtrees; *ptruSubtreeCount receives the number stored.
-// Quits if the tree is not rooted.
-void ClusterByHeight(const Tree &tree, double dMaxHeight, unsigned Subtrees[],
-  unsigned *ptruSubtreeCount)
-    {
-    if (!tree.IsRooted())
-        Quit("ClusterByHeight: requires rooted tree");
-
-#if TRACE
-    Log("ClusterByHeight, max height=%g\n", dMaxHeight);
-#endif
-
-    const unsigned uTotalNodes = tree.GetNodeCount();
-    unsigned uFound = 0;
-    for (unsigned uNode = 0; uNode < uTotalNodes; ++uNode)
-        {
-        if (!tree.IsRoot(uNode))
-            {
-            const unsigned uAbove = tree.GetParent(uNode);
-            const double dOwnHeight = tree.GetNodeHeight(uNode);
-            const double dAboveHeight = tree.GetNodeHeight(uAbove);
-
-#if TRACE
-            Log("Node %3u Height %5.2f ParentHeight %5.2f\n",
-              uNode, dOwnHeight, dAboveHeight);
-#endif
-        // The cut line passes between this node and its parent.
-            if (dAboveHeight > dMaxHeight && dOwnHeight <= dMaxHeight)
-                {
-                Subtrees[uFound] = uNode;
-#if TRACE
-                Log("Subtree[%u]=%u\n", uFound, uNode);
-#endif
-                ++uFound;
-                }
-            }
-        }
-    *ptruSubtreeCount = uFound;
-    }
-
-// One splitting step: among the uCount nodes in Subfams, pick the one
-// that owns the highest child, replace it by its left child and append
-// its right child at Subfams[uCount]. Quits if no entry has children.
-static void ClusterBySubfamCount_Iteration(const Tree &tree, unsigned Subfams[],
-  unsigned uCount)
-    {
-// Locate the entry whose left or right child has the greatest height.
-    int iBest = -1;
-    double dBestHeight = -1e20;
-
-    for (int i = 0; i < (int) uCount; ++i)
-        {
-        const unsigned uCandidate = Subfams[i];
-        if (!tree.IsLeaf(uCandidate))
-            {
-            const unsigned uKidLeft = tree.GetLeft(uCandidate);
-            const double dKidLeftHeight = tree.GetNodeHeight(uKidLeft);
-            if (dKidLeftHeight > dBestHeight)
-                {
-                iBest = i;
-                dBestHeight = dKidLeftHeight;
-                }
-
-            const unsigned uKidRight = tree.GetRight(uCandidate);
-            const double dKidRightHeight = tree.GetNodeHeight(uKidRight);
-            if (dKidRightHeight > dBestHeight)
-                {
-                iBest = i;
-                dBestHeight = dKidRightHeight;
-                }
-            }
-        }
-
-    if (iBest == -1)
-        Quit("CBSFCIter: failed to find highest child");
-
-    const unsigned uSplit = Subfams[iBest];
-    const unsigned uSplitLeft = tree.GetLeft(uSplit);
-    const unsigned uSplitRight = tree.GetRight(uSplit);
-
-// The left child takes over the parent's slot; the right child goes at
-// the end of the list.
-    Subfams[iBest] = uSplitLeft;
-    Subfams[uCount] = uSplitRight;
-
-#if TRACE
-    {
-    Log("Iter %3u:", uCount);
-    for (unsigned n = 0; n < uCount; ++n)
-        Log(" %u", Subfams[n]);
-    Log("\n");
-    }
-#endif
-    }
-
-// Divide a tree containing N leaves into k families by
-// cutting the tree at a horizontal line at some height.
-// Each internal node defines a height for the cut,
-// considering all internal nodes enumerates all distinct
-// cuts. Visit internal nodes in decreasing order of height.
-// Visiting the node corresponds to moving the horizontal
-// line down to cut the tree at the height of that node.
-// We consider the cut to be "infinitesimally below"
-// the node, so the effect is to remove the current node
-// from the list of subfamilies and add its two children.
-// We must visit a parent before its children (so care may
-// be needed to handle zero edge lengths properly).
-// We assume that N is small, and write dumb O(N^2) code.
-// More efficient strategies are possible for large N
-// by maintaining a list of nodes sorted by height.
-// Split the tree into uSubfamCount subfamilies, written to Subfams as
-// node indexes; *ptruSubfamCount receives the number written. An empty
-// tree gives no subfamilies, and asking for at least as many
-// subfamilies as there are leaves gives node indexes 0..leaves-1.
-void ClusterBySubfamCount(const Tree &tree, unsigned uSubfamCount,
-  unsigned Subfams[], unsigned *ptruSubfamCount)
-    {
-    const unsigned uTotalNodes = tree.GetNodeCount();
-    const unsigned uTotalLeaves = (uTotalNodes + 1)/2;
-
-// Nothing to split in an empty tree.
-    if (uTotalNodes == 0)
-        {
-        *ptruSubfamCount = 0;
-        return;
-        }
-
-// Not enough leaves to go round: one subfamily per node index
-// 0..uTotalLeaves-1.
-    if (uTotalLeaves <= uSubfamCount)
-        {
-        unsigned uNext = 0;
-        while (uNext < uTotalLeaves)
-            {
-            Subfams[uNext] = uNext;
-            ++uNext;
-            }
-        *ptruSubfamCount = uTotalLeaves;
-        return;
-        }
-
-// Start with the root as the only subfamily, then split one subfamily
-// per step until the requested number is reached.
-    Subfams[0] = tree.GetRootNodeIndex();
-    unsigned uHave = 1;
-    while (uHave < uSubfamCount)
-        {
-        ClusterBySubfamCount_Iteration(tree, Subfams, uHave);
-        ++uHave;
-        }
-
-    *ptruSubfamCount = uSubfamCount;
-    }
-
-// Append the leaves below uNodeIndex to Leaves, left subtree first,
-// advancing uLeafCount (in-out) for each leaf stored.
-static void GetLeavesRecurse(const Tree &tree, unsigned uNodeIndex,
-  unsigned Leaves[], unsigned &uLeafCount /* in-out */)
-    {
-    if (!tree.IsLeaf(uNodeIndex))
-        {
-    // Fetch both children before descending into either of them.
-        const unsigned uKidLeft = tree.GetLeft(uNodeIndex);
-        const unsigned uKidRight = tree.GetRight(uNodeIndex);
-        GetLeavesRecurse(tree, uKidLeft, Leaves, uLeafCount);
-        GetLeavesRecurse(tree, uKidRight, Leaves, uLeafCount);
-        return;
-        }
-
-    Leaves[uLeafCount] = uNodeIndex;
-    uLeafCount += 1;
-    }
-
-// Collect into Leaves the leaves of the subtree under uNodeIndex;
-// *ptruLeafCount receives how many were stored.
-void GetLeaves(const Tree &tree, unsigned uNodeIndex, unsigned Leaves[],
-  unsigned *ptruLeafCount)
-    {
-// Count in a local so the caller's value is written exactly once.
-    unsigned uFound = 0;
-    GetLeavesRecurse(tree, uNodeIndex, Leaves, uFound);
-    ptruLeafCount[0] = uFound;
-    }
-
-// Rebuild this tree as a pruned copy of the rooted tree given.
-// Each of the uSubfamCount nodes listed in Subfams becomes a leaf of
-// the pruned tree, named "Subfam_1", "Subfam_2", ...; the nodes on the
-// paths from those nodes up to the root become its internal nodes, and
-// edge lengths are copied from the unpruned tree.
-// Quits if the given tree is not rooted, or if the number of internal
-// nodes found does not match the 2*uSubfamCount - 1 nodes allocated.
-void Tree::PruneTree(const Tree &tree, unsigned Subfams[],
-  unsigned uSubfamCount)
-    {
-    if (!tree.IsRooted())
-        Quit("Tree::PruneTree: requires rooted tree");
-
-    Clear();
-
-    m_uNodeCount = 2*uSubfamCount - 1;
-    InitCache(m_uNodeCount);
-
-    const unsigned uUnprunedNodeCount = tree.GetNodeCount();
-
-    unsigned *uUnprunedToPrunedIndex = new unsigned[uUnprunedNodeCount];
-    unsigned *uPrunedToUnprunedIndex = new unsigned[m_uNodeCount];
-
-    for (unsigned n = 0; n < uUnprunedNodeCount; ++n)
-        uUnprunedToPrunedIndex[n] = NULL_NEIGHBOR;
-
-    for (unsigned n = 0; n < m_uNodeCount; ++n)
-        uPrunedToUnprunedIndex[n] = NULL_NEIGHBOR;
-
-// Create mapping between unpruned and pruned node indexes
-    unsigned uInternalNodeIndex = uSubfamCount;
-    for (unsigned uSubfamIndex = 0; uSubfamIndex < uSubfamCount; ++uSubfamIndex)
-        {
-        unsigned uUnprunedNodeIndex = Subfams[uSubfamIndex];
-        uUnprunedToPrunedIndex[uUnprunedNodeIndex] = uSubfamIndex;
-        uPrunedToUnprunedIndex[uSubfamIndex] = uUnprunedNodeIndex;
-    // Climb towards the root, numbering each ancestor the first time
-    // it is met.
-        for (;;)
-            {
-            uUnprunedNodeIndex = tree.GetParent(uUnprunedNodeIndex);
-            if (tree.IsRoot(uUnprunedNodeIndex))
-                break;
-
-            // Already visited this node?
-            if (NULL_NEIGHBOR != uUnprunedToPrunedIndex[uUnprunedNodeIndex])
-                break;
-
-            // Too many ancestors for the pruned tree: report it rather
-            // than write past the end of the pruned-to-unpruned map.
-            if (uInternalNodeIndex >= m_uNodeCount)
-                Quit("Tree::PruneTree, Internal error");
-
-            uUnprunedToPrunedIndex[uUnprunedNodeIndex] = uInternalNodeIndex;
-            uPrunedToUnprunedIndex[uInternalNodeIndex] = uUnprunedNodeIndex;
-
-            ++uInternalNodeIndex;
-            }
-        }
-
-// The root takes the next pruned index; it is range-checked here
-// because it is used as a subscript just below.
-    if (uInternalNodeIndex >= m_uNodeCount)
-        Quit("Tree::PruneTree, Internal error");
-
-    const unsigned uUnprunedRootIndex = tree.GetRootNodeIndex();
-    uUnprunedToPrunedIndex[uUnprunedRootIndex] = uInternalNodeIndex;
-    uPrunedToUnprunedIndex[uInternalNodeIndex] = uUnprunedRootIndex;
-
-#if TRACE
-    {
-    Log("Pruned to unpruned:\n");
-    for (unsigned i = 0; i < m_uNodeCount; ++i)
-        Log(" [%u]=%u", i, uPrunedToUnprunedIndex[i]);
-    Log("\n");
-    Log("Unpruned to pruned:\n");
-    for (unsigned i = 0; i < uUnprunedNodeCount; ++i)
-        {
-        unsigned n = uUnprunedToPrunedIndex[i];
-        if (n != NULL_NEIGHBOR)
-            Log(" [%u]=%u", i, n);
-        }
-    Log("\n");
-    }
-#endif
-
-    if (uInternalNodeIndex != m_uNodeCount - 1)
-        Quit("Tree::PruneTree, Internal error");
-
-// Nodes 0, 1 ... are the leaves
-    for (unsigned uSubfamIndex = 0; uSubfamIndex < uSubfamCount; ++uSubfamIndex)
-        {
-        char szName[32];
-        sprintf(szName, "Subfam_%u", uSubfamIndex + 1);
-        m_ptrName[uSubfamIndex] = strsave(szName);
-        }
-
-// Internal nodes: copy the children and edge lengths of the matching
-// unpruned node, translated to pruned indexes.
-    for (unsigned uPrunedNodeIndex = uSubfamCount; uPrunedNodeIndex < m_uNodeCount;
-      ++uPrunedNodeIndex)
-        {
-        unsigned uUnprunedNodeIndex = uPrunedToUnprunedIndex[uPrunedNodeIndex];
-
-        const unsigned uUnprunedLeft = tree.GetLeft(uUnprunedNodeIndex);
-        const unsigned uUnprunedRight = tree.GetRight(uUnprunedNodeIndex);
-
-        const unsigned uPrunedLeft = uUnprunedToPrunedIndex[uUnprunedLeft];
-        const unsigned uPrunedRight = uUnprunedToPrunedIndex[uUnprunedRight];
-
-        const double dLeftLength =
-          tree.GetEdgeLength(uUnprunedNodeIndex, uUnprunedLeft);
-        const double dRightLength =
-          tree.GetEdgeLength(uUnprunedNodeIndex, uUnprunedRight);
-
-        m_uNeighbor2[uPrunedNodeIndex] = uPrunedLeft;
-        m_uNeighbor3[uPrunedNodeIndex] = uPrunedRight;
-
-        m_dEdgeLength1[uPrunedLeft] = dLeftLength;
-        m_dEdgeLength1[uPrunedRight] = dRightLength;
-
-        m_uNeighbor1[uPrunedLeft] = uPrunedNodeIndex;
-        m_uNeighbor1[uPrunedRight] = uPrunedNodeIndex;
-
-        m_bHasEdgeLength1[uPrunedLeft] = true;
-        m_bHasEdgeLength1[uPrunedRight] = true;
-
-        m_dEdgeLength2[uPrunedNodeIndex] = dLeftLength;
-        m_dEdgeLength3[uPrunedNodeIndex] = dRightLength;
-
-        m_bHasEdgeLength2[uPrunedNodeIndex] = true;
-        m_bHasEdgeLength3[uPrunedNodeIndex] = true;
-        }
-
-    m_uRootNodeIndex = uUnprunedToPrunedIndex[uUnprunedRootIndex];
-
-    m_bRooted = true;
-
-    Validate();
-
-// Both index maps are scratch space owned by this function.
-    delete[] uUnprunedToPrunedIndex;
-    delete[] uPrunedToUnprunedIndex;
-    }
-
-// Translate uCount leaf node indexes into leaf ids: Ids[n] receives
-// tree.GetLeafId(Leaves[n]).
-void LeafIndexesToIds(const Tree &tree, const unsigned Leaves[], unsigned uCount,
-  unsigned Ids[])
-    {
-    unsigned uPos = 0;
-    while (uPos < uCount)
-        {
-        const unsigned uLeafNode = Leaves[uPos];
-        Ids[uPos] = tree.GetLeafId(uLeafNode);
-        ++uPos;
-        }
-    }
Deleted: trunk/packages/muscle/trunk/phyfromclust.cpp
===================================================================
--- trunk/packages/muscle/trunk/phyfromclust.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/phyfromclust.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,95 +0,0 @@
-#include "muscle.h"
-#include "tree.h"
-#include "clust.h"
-
-// Allocate the per-node arrays with room for uCacheCount nodes and set
-// every entry to its "empty" value: no neighbors, no edge lengths, no
-// height, no name and an insane id.
-void Tree::InitCache(unsigned uCacheCount)
-    {
-    m_uCacheCount = uCacheCount;
-
-    m_uNeighbor1 = new unsigned[m_uCacheCount];
-    m_uNeighbor2 = new unsigned[m_uCacheCount];
-    m_uNeighbor3 = new unsigned[m_uCacheCount];
-
-    m_Ids = new unsigned[m_uCacheCount];
-
-    m_dEdgeLength1 = new double[m_uCacheCount];
-    m_dEdgeLength2 = new double[m_uCacheCount];
-    m_dEdgeLength3 = new double[m_uCacheCount];
-    m_dHeight = new double[m_uCacheCount];
-
-    m_bHasEdgeLength1 = new bool[m_uCacheCount];
-    m_bHasEdgeLength2 = new bool[m_uCacheCount];
-    m_bHasEdgeLength3 = new bool[m_uCacheCount];
-    m_bHasHeight = new bool[m_uCacheCount];
-
-    m_ptrName = new char *[m_uCacheCount];
-
-// Initialize exactly the entries that were allocated. Looping to the
-// node count instead would leave entries uninitialized when the cache
-// is larger than the tree, and overrun the arrays when it is smaller.
-    for (unsigned uNodeIndex = 0; uNodeIndex < m_uCacheCount; ++uNodeIndex)
-        {
-        m_uNeighbor1[uNodeIndex] = NULL_NEIGHBOR;
-        m_uNeighbor2[uNodeIndex] = NULL_NEIGHBOR;
-        m_uNeighbor3[uNodeIndex] = NULL_NEIGHBOR;
-        m_bHasEdgeLength1[uNodeIndex] = false;
-        m_bHasEdgeLength2[uNodeIndex] = false;
-        m_bHasEdgeLength3[uNodeIndex] = false;
-        m_bHasHeight[uNodeIndex] = false;
-        m_dEdgeLength1[uNodeIndex] = dInsane;
-        m_dEdgeLength2[uNodeIndex] = dInsane;
-        m_dEdgeLength3[uNodeIndex] = dInsane;
-        m_dHeight[uNodeIndex] = dInsane;
-        m_ptrName[uNodeIndex] = 0;
-        m_Ids[uNodeIndex] = uInsane;
-        }
-    }
-
-// Rebuild this tree from the clustering C, using the same node indexes.
-// Leaves take their name and id from C; each internal node takes its
-// left and right children from C, with the lengths C reports for those
-// children as edge lengths. The result is always marked as rooted.
-void Tree::FromClust(Clust &C)
-    {
-    Clear();
-
-    m_uNodeCount = C.GetNodeCount();
-    InitCache(m_uNodeCount);
-
-// Cluster is always rooted. An unrooted cluster
-// is represented by a pseudo-root, which we fix later.
-    m_bRooted = true;
-    const unsigned uTop = C.GetRootNodeIndex();
-    m_uRootNodeIndex = uTop;
-    m_uNeighbor1[uTop] = NULL_NEIGHBOR;
-    m_bHasEdgeLength1[uTop] = false;
-
-    for (unsigned uNode = 0; uNode < m_uNodeCount; ++uNode)
-        {
-        if (!C.IsLeaf(uNode))
-            {
-            const unsigned uKidLeft = C.GetLeftIndex(uNode);
-            const unsigned uKidRight = C.GetRightIndex(uNode);
-
-            const double dKidLeftLength = C.GetLength(uKidLeft);
-            const double dKidRightLength = C.GetLength(uKidRight);
-
-        // Downward links: neighbor 2 is the left child, 3 the right.
-            m_uNeighbor2[uNode] = uKidLeft;
-            m_dEdgeLength2[uNode] = dKidLeftLength;
-            m_bHasEdgeLength2[uNode] = true;
-
-            m_uNeighbor3[uNode] = uKidRight;
-            m_dEdgeLength3[uNode] = dKidRightLength;
-            m_bHasEdgeLength3[uNode] = true;
-
-        // Upward links: each child records this node as neighbor 1.
-            m_uNeighbor1[uKidLeft] = uNode;
-            m_dEdgeLength1[uKidLeft] = dKidLeftLength;
-            m_bHasEdgeLength1[uKidLeft] = true;
-
-            m_uNeighbor1[uKidRight] = uNode;
-            m_dEdgeLength1[uKidRight] = dKidRightLength;
-            m_bHasEdgeLength1[uKidRight] = true;
-            continue;
-            }
-
-    // Leaf: copy its name and id.
-        const char *szLeafName = C.GetNodeName(uNode);
-        m_ptrName[uNode] = strsave(szLeafName);
-        m_Ids[uNode] = C.GetNodeId(uNode);
-        }
-    Validate();
-    }
Deleted: trunk/packages/muscle/trunk/phyfromfile.cpp
===================================================================
--- trunk/packages/muscle/trunk/phyfromfile.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/phyfromfile.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,269 +0,0 @@
-#include "muscle.h"
-#include "tree.h"
-#include "textfile.h"
-
-#define TRACE 0
-
-// Tokens in Newick files are:
-// ( ) : , ;
-// string
-// 'string'
-// "string"
-// [ comment ]
-//
-// We can't safely distinguish between identifiers and floating point
-// numbers at the lexical level (because identifiers may be numeric,
-// or start with digits), so both edge lengths and identifiers are
-// returned as strings.
-
-const char *Tree::NTTStr(NEWICK_TOKEN_TYPE NTT) const
- {
- switch (NTT)
- {
-#define c(x) case NTT_##x: return #x;
- c(Unknown)
- c(Lparen)
- c(Rparen)
- c(Colon)
- c(Comma)
- c(Semicolon)
- c(String)
- c(SingleQuotedString)
- c(DoubleQuotedString)
- c(Comment)
-#undef c
- }
- return "??";
- }
-
-NEWICK_TOKEN_TYPE Tree::GetToken(TextFile &File, char szToken[], unsigned uBytes) const
- {
-// Skip leading white space
- File.SkipWhite();
-
- char c;
- File.GetCharX(c);
-
-// In case a single-character token
- szToken[0] = c;
- szToken[1] = 0;
-
- unsigned uBytesCopied = 0;
- NEWICK_TOKEN_TYPE TT;
- switch (c)
- {
- case '(':
- return NTT_Lparen;
-
- case ')':
- return NTT_Rparen;
-
- case ':':
- return NTT_Colon;
-
- case ';':
- return NTT_Semicolon;
-
- case ',':
- return NTT_Comma;
-
- case '\'':
- TT = NTT_SingleQuotedString;
- File.GetCharX(c);
- break;
-
- case '"':
- TT = NTT_DoubleQuotedString;
- File.GetCharX(c);
- break;
-
- case '[':
- TT = NTT_Comment;
- break;
-
- default:
- TT = NTT_String;
- break;
- }
-
- for (;;)
- {
- if (TT != NTT_Comment)
- {
- if (uBytesCopied < uBytes - 2)
- {
- szToken[uBytesCopied++] = c;
- szToken[uBytesCopied] = 0;
- }
- else
- Quit("Tree::GetToken: input buffer too small, token so far='%s'", szToken);
- }
- bool bEof = File.GetChar(c);
- if (bEof)
- return TT;
-
- switch (TT)
- {
- case NTT_String:
- if (0 != strchr("():;,", c))
- {
- File.PushBack(c);
- return NTT_String;
- }
- if (isspace(c))
- return NTT_String;
- break;
-
- case NTT_SingleQuotedString:
- if ('\'' == c)
- return NTT_String;
- break;
-
- case NTT_DoubleQuotedString:
- if ('"' == c)
- return NTT_String;
- break;
-
- case NTT_Comment:
- if (']' == c)
- return GetToken(File, szToken, uBytes);
- break;
-
- default:
- Quit("Tree::GetToken, invalid TT=%u", TT);
- }
- }
- }
-
-// NOTE: this hack must come after definition of Tree::GetToken.
-#if TRACE
-#define GetToken GetTokenVerbose
-#endif
-
-void Tree::FromFile(TextFile &File)
- {
-// Assume rooted.
-// If we discover that it is unrooted, will convert on the fly.
- CreateRooted();
-
- double dEdgeLength;
- bool bEdgeLength = GetGroupFromFile(File, 0, &dEdgeLength);
-
-// Next token should be either ';' for rooted tree or ',' for unrooted.
- char szToken[16];
- NEWICK_TOKEN_TYPE NTT = GetToken(File, szToken, sizeof(szToken));
-
-// If rooted, all done.
- if (NTT_Semicolon == NTT)
- {
- if (bEdgeLength)
- Log(" *** Warning *** edge length on root group in Newick file %s\n",
- File.GetFileName());
- Validate();
- return;
- }
-
- if (NTT_Comma != NTT)
- Quit("Tree::FromFile, expected ';' or ',', got '%s'", szToken);
-
- const unsigned uThirdNode = UnrootFromFile();
- bEdgeLength = GetGroupFromFile(File, uThirdNode, &dEdgeLength);
- if (bEdgeLength)
- SetEdgeLength(0, uThirdNode, dEdgeLength);
- Validate();
- }
-
-// Return true if edge length for this group.
-bool Tree::GetGroupFromFile(TextFile &File, unsigned uNodeIndex,
- double *ptrdEdgeLength)
- {
- char szToken[1024];
- NEWICK_TOKEN_TYPE NTT = GetToken(File, szToken, sizeof(szToken));
-
-// Group is either leaf name or (left, right).
- if (NTT_String == NTT)
- {
- SetLeafName(uNodeIndex, szToken);
-#if TRACE
- Log("Group is leaf '%s'\n", szToken);
-#endif
- }
- else if (NTT_Lparen == NTT)
- {
- const unsigned uLeft = AppendBranch(uNodeIndex);
- const unsigned uRight = uLeft + 1;
-
- // Left sub-group...
-#if TRACE
- Log("Got '(', group is compound, expect left sub-group\n");
-#endif
- double dEdgeLength;
- bool bLeftLength = GetGroupFromFile(File, uLeft, &dEdgeLength);
-#if TRACE
- if (bLeftLength)
- Log("Edge length for left sub-group: %.3g\n", dEdgeLength);
- else
- Log("No edge length for left sub-group\n");
-#endif
- if (bLeftLength)
- SetEdgeLength(uNodeIndex, uLeft, dEdgeLength);
-
- // ... then comma ...
-#if TRACE
- Log("Expect comma\n");
-#endif
- NTT = GetToken(File, szToken, sizeof(szToken));
- if (NTT_Comma != NTT)
- Quit("Tree::GetGroupFromFile, expected ',', got '%s'", szToken);
-
- // ...then right sub-group...
-#if TRACE
- Log("Expect right sub-group\n");
-#endif
- bool bRightLength = GetGroupFromFile(File, uRight, &dEdgeLength);
- if (bRightLength)
- SetEdgeLength(uNodeIndex, uRight, dEdgeLength);
-#if TRACE
- if (bRightLength)
- Log("Edge length for right sub-group: %.3g\n", dEdgeLength);
- else
- Log("No edge length for right sub-group\n");
-#endif
-
- // ... then closing parenthesis.
-#if TRACE
- Log("Expect closing parenthesis (or comma if > 2-ary)\n");
-#endif
- NTT = GetToken(File, szToken, sizeof(szToken));
- if (NTT_Rparen == NTT)
- ;
- else if (NTT_Comma == NTT)
- {
- File.PushBack(',');
- return false;
- }
- else
- Quit("Tree::GetGroupFromFile, expected ')' or ',', got '%s'", szToken);
- }
- else
- Quit("Tree::GetGroupFromFile, expected '(' or leaf name, got '%s'",
- szToken);
-
-// Group may optionally be followed by edge length.
- File.SkipWhite();
- char c;
- File.GetCharX(c);
-#if TRACE
- Log("Character following group, could be colon, is '%c'\n", c);
-#endif
- if (':' == c)
- {
- NTT = GetToken(File, szToken, sizeof(szToken));
- if (NTT_String != NTT)
- Quit("Tree::GetGroupFromFile, expected edge length, got '%s'", szToken);
- *ptrdEdgeLength = atof(szToken);
- return true;
- }
- File.PushBack(c);
- return false;
- }
Deleted: trunk/packages/muscle/trunk/physeq.cpp
===================================================================
--- trunk/packages/muscle/trunk/physeq.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/physeq.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,128 +0,0 @@
-#include "muscle.h"
-#include "msa.h"
-#include "textfile.h"
-
-const int BLOCKSIZE = 60;
-
-static char FixChar(char c)
- {
- switch (c)
- {
- case '(':
- case ')':
- case '[':
- case ']':
- case ':':
- case ';':
- case ',':
- return '_';
- }
- if (!isprint(c))
- return '_';
- return c;
- }
-
-static void FixName(char Name[])
- {
- while (char c = *Name)
- *Name++ = FixChar(c);
- }
-
-void MSA::ToPhySequentialFile(TextFile &File) const
- {
- const unsigned SeqCount = GetSeqCount();
- const unsigned ColCount = GetColCount();
-
- File.PutFormat("%d %d\n", SeqCount, ColCount);
-
- if (0 == ColCount)
- return;
-
- for (unsigned Seq = 0; Seq < SeqCount; ++Seq)
- {
- char Name[11];
- const char *ptrName = GetSeqName(Seq);
- size_t n = strlen(ptrName);
- if (n > 10)
- n = 10;
- memcpy(Name, ptrName, n);
- Name[n] = 0;
- FixName(Name);
- File.PutFormat("%-10.10s", Name);
-
- int BlockIndex = 0;
- int Col = 0;
- for (;;)
- {
- const unsigned MaxCols = (BlockIndex == 0) ? (BLOCKSIZE - 10) : BLOCKSIZE;
- for (unsigned ColsThisBlock = 0; ColsThisBlock < MaxCols; ++ColsThisBlock)
- {
- if (Col == ColCount)
- break;
- if (ColsThisBlock%10 == 0 && (BlockIndex == 0 || ColsThisBlock > 0))
- File.PutChar(' ');
- char c = GetChar(Seq, Col);
- if (isalpha(c))
- c = toupper(c);
- File.PutChar(c);
- ++Col;
- }
- File.PutChar('\n');
- if (Col == ColCount)
- break;
- ++BlockIndex;
- }
- }
- }
-
-void MSA::ToPhyInterleavedFile(TextFile &File) const
- {
- const unsigned SeqCount = GetSeqCount();
- const unsigned ColCount = GetColCount();
-
- File.PutFormat("%d %d\n", SeqCount, ColCount);
-
- if (0 == ColCount)
- return;
-
- int Col = 0;
- for (;;)
- {
- const unsigned ColBlockStart = Col;
- const unsigned MaxCols = (ColBlockStart == 0) ? (BLOCKSIZE - 10) : BLOCKSIZE;
-
- for (unsigned Seq = 0; Seq < SeqCount; ++Seq)
- {
- if (0 == ColBlockStart)
- {
- char Name[11];
- const char *ptrName = GetSeqName(Seq);
- size_t n = strlen(ptrName);
- if (n > 10)
- n = 10;
- memcpy(Name, ptrName, n);
- Name[n] = 0;
- FixName(Name);
- File.PutFormat("%-10.10s", Name);
- }
-
- Col = ColBlockStart;
- for (unsigned ColsThisBlock = 0; ColsThisBlock < MaxCols; ++ColsThisBlock)
- {
- if (Col == ColCount)
- break;
- if (ColsThisBlock%10 == 0 && (0 == ColBlockStart || ColsThisBlock > 0))
- File.PutChar(' ');
- char c = GetChar(Seq, Col);
- if (isalpha(c))
- c = toupper(c);
- File.PutChar(c);
- ++Col;
- }
- File.PutChar('\n');
- }
- if (Col == ColCount)
- break;
- File.PutChar('\n');
- }
- }
Deleted: trunk/packages/muscle/trunk/phytofile.cpp
===================================================================
--- trunk/packages/muscle/trunk/phytofile.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/phytofile.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,86 +0,0 @@
-#include "muscle.h"
-#include "tree.h"
-#include "textfile.h"
-
-unsigned Tree::GetAnyNonLeafNode() const
- {
- for (unsigned uNodeIndex = 0; uNodeIndex < m_uNodeCount; ++uNodeIndex)
- if (!IsLeaf(uNodeIndex))
- return uNodeIndex;
- return NULL_NEIGHBOR;
- }
-
-void Tree::ToFile(TextFile &File) const
- {
- if (IsRooted())
- {
- ToFileNodeRooted(File, m_uRootNodeIndex);
- File.PutString(";\n");
- return;
- }
-
-// Unrooted.
- unsigned uNodeIndex = GetAnyNonLeafNode();
-
- File.PutString("(\n");
- ToFileNodeUnrooted(File, m_uNeighbor1[uNodeIndex], uNodeIndex);
- File.PutString(",\n");
- ToFileNodeUnrooted(File, m_uNeighbor2[uNodeIndex], uNodeIndex);
- File.PutString(",\n");
- ToFileNodeUnrooted(File, m_uNeighbor3[uNodeIndex], uNodeIndex);
- File.PutString(");\n");
- }
-
-void Tree::ToFileNodeUnrooted(TextFile &File, unsigned uNodeIndex, unsigned uParent) const
- {
- assert(!IsRooted());
-
- bool bGroup = !IsLeaf(uNodeIndex);
- if (bGroup)
- File.PutString("(\n");
-
- if (IsLeaf(uNodeIndex))
- File.PutString(GetName(uNodeIndex));
- else
- {
- ToFileNodeUnrooted(File, GetFirstNeighbor(uNodeIndex, uParent), uNodeIndex);
- File.PutString(",\n");
- ToFileNodeUnrooted(File, GetSecondNeighbor(uNodeIndex, uParent), uNodeIndex);
- }
-
- if (bGroup)
- File.PutString(")");
-
- if (HasEdgeLength(uNodeIndex, uParent))
- File.PutFormat(":%g", GetEdgeLength(uNodeIndex, uParent));
- File.PutString("\n");
- }
-
-void Tree::ToFileNodeRooted(TextFile &File, unsigned uNodeIndex) const
- {
- assert(IsRooted());
-
- bool bGroup = !IsLeaf(uNodeIndex) || IsRoot(uNodeIndex);
- if (bGroup)
- File.PutString("(\n");
-
- if (IsLeaf(uNodeIndex))
- File.PutString(GetName(uNodeIndex));
- else
- {
- ToFileNodeRooted(File, GetLeft(uNodeIndex));
- File.PutString(",\n");
- ToFileNodeRooted(File, GetRight(uNodeIndex));
- }
-
- if (bGroup)
- File.PutString(")");
-
- if (!IsRoot(uNodeIndex))
- {
- unsigned uParent = GetParent(uNodeIndex);
- if (HasEdgeLength(uNodeIndex, uParent))
- File.PutFormat(":%g", GetEdgeLength(uNodeIndex, uParent));
- }
- File.PutString("\n");
- }
Deleted: trunk/packages/muscle/trunk/posgap.cpp
===================================================================
--- trunk/packages/muscle/trunk/posgap.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/posgap.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,141 +0,0 @@
-#include "muscle.h"
-
-// Pascaralle and Argos gap factors
-// after Table 1 in Thompson et. al. ClustalW NAR paper.
-static double PAFFacs[20] =
- {
- 1.13, // A
- 1.13, // C
- 0.96, // D
- 1.31, // E
- 1.20, // F
- 0.61, // G
- 1.00, // H
- 1.32, // I
- 0.96, // K
- 1.21, // L
- 1.29, // M
- 0.62, // N
- 0.74, // P
- 1.07, // Q
- 0.72, // R
- 0.76, // S
- 0.89, // T
- 1.25, // V
- 1.00, // Y
- 1.23, // W
- };
-
-// (Not used: does not appear to work well).
-SCORE PAFactor(const FCOUNT fcCounts[])
- {
- if (ALPHA_Amino != g_Alpha)
- Quit("PAFFactor: requires amino acid sequence");
-
- FCOUNT fLetterCount = 0;
- double dSum = 0;
- for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
- {
- const FCOUNT fCount = fcCounts[uLetter];
- dSum += fCount*PAFFacs[uLetter];
- fLetterCount += fCount;
- }
- if (0 == fLetterCount)
- return 0.5;
- return (SCORE) (dSum/fLetterCount);
- }
-
-static bool Hydrophilic[20] =
- {
- false, // A
- false, // C
- true, // D
- true, // E
- false, // F
- true, // G
- false, // H
- false, // I
- true, // K
- false, // L
- false, // M
- true, // N
- true, // P
- true, // Q
- true, // R
- true, // S
- false, // T
- false, // V
- false, // Y
- false, // W
- };
-
-bool IsHydrophilic(const FCOUNT fcCounts[])
- {
- if (ALPHA_Amino != g_Alpha)
- Quit("IsHydrophilic: requires amino acid sequence");
-
- for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
- if (fcCounts[uLetter] > 0 && !Hydrophilic[uLetter])
- return false;
- return true;
- }
-
-bool IsHydrophilic(const unsigned uCounts[])
- {
- if (ALPHA_Amino != g_Alpha)
- Quit("IsHydrophilic: requires amino acid sequence");
-
- for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
- if (uCounts[uLetter] > 0 && !Hydrophilic[uLetter])
- return false;
- return true;
- }
-
-// LIVCATMFYWHK
-// Venn Pascaralla B&T Me
-// L y y y
-// I y y y
-// V y y y
-// C y n
-// A y y y
-// T N n
-// M y y y
-// F y y y
-// Y n n
-// W y n
-// H n n
-// K n n
-static bool Hydrophobic[20] =
- {
- true, // A
- true, // C
- false, // D
- false, // E
- true, // F
- false, // G
- true, // H
- true, // I
- false, // K
- true, // L
- true, // M
- false, // N
- false, // P
- false, // Q
- false, // R
- false, // S
- true, // T
- true, // V
- true, // Y
- true, // W
- };
-
-bool IsHydrophobic(const FCOUNT fcCounts[])
- {
- if (ALPHA_Amino != g_Alpha)
- Quit("IsHydrophobic: requires amino acid sequence");
-
- for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
- if (fcCounts[uLetter] > 0.0 && !Hydrophobic[uLetter])
- return false;
- return true;
- }
Deleted: trunk/packages/muscle/trunk/ppscore.cpp
===================================================================
--- trunk/packages/muscle/trunk/ppscore.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/ppscore.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,93 +0,0 @@
-#include "muscle.h"
-#include "textfile.h"
-#include "msa.h"
-#include "tree.h"
-#include "profile.h"
-#include "objscore.h"
-
-bool g_bTracePPScore = false;
-MSA *g_ptrPPScoreMSA1 = 0;
-MSA *g_ptrPPScoreMSA2 = 0;
-
-static ProfPos *ProfileFromMSALocal(MSA &msa, Tree &tree)
- {
- const unsigned uSeqCount = msa.GetSeqCount();
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
- msa.SetSeqId(uSeqIndex, uSeqIndex);
-
- TreeFromMSA(msa, tree, g_Cluster2, g_Distance2, g_Root1);
- SetMuscleTree(tree);
- return ProfileFromMSA(msa);
- }
-
-void PPScore()
- {
- if (0 == g_pstrFileName1 || 0 == g_pstrFileName2)
- Quit("-ppscore needs -in1 and -in2");
-
- SetSeqWeightMethod(g_SeqWeight1);
-
- TextFile file1(g_pstrFileName1);
- TextFile file2(g_pstrFileName2);
-
- MSA msa1;
- MSA msa2;
-
- msa1.FromFile(file1);
- msa2.FromFile(file2);
-
- const unsigned uLength1 = msa1.GetColCount();
- const unsigned uLength2 = msa2.GetColCount();
-
- if (uLength1 != uLength2)
- Quit("Profiles must have the same length");
-
- ALPHA Alpha = ALPHA_Undefined;
- switch (g_SeqType)
- {
- case SEQTYPE_Auto:
- Alpha = msa1.GuessAlpha();
- break;
-
- case SEQTYPE_Protein:
- Alpha = ALPHA_Amino;
- break;
-
- case SEQTYPE_DNA:
- Alpha = ALPHA_DNA;
- break;
-
- case SEQTYPE_RNA:
- Alpha = ALPHA_RNA;
- break;
-
- default:
- Quit("Invalid SeqType");
- }
- SetAlpha(Alpha);
-
- msa1.FixAlpha();
- msa2.FixAlpha();
-
- if (ALPHA_DNA == Alpha || ALPHA_RNA == Alpha)
- SetPPScore(PPSCORE_SPN);
-
- const unsigned uSeqCount1 = msa1.GetSeqCount();
- const unsigned uSeqCount2 = msa2.GetSeqCount();
- const unsigned uMaxSeqCount = (uSeqCount1 > uSeqCount2 ? uSeqCount1 : uSeqCount2);
- MSA::SetIdCount(uMaxSeqCount);
-
- Tree tree1;
- Tree tree2;
- ProfPos *Prof1 = ProfileFromMSALocal(msa1, tree1);
- ProfPos *Prof2 = ProfileFromMSALocal(msa2, tree2);
-
- g_bTracePPScore = true;
- g_ptrPPScoreMSA1 = &msa1;
- g_ptrPPScoreMSA2 = &msa2;
-
- SCORE Score = ObjScoreDP_Profs(Prof1, Prof2, uLength1);
-
- Log("Score=%.4g\n", Score);
- printf("Score=%.4g\n", Score);
- }
Deleted: trunk/packages/muscle/trunk/profdb.cpp
===================================================================
--- trunk/packages/muscle/trunk/profdb.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/profdb.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,54 +0,0 @@
-#include "muscle.h"
-#include "textfile.h"
-#include "seqvect.h"
-#include "distfunc.h"
-#include "msa.h"
-#include "tree.h"
-#include "clust.h"
-#include "profile.h"
-#include "clustsetmsa.h"
-
-void ProfDB()
- {
- SetOutputFileName(g_pstrOutFileName);
- SetInputFileName(g_pstrFileName2);
- SetStartTime();
-
- TextFile file1(g_pstrFileName1);
- TextFile file2(g_pstrFileName2);
-
- SetMaxIters(g_uMaxIters);
- SetSeqWeightMethod(g_SeqWeight1);
-
- TextFile fileIn(g_pstrFileName1);
- MSA msa1;
- msa1.FromFile(fileIn);
-
- const unsigned uSeqCount1 = msa1.GetSeqCount();
- if (0 == uSeqCount1)
- Quit("No sequences in input alignment");
-
- SeqVect v;
- v.FromFASTAFile(file2);
- const unsigned uSeqCount2 = v.Length();
- if (0 == uSeqCount2)
- Quit("No sequences in input alignment");
-
- MSA::SetIdCount(uSeqCount1 + uSeqCount2);
- SetProgressDesc("Align sequence database to profile");
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount2; ++uSeqIndex)
- {
- Progress(uSeqIndex, uSeqCount2);
- Seq &s = *(v[uSeqIndex]);
- s.SetId(0);
- MSA msaTmp;
- msaTmp.FromSeq(s);
- MSA msaOut;
- ProfileProfile(msa1, msaTmp, msaOut);
- msa1.Copy(msaOut);
- }
- ProgressStepsDone();
-
- TextFile fileOut(g_pstrOutFileName, true);
- msa1.ToFile(fileOut);
- }
Deleted: trunk/packages/muscle/trunk/profile.cpp
===================================================================
--- trunk/packages/muscle/trunk/profile.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/profile.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,180 +0,0 @@
-#include "muscle.h"
-#include "textfile.h"
-#include "msa.h"
-#include "tree.h"
-#include "profile.h"
-#include "objscore.h"
-
-static ProfPos *ProfileFromMSALocal(MSA &msa, Tree &tree)
- {
- const unsigned uSeqCount = msa.GetSeqCount();
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
- msa.SetSeqId(uSeqIndex, uSeqIndex);
-
- TreeFromMSA(msa, tree, g_Cluster2, g_Distance2, g_Root1);
- SetMuscleTree(tree);
- return ProfileFromMSA(msa);
- }
-
-void ProfileProfile(MSA &msa1, MSA &msa2, MSA &msaOut)
- {
- ALPHA Alpha = ALPHA_Undefined;
- switch (g_SeqType)
- {
- case SEQTYPE_Auto:
- Alpha = msa1.GuessAlpha();
- break;
-
- case SEQTYPE_Protein:
- Alpha = ALPHA_Amino;
- break;
-
- case SEQTYPE_DNA:
- Alpha = ALPHA_DNA;
- break;
-
- case SEQTYPE_RNA:
- Alpha = ALPHA_RNA;
- break;
-
- default:
- Quit("Invalid SeqType");
- }
- SetAlpha(Alpha);
-
- msa1.FixAlpha();
- msa2.FixAlpha();
-
- if (ALPHA_DNA == Alpha || ALPHA_RNA == Alpha)
- SetPPScore(PPSCORE_SPN);
-
- unsigned uLength1;
- unsigned uLength2;
-
- uLength1 = msa1.GetColCount();
- uLength2 = msa2.GetColCount();
-
- Tree tree1;
- Tree tree2;
- ProfPos *Prof1 = ProfileFromMSALocal(msa1, tree1);
- ProfPos *Prof2 = ProfileFromMSALocal(msa2, tree2);
-
- PWPath Path;
- ProfPos *ProfOut;
- unsigned uLengthOut;
- AlignTwoProfs(Prof1, uLength1, 1.0, Prof2, uLength2, 1.0, Path, &ProfOut, &uLengthOut);
-
- AlignTwoMSAsGivenPath(Path, msa1, msa2, msaOut);
- }
-
-// Do profile-profile alignment
-void Profile()
- {
- if (0 == g_pstrFileName1 || 0 == g_pstrFileName2)
- Quit("-profile needs -in1 and -in2");
-
- SetSeqWeightMethod(g_SeqWeight1);
-
- TextFile file1(g_pstrFileName1);
- TextFile file2(g_pstrFileName2);
-
- MSA msa1;
- MSA msa2;
- MSA msaOut;
-
- msa1.FromFile(file1);
- msa2.FromFile(file2);
-
- ALPHA Alpha = ALPHA_Undefined;
- switch (g_SeqType)
- {
- case SEQTYPE_Auto:
- Alpha = msa1.GuessAlpha();
- break;
-
- case SEQTYPE_Protein:
- Alpha = ALPHA_Amino;
- break;
-
- case SEQTYPE_DNA:
- Alpha = ALPHA_DNA;
- break;
-
- case SEQTYPE_RNA:
- Alpha = ALPHA_RNA;
- break;
-
- default:
- Quit("Invalid seq type");
- }
- SetAlpha(Alpha);
- msa1.FixAlpha();
- msa2.FixAlpha();
- SetPPScore();
-
- const unsigned uSeqCount1 = msa1.GetSeqCount();
- const unsigned uSeqCount2 = msa2.GetSeqCount();
- //const unsigned uMaxSeqCount = (uSeqCount1 > uSeqCount2 ? uSeqCount1 : uSeqCount2);
- //MSA::SetIdCount(uMaxSeqCount);
- const unsigned uSumSeqCount = uSeqCount1 + uSeqCount2;
- MSA::SetIdCount(uSumSeqCount);
-
- //msa1.FromFile(file1);
- //msa2.FromFile(file2);
-
- //ALPHA Alpha = ALPHA_Undefined;
- //switch (g_SeqType)
- // {
- //case SEQTYPE_Auto:
- // Alpha = msa1.GuessAlpha();
- // break;
-
- //case SEQTYPE_Protein:
- // Alpha = ALPHA_Amino;
- // break;
-
- //case SEQTYPE_Nucleo:
- // Alpha = ALPHA_Nucleo;
- // break;
-
- //default:
- // Quit("Invalid SeqType");
- // }
- //SetAlpha(Alpha);
-
- //msa1.FixAlpha();
- //msa2.FixAlpha();
-
- //if (ALPHA_Nucleo == Alpha)
- // SetPPScore(PPSCORE_SPN);
-
- //unsigned uLength1;
- //unsigned uLength2;
-
- //uLength1 = msa1.GetColCount();
- //uLength2 = msa2.GetColCount();
-
- //const unsigned uSeqCount1 = msa1.GetSeqCount();
- //const unsigned uSeqCount2 = msa2.GetSeqCount();
- //const unsigned uMaxSeqCount = (uSeqCount1 > uSeqCount2 ? uSeqCount1 : uSeqCount2);
- //MSA::SetIdCount(uMaxSeqCount);
-
- //Tree tree1;
- //Tree tree2;
- //ProfPos *Prof1 = ProfileFromMSALocal(msa1, tree1);
- //ProfPos *Prof2 = ProfileFromMSALocal(msa2, tree2);
-
- //PWPath Path;
- //ProfPos *ProfOut;
- //unsigned uLengthOut;
- //AlignTwoProfs(Prof1, uLength1, 1.0, Prof2, uLength2, 1.0, Path, &ProfOut, &uLengthOut);
-
- //MSA msaOut;
- //AlignTwoMSAsGivenPath(Path, msa1, msa2, msaOut);
-
- ProfileProfile(msa1, msa2, msaOut);
-
-// TextFile fileOut(g_pstrOutFileName, true);
-// msaOut.ToFile(fileOut);
- MuscleOutput(msaOut);
- }
Deleted: trunk/packages/muscle/trunk/profile.h
===================================================================
--- trunk/packages/muscle/trunk/profile.h 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/profile.h 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,127 +0,0 @@
-#ifndef FastProf2_h
-#define FastProf2_h
-
-#include "msa.h"
-#include "pwpath.h"
-#include <math.h> // for log function
-
-class DiagList;
-class WeightList;
-
-struct ProfPos
- {
- bool m_bAllGaps;
- unsigned m_uSortOrder[21];
- FCOUNT m_fcCounts[20];
- FCOUNT m_LL;
- FCOUNT m_LG;
- FCOUNT m_GL;
- FCOUNT m_GG;
- SCORE m_AAScores[20];
- unsigned m_uResidueGroup;
- FCOUNT m_fOcc;
- FCOUNT m_fcStartOcc;
- FCOUNT m_fcEndOcc;
- SCORE m_scoreGapOpen;
- SCORE m_scoreGapClose;
-#if DOUBLE_AFFINE
- SCORE m_scoreGapOpen2;
- SCORE m_scoreGapClose2;
-#endif
-// SCORE m_scoreGapExtend;
- };
-
-struct ProgNode
- {
- ProgNode()
- {
- m_Prof = 0;
- m_EstringL = 0;
- m_EstringR = 0;
- }
- MSA m_MSA;
- ProfPos *m_Prof;
- PWPath m_Path;
- short *m_EstringL;
- short *m_EstringR;
- unsigned m_uLength;
- WEIGHT m_Weight;
- };
-
-extern unsigned ResidueGroup[];
-const unsigned RESIDUE_GROUP_MULTIPLE = (unsigned) ~0;
-
-extern PTR_SCOREMATRIX g_ptrScoreMatrix;
-
-ProfPos *ProfileFromMSA(const MSA &a);
-
-SCORE TraceBack(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
- unsigned uLengthB, const SCORE *DPM_, const SCORE *DPD_, const SCORE *DPI_,
- PWPath &Path);
-SCORE GlobalAlign(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
- unsigned uLengthB, PWPath &Path);
-void ProgressiveAlign(const SeqVect &v, const Tree &tree, MSA &a);
-SCORE MSAPairSP(const MSA &msa1, const MSA &msa2);
-
-void AlignTwoMSAsGivenPath(const PWPath &Path, const MSA &msaA, const MSA &msaB,
- MSA &msaCombined);
-
-void ListProfile(const ProfPos *Prof, unsigned uLength, const MSA *ptrMSA = 0);
-SCORE ScoreProfPos2(const ProfPos &PPA, const ProfPos &PPB);
-SCORE FastScorePath2(const ProfPos *PA, unsigned uLengthA,
- const ProfPos *PB, unsigned uLengthB, const PWPath &Path);
-bool IsHydrophilic(const FCOUNT fcCounts[]);
-int PAM200_Letter(unsigned uLetter1, unsigned uLetter2);
-SCORE AverageMatchScore(const PWPath &Path, unsigned uEdgeIndex,
- unsigned uWindowLength);
-void WindowSmooth(const SCORE Score[], unsigned uCount, unsigned uWindowLength,
- SCORE SmoothScore[], double dCeil = 9e29);
-SCORE FastScoreMSA_LA(const MSA &msa, SCORE MatchScore[] = 0);
-SCORE FastScoreMSA_NS(const MSA &msa, SCORE MatchScore[] = 0);
-SCORE FastScoreMSA_SP(const MSA &msa, SCORE MatchScore[] = 0);
-bool RefineMSA(MSA &msa, const Tree &tree);
-SCORE MSAQScore(const MSA &msa, SCORE MatchScore[] = 0);
-bool RefineBiParts(MSA &msa, const Tree &tree, bool R);
-void FindAnchorCols(const MSA &msa, unsigned AnchorCols[],
- unsigned *ptruAnchorColCount);
-double PctIdToHeight(double dPctId);
-double PctIdToHeightKimura(double dPctId);
-double PctIdToHeightMAFFT(double dPctId);
-double PctIdToMAFFTDist(double dPctId);
-bool RefineBlocks(MSA &msa, const Tree &tree);
-bool RefineSubfams(MSA &msaIn, const Tree &tree, unsigned uIters);
-void SetMuscleTree(const Tree &tree);
-void CalcClustalWWeights(const Tree &tree, WEIGHT Weights[]);
-void RealignDiffs(const MSA &msaIn, const Tree &Diffs,
- const unsigned IdToDiffsTreeNodeIndex[], MSA &msaOut);
-void RealignDiffsE(const MSA &msaIn, const SeqVect &v,
- const Tree &NewTree, const Tree &OldTree,
- const unsigned uNewNodeIndexToOldNodeIndex[],
- MSA &msaOut, ProgNode *OldProgNodes);
-void RefineTree(MSA &msa, Tree &tree);
-void RefineTreeE(MSA &msa, const SeqVect &v, Tree &tree, ProgNode *ProgNodes);
-void SetScoreMatrix();
-extern bool IsHydrophobic(const FCOUNT fcCounts[]);
-void Hydro(ProfPos *Prof, unsigned uLength);
-void SetTermGaps(const ProfPos *Prof, unsigned uLength);
-
-// Macros to simulate 2D matrices
-#define DPL(PLA, PLB) DPL_[(PLB)*uPrefixCountA + (PLA)]
-#define DPM(PLA, PLB) DPM_[(PLB)*uPrefixCountA + (PLA)]
-#define DPD(PLA, PLB) DPD_[(PLB)*uPrefixCountA + (PLA)]
-#define DPE(PLA, PLB) DPE_[(PLB)*uPrefixCountA + (PLA)]
-#define DPI(PLA, PLB) DPI_[(PLB)*uPrefixCountA + (PLA)]
-#define DPJ(PLA, PLB) DPJ_[(PLB)*uPrefixCountA + (PLA)]
-#define DPU(PLA, PLB) DPU_[(PLB)*uPrefixCountA + (PLA)]
-#define TBM(PLA, PLB) TBM_[(PLB)*uPrefixCountA + (PLA)]
-#define TBD(PLA, PLB) TBD_[(PLB)*uPrefixCountA + (PLA)]
-#define TBE(PLA, PLB) TBE_[(PLB)*uPrefixCountA + (PLA)]
-#define TBI(PLA, PLB) TBI_[(PLB)*uPrefixCountA + (PLA)]
-#define TBJ(PLA, PLB) TBJ_[(PLB)*uPrefixCountA + (PLA)]
-
-SCORE ScoreProfPos2LA(const ProfPos &PPA, const ProfPos &PPB);
-SCORE ScoreProfPos2NS(const ProfPos &PPA, const ProfPos &PPB);
-SCORE ScoreProfPos2SP(const ProfPos &PPA, const ProfPos &PPB);
-SCORE ScoreProfPos2SPN(const ProfPos &PPA, const ProfPos &PPB);
-
-#endif // FastProf_h
Deleted: trunk/packages/muscle/trunk/profilefrommsa.cpp
===================================================================
--- trunk/packages/muscle/trunk/profilefrommsa.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/profilefrommsa.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,318 +0,0 @@
-#include "muscle.h"
-#include "msa.h"
-#include "profile.h"
-
-#define TRACE 0
-
-static void LogF(FCOUNT f)
- {
- if (f > -0.00001 && f < 0.00001)
- Log(" ");
- else
- Log(" %5.3f", f);
- }
-
-static const char *LocalScoreToStr(SCORE s)
- {
- static char str[16];
- if (s < -1e10 || s > 1e10)
- return " *";
- sprintf(str, "%5.1f", s);
- return str;
- }
-
-#if DOUBLE_AFFINE
-void ListProfile(const ProfPos *Prof, unsigned uLength, const MSA *ptrMSA)
- {
- Log(" Pos Occ LL LG GL GG Open Close Open2 Clos2\n");
- Log(" --- --- -- -- -- -- ---- ----- ----- -----\n");
- for (unsigned n = 0; n < uLength; ++n)
- {
- const ProfPos &PP = Prof[n];
- Log("%5u", n);
- LogF(PP.m_fOcc);
- LogF(PP.m_LL);
- LogF(PP.m_LG);
- LogF(PP.m_GL);
- LogF(PP.m_GG);
- Log(" %s", LocalScoreToStr(-PP.m_scoreGapOpen));
- Log(" %s", LocalScoreToStr(-PP.m_scoreGapClose));
- Log(" %s", LocalScoreToStr(-PP.m_scoreGapOpen2));
- Log(" %s", LocalScoreToStr(-PP.m_scoreGapClose2));
- if (0 != ptrMSA)
- {
- const unsigned uSeqCount = ptrMSA->GetSeqCount();
- Log(" ");
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
- Log("%c", ptrMSA->GetChar(uSeqIndex, n));
- }
- Log("\n");
- }
-
- Log("\n");
- Log(" Pos G");
- for (unsigned n = 0; n < g_AlphaSize; ++n)
- Log(" %c", LetterExToChar(n));
- Log("\n");
- Log(" --- -");
- for (unsigned n = 0; n < g_AlphaSize; ++n)
- Log(" -----");
- Log("\n");
-
- for (unsigned n = 0; n < uLength; ++n)
- {
- const ProfPos &PP = Prof[n];
- Log("%5u", n);
- if (-1 == PP.m_uResidueGroup)
- Log(" -", PP.m_uResidueGroup);
- else
- Log(" %d", PP.m_uResidueGroup);
-
- for (unsigned uLetter = 0; uLetter < g_AlphaSize; ++uLetter)
- {
- FCOUNT f = PP.m_fcCounts[uLetter];
- if (f == 0.0)
- Log(" ");
- else
- Log(" %5.3f", f);
- }
- if (0 != ptrMSA)
- {
- const unsigned uSeqCount = ptrMSA->GetSeqCount();
- Log(" ");
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
- Log("%c", ptrMSA->GetChar(uSeqIndex, n));
- }
- Log("\n");
- }
- }
-#endif // DOUBLE_AFFINE
-
-#if SINGLE_AFFINE
-void ListProfile(const ProfPos *Prof, unsigned uLength, const MSA *ptrMSA)
- {
- Log(" Pos Occ LL LG GL GG Open Close\n");
- Log(" --- --- -- -- -- -- ---- -----\n");
- for (unsigned n = 0; n < uLength; ++n)
- {
- const ProfPos &PP = Prof[n];
- Log("%5u", n);
- LogF(PP.m_fOcc);
- LogF(PP.m_LL);
- LogF(PP.m_LG);
- LogF(PP.m_GL);
- LogF(PP.m_GG);
- Log(" %5.1f", -PP.m_scoreGapOpen);
- Log(" %5.1f", -PP.m_scoreGapClose);
- if (0 != ptrMSA)
- {
- const unsigned uSeqCount = ptrMSA->GetSeqCount();
- Log(" ");
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
- Log("%c", ptrMSA->GetChar(uSeqIndex, n));
- }
- Log("\n");
- }
-
- Log("\n");
- Log(" Pos G");
- for (unsigned n = 0; n < g_AlphaSize; ++n)
- Log(" %c", LetterExToChar(n));
- Log("\n");
- Log(" --- -");
- for (unsigned n = 0; n < g_AlphaSize; ++n)
- Log(" -----");
- Log("\n");
-
- for (unsigned n = 0; n < uLength; ++n)
- {
- const ProfPos &PP = Prof[n];
- Log("%5u", n);
- if (-1 == PP.m_uResidueGroup)
- Log(" -", PP.m_uResidueGroup);
- else
- Log(" %d", PP.m_uResidueGroup);
-
- for (unsigned uLetter = 0; uLetter < g_AlphaSize; ++uLetter)
- {
- FCOUNT f = PP.m_fcCounts[uLetter];
- if (f == 0.0)
- Log(" ");
- else
- Log(" %5.3f", f);
- }
- if (0 != ptrMSA)
- {
- const unsigned uSeqCount = ptrMSA->GetSeqCount();
- Log(" ");
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
- Log("%c", ptrMSA->GetChar(uSeqIndex, n));
- }
- Log("\n");
- }
- }
-#endif
-
-void SortCounts(const FCOUNT fcCounts[], unsigned SortOrder[])
- {
- static unsigned InitialSortOrder[MAX_ALPHA] =
- {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19
- };
- memcpy(SortOrder, InitialSortOrder, g_AlphaSize*sizeof(unsigned));
-
- bool bAny = true;
- while (bAny)
- {
- bAny = false;
- for (unsigned n = 0; n < g_AlphaSize - 1; ++n)
- {
- unsigned i1 = SortOrder[n];
- unsigned i2 = SortOrder[n+1];
- if (fcCounts[i1] < fcCounts[i2])
- {
- SortOrder[n+1] = i1;
- SortOrder[n] = i2;
- bAny = true;
- }
- }
- }
- }
-
-static unsigned AminoGroupFromFCounts(const FCOUNT fcCounts[])
- {
- bool bAny = false;
- unsigned uConsensusResidueGroup = RESIDUE_GROUP_MULTIPLE;
- for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
- {
- if (0 == fcCounts[uLetter])
- continue;
- const unsigned uResidueGroup = ResidueGroup[uLetter];
- if (bAny)
- {
- if (uResidueGroup != uConsensusResidueGroup)
- return RESIDUE_GROUP_MULTIPLE;
- }
- else
- {
- bAny = true;
- uConsensusResidueGroup = uResidueGroup;
- }
- }
- return uConsensusResidueGroup;
- }
-
-static unsigned NucleoGroupFromFCounts(const FCOUNT fcCounts[])
- {
- bool bAny = false;
- unsigned uConsensusResidueGroup = RESIDUE_GROUP_MULTIPLE;
- for (unsigned uLetter = 0; uLetter < 4; ++uLetter)
- {
- if (0 == fcCounts[uLetter])
- continue;
- const unsigned uResidueGroup = uLetter;
- if (bAny)
- {
- if (uResidueGroup != uConsensusResidueGroup)
- return RESIDUE_GROUP_MULTIPLE;
- }
- else
- {
- bAny = true;
- uConsensusResidueGroup = uResidueGroup;
- }
- }
- return uConsensusResidueGroup;
- }
-
-unsigned ResidueGroupFromFCounts(const FCOUNT fcCounts[])
- {
- switch (g_Alpha)
- {
- case ALPHA_Amino:
- return AminoGroupFromFCounts(fcCounts);
-
- case ALPHA_DNA:
- case ALPHA_RNA:
- return NucleoGroupFromFCounts(fcCounts);
- }
- Quit("ResidueGroupFromFCounts: bad alpha");
- return 0;
- }
-
-ProfPos *ProfileFromMSA(const MSA &a)
- {
- const unsigned uSeqCount = a.GetSeqCount();
- const unsigned uColCount = a.GetColCount();
-
-// Yuck -- cast away const (inconsistent design here).
- SetMSAWeightsMuscle((MSA &) a);
-
- ProfPos *Pos = new ProfPos[uColCount];
-
- unsigned uHydrophobicRunLength = 0;
- for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
- {
- ProfPos &PP = Pos[uColIndex];
-
- PP.m_bAllGaps = a.IsGapColumn(uColIndex);
-
- FCOUNT fcGapStart;
- FCOUNT fcGapEnd;
- FCOUNT fcGapExtend;
- FCOUNT fOcc;
- a.GetFractionalWeightedCounts(uColIndex, g_bNormalizeCounts, PP.m_fcCounts,
- &fcGapStart, &fcGapEnd, &fcGapExtend, &fOcc,
- &PP.m_LL, &PP.m_LG, &PP.m_GL, &PP.m_GG);
- PP.m_fOcc = fOcc;
-
- SortCounts(PP.m_fcCounts, PP.m_uSortOrder);
-
- PP.m_uResidueGroup = ResidueGroupFromFCounts(PP.m_fcCounts);
-
- for (unsigned i = 0; i < g_AlphaSize; ++i)
- {
- SCORE scoreSum = 0;
- for (unsigned j = 0; j < g_AlphaSize; ++j)
- scoreSum += PP.m_fcCounts[j]*(*g_ptrScoreMatrix)[i][j];
- PP.m_AAScores[i] = scoreSum;
- }
-
- SCORE sStartOcc = (SCORE) (1.0 - fcGapStart);
- SCORE sEndOcc = (SCORE) (1.0 - fcGapEnd);
-
- PP.m_fcStartOcc = sStartOcc;
- PP.m_fcEndOcc = sEndOcc;
-
- PP.m_scoreGapOpen = sStartOcc*g_scoreGapOpen/2;
- PP.m_scoreGapClose = sEndOcc*g_scoreGapOpen/2;
-#if DOUBLE_AFFINE
- PP.m_scoreGapOpen2 = sStartOcc*g_scoreGapOpen2/2;
- PP.m_scoreGapClose2 = sEndOcc*g_scoreGapOpen2/2;
-#endif
-// PP.m_scoreGapExtend = (SCORE) ((1.0 - fcGapExtend)*scoreGapExtend);
-
-#if PAF
- if (ALHPA_Amino == g_Alpha && sStartOcc > 0.5)
- {
- extern SCORE PAFactor(const FCOUNT fcCounts[]);
- SCORE paf = PAFactor(PP.m_fcCounts);
- PP.m_scoreGapOpen *= paf;
- PP.m_scoreGapClose *= paf;
- }
-#endif
- }
-
-#if HYDRO
- if (ALPHA_Amino == g_Alpha)
- Hydro(Pos, uColCount);
-#endif
-
-#if TRACE
- {
- Log("ProfileFromMSA\n");
- ListProfile(Pos, uColCount, &a);
- }
-#endif
- return Pos;
- }
Deleted: trunk/packages/muscle/trunk/progalign.cpp
===================================================================
--- trunk/packages/muscle/trunk/progalign.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/progalign.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,206 +0,0 @@
-#include "muscle.h"
-#include "tree.h"
-#include "seqvect.h"
-#include "profile.h"
-#include "msa.h"
-#include "pwpath.h"
-#include "distfunc.h"
-#include "textfile.h"
-#include "estring.h"
-
-#define TRACE 0
-#define VALIDATE 0
-#define TRACE_LENGTH_DELTA 0
-
-static void LogLeafNames(const Tree &tree, unsigned uNodeIndex)
- {
- const unsigned uNodeCount = tree.GetNodeCount();
- unsigned *Leaves = new unsigned[uNodeCount];
- unsigned uLeafCount;
- GetLeaves(tree, uNodeIndex, Leaves, &uLeafCount);
- for (unsigned i = 0; i < uLeafCount; ++i)
- {
- if (i > 0)
- Log(",");
- Log("%s", tree.GetLeafName(Leaves[i]));
- }
- delete[] Leaves;
- }
-
-ProgNode *ProgressiveAlignE(const SeqVect &v, const Tree &GuideTree, MSA &a)
- {
- assert(GuideTree.IsRooted());
-
-#if TRACE
- Log("GuideTree:\n");
- GuideTree.LogMe();
-#endif
-
- const unsigned uSeqCount = v.Length();
- const unsigned uNodeCount = 2*uSeqCount - 1;
- const unsigned uIterCount = uSeqCount - 1;
-
- WEIGHT *Weights = new WEIGHT[uSeqCount];
- CalcClustalWWeights(GuideTree, Weights);
-
- ProgNode *ProgNodes = new ProgNode[uNodeCount];
-
- unsigned uJoin = 0;
- unsigned uTreeNodeIndex = GuideTree.FirstDepthFirstNode();
- SetProgressDesc("Align node");
- do
- {
- if (GuideTree.IsLeaf(uTreeNodeIndex))
- {
- if (uTreeNodeIndex >= uNodeCount)
- Quit("TreeNodeIndex=%u NodeCount=%u\n", uTreeNodeIndex, uNodeCount);
- ProgNode &Node = ProgNodes[uTreeNodeIndex];
- unsigned uId = GuideTree.GetLeafId(uTreeNodeIndex);
- if (uId >= uSeqCount)
- Quit("Seq index out of range");
- const Seq &s = *(v[uId]);
- Node.m_MSA.FromSeq(s);
- Node.m_MSA.SetSeqId(0, uId);
- Node.m_uLength = Node.m_MSA.GetColCount();
- Node.m_Weight = Weights[uId];
- // TODO: Term gaps settable
- Node.m_Prof = ProfileFromMSA(Node.m_MSA);
- Node.m_EstringL = 0;
- Node.m_EstringR = 0;
-#if TRACE
- Log("Leaf id=%u\n", uId);
- Log("MSA=\n");
- Node.m_MSA.LogMe();
- Log("Profile (from MSA)=\n");
- ListProfile(Node.m_Prof, Node.m_uLength, &Node.m_MSA);
-#endif
- }
- else
- {
- Progress(uJoin, uSeqCount - 1);
- ++uJoin;
-
- const unsigned uMergeNodeIndex = uTreeNodeIndex;
- ProgNode &Parent = ProgNodes[uMergeNodeIndex];
-
- const unsigned uLeft = GuideTree.GetLeft(uTreeNodeIndex);
- const unsigned uRight = GuideTree.GetRight(uTreeNodeIndex);
-
- if (g_bVerbose)
- {
- Log("Align: (");
- LogLeafNames(GuideTree, uLeft);
- Log(") (");
- LogLeafNames(GuideTree, uRight);
- Log(")\n");
- }
-
- ProgNode &Node1 = ProgNodes[uLeft];
- ProgNode &Node2 = ProgNodes[uRight];
-
-#if TRACE
- Log("AlignTwoMSAs:\n");
-#endif
- AlignTwoProfs(
- Node1.m_Prof, Node1.m_uLength, Node1.m_Weight,
- Node2.m_Prof, Node2.m_uLength, Node2.m_Weight,
- Parent.m_Path,
- &Parent.m_Prof, &Parent.m_uLength);
-#if TRACE_LENGTH_DELTA
- {
- unsigned L = Node1.m_uLength;
- unsigned R = Node2.m_uLength;
- unsigned P = Parent.m_Path.GetEdgeCount();
- unsigned Max = L > R ? L : R;
- unsigned d = P - Max;
- Log("LD%u;%u;%u;%u\n", L, R, P, d);
- }
-#endif
- PathToEstrings(Parent.m_Path, &Parent.m_EstringL, &Parent.m_EstringR);
-
- Parent.m_Weight = Node1.m_Weight + Node2.m_Weight;
-
-#if VALIDATE
- {
-#if TRACE
- Log("AlignTwoMSAs:\n");
-#endif
- PWPath TmpPath;
- AlignTwoMSAs(Node1.m_MSA, Node2.m_MSA, Parent.m_MSA, TmpPath);
- ProfPos *P1 = ProfileFromMSA(Node1.m_MSA, true);
- ProfPos *P2 = ProfileFromMSA(Node2.m_MSA, true);
- unsigned uLength = Parent.m_MSA.GetColCount();
- ProfPos *TmpProf = ProfileFromMSA(Parent.m_MSA, true);
-
-#if TRACE
- Log("Node1 MSA=\n");
- Node1.m_MSA.LogMe();
-
- Log("Node1 prof=\n");
- ListProfile(Node1.m_Prof, Node1.m_MSA.GetColCount(), &Node1.m_MSA);
- Log("Node1 prof (from MSA)=\n");
- ListProfile(P1, Node1.m_MSA.GetColCount(), &Node1.m_MSA);
-
- AssertProfsEq(Node1.m_Prof, Node1.m_uLength, P1, Node1.m_MSA.GetColCount());
-
- Log("Node2 prof=\n");
- ListProfile(Node2.m_Prof, Node2.m_MSA.GetColCount(), &Node2.m_MSA);
-
- Log("Node2 MSA=\n");
- Node2.m_MSA.LogMe();
-
- Log("Node2 prof (from MSA)=\n");
- ListProfile(P2, Node2.m_MSA.GetColCount(), &Node2.m_MSA);
-
- AssertProfsEq(Node2.m_Prof, Node2.m_uLength, P2, Node2.m_MSA.GetColCount());
-
- TmpPath.AssertEqual(Parent.m_Path);
-
- Log("Parent MSA=\n");
- Parent.m_MSA.LogMe();
-
- Log("Parent prof=\n");
- ListProfile(Parent.m_Prof, Parent.m_uLength, &Parent.m_MSA);
-
- Log("Parent prof (from MSA)=\n");
- ListProfile(TmpProf, Parent.m_MSA.GetColCount(), &Parent.m_MSA);
-
-#endif // TRACE
- AssertProfsEq(Parent.m_Prof, Parent.m_uLength,
- TmpProf, Parent.m_MSA.GetColCount());
- delete[] P1;
- delete[] P2;
- delete[] TmpProf;
- }
-#endif // VALIDATE
-
- Node1.m_MSA.Clear();
- Node2.m_MSA.Clear();
-
- // Don't delete profiles, may need them for tree refinement.
- //delete[] Node1.m_Prof;
- //delete[] Node2.m_Prof;
- //Node1.m_Prof = 0;
- //Node2.m_Prof = 0;
- }
- uTreeNodeIndex = GuideTree.NextDepthFirstNode(uTreeNodeIndex);
- }
- while (NULL_NEIGHBOR != uTreeNodeIndex);
- ProgressStepsDone();
-
- if (g_bBrenner)
- MakeRootMSABrenner((SeqVect &) v, GuideTree, ProgNodes, a);
- else
- MakeRootMSA(v, GuideTree, ProgNodes, a);
-
-#if VALIDATE
- {
- unsigned uRootNodeIndex = GuideTree.GetRootNodeIndex();
- const ProgNode &RootProgNode = ProgNodes[uRootNodeIndex];
- AssertMSAEq(a, RootProgNode.m_MSA);
- }
-#endif
-
- delete[] Weights;
- return ProgNodes;
- }
Deleted: trunk/packages/muscle/trunk/progress.cpp
===================================================================
--- trunk/packages/muscle/trunk/progress.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/progress.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,186 +0,0 @@
-#include "muscle.h"
-#include <stdio.h>
-#include <time.h>
-
-// Functions that provide visible feedback to the user
-// that progress is being made.
-
-static unsigned g_uIter = 0; // Main MUSCLE iteration 1, 2..
-static unsigned g_uLocalMaxIters = 0; // Max iters
-static FILE *g_fProgress = stderr; // Default to standard error
-static char g_strFileName[32]; // File name
-static time_t g_tLocalStart; // Start time
-static char g_strDesc[32]; // Description
-static bool g_bWipeDesc = false;
-static int g_nPrevDescLength;
-static unsigned g_uTotalSteps;
-
-double GetCheckMemUseMB()
- {
- unsigned MB = (unsigned) GetMemUseMB();
- if (0 == g_uMaxMB || MB <= g_uMaxMB)
- return MB;
- fprintf(stderr, "\n\n*** MAX MEMORY %u MB EXCEEDED***\n", g_uMaxMB);
- fprintf(stderr, "Memory allocated so far %u MB, physical RAM %u MB\n",
- MB, (unsigned) GetRAMSizeMB());
- fprintf(stderr, "Use -maxmb <n> option to increase limit, where <n> is in MB.\n");
- SaveCurrentAlignment();
- exit(EXIT_FatalError);
- return MB;
- }
-
-const char *ElapsedTimeAsStr()
- {
- time_t Now = time(0);
- unsigned long ElapsedSecs = (unsigned long) (Now - g_tLocalStart);
- return SecsToStr(ElapsedSecs);
- }
-
-const char *MemToStr(double MB)
- {
- if (MB < 0)
- return "";
-
- static char Str[9];
- static double MaxMB = 0;
- static double RAMMB = 0;
-
- if (RAMMB == 0)
- RAMMB = GetRAMSizeMB();
-
- if (MB > MaxMB)
- MaxMB = MB;
- double Pct = (MaxMB*100.0)/RAMMB;
- if (Pct > 100)
- Pct = 100;
- sprintf(Str, "%.0f MB(%.0f%%)", MaxMB, Pct);
- return Str;
- }
-
-void SetInputFileName(const char *pstrFileName)
- {
- NameFromPath(pstrFileName, g_strFileName, sizeof(g_strFileName));
- }
-
-void SetSeqStats(unsigned uSeqCount, unsigned uMaxL, unsigned uAvgL)
- {
- if (g_bQuiet)
- return;
-
- fprintf(g_fProgress, "%s %u seqs, max length %u, avg length %u\n",
- g_strFileName, uSeqCount, uMaxL, uAvgL);
- if (g_bVerbose)
- Log("%u seqs, max length %u, avg length %u\n",
- uSeqCount, uMaxL, uAvgL);
- }
-
-void SetStartTime()
- {
- time(&g_tLocalStart);
- }
-
-unsigned long GetStartTime()
- {
- return (unsigned long) g_tLocalStart;
- }
-
-void SetIter(unsigned uIter)
- {
- g_uIter = uIter;
- }
-
-void IncIter()
- {
- ++g_uIter;
- }
-
-void SetMaxIters(unsigned uMaxIters)
- {
- g_uLocalMaxIters = uMaxIters;
- }
-
-void SetProgressDesc(const char szDesc[])
- {
- strncpy(g_strDesc, szDesc, sizeof(g_strDesc));
- g_strDesc[sizeof(g_strDesc) - 1] = 0;
- }
-
-static void Wipe(int n)
- {
- for (int i = 0; i < n; ++i)
- fprintf(g_fProgress, " ");
- }
-
-void Progress(const char *szFormat, ...)
- {
- CheckMaxTime();
-
- if (g_bQuiet)
- return;
-
- double MB = GetCheckMemUseMB();
-
- char szStr[4096];
- va_list ArgList;
- va_start(ArgList, szFormat);
- vsprintf(szStr, szFormat, ArgList);
-
- fprintf(g_fProgress, "\n%8.8s %12s %s",
- ElapsedTimeAsStr(),
- MemToStr(MB),
- szStr);
-
- fprintf(g_fProgress, "\n");
- fflush(g_fProgress);
- }
-
-void Progress(unsigned uStep, unsigned uTotalSteps)
- {
- CheckMaxTime();
-
- if (g_bQuiet)
- return;
-
- double dPct = ((uStep + 1)*100.0)/uTotalSteps;
- double MB = GetCheckMemUseMB();
- fprintf(g_fProgress, "%8.8s %12s Iter %3u %6.2f%% %s",
- ElapsedTimeAsStr(),
- MemToStr(MB),
- g_uIter,
- dPct,
- g_strDesc);
-
- if (g_bWipeDesc)
- {
- int n = g_nPrevDescLength - (int) strlen(g_strDesc);
- Wipe(n);
- g_bWipeDesc = false;
- }
-
- fprintf(g_fProgress, "\r");
-
- g_uTotalSteps = uTotalSteps;
- }
-
-void ProgressStepsDone()
- {
- CheckMaxTime();
-
- if (g_bVerbose)
- {
- double MB = GetCheckMemUseMB();
- Log("Elapsed time %8.8s Peak memory use %12s Iteration %3u %s\n",
- ElapsedTimeAsStr(),
- MemToStr(MB),
- g_uIter,
- g_strDesc);
- }
-
- if (g_bQuiet)
- return;
-
- Progress(g_uTotalSteps - 1, g_uTotalSteps);
- fprintf(g_fProgress, "\n");
- g_bWipeDesc = true;
- g_nPrevDescLength = (int) strlen(g_strDesc);
- }
Deleted: trunk/packages/muscle/trunk/progressivealign.cpp
===================================================================
--- trunk/packages/muscle/trunk/progressivealign.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/progressivealign.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,76 +0,0 @@
-#include "muscle.h"
-#include <math.h>
-#include "tree.h"
-#include "seqvect.h"
-#include "profile.h"
-#include "msa.h"
-#include "pwpath.h"
-#include "distfunc.h"
-
-#define TRACE 0
-
-void ProgressiveAlign(const SeqVect &v, const Tree &GuideTree, MSA &a)
- {
- assert(GuideTree.IsRooted());
-
-#if TRACE
- Log("GuideTree:\n");
- GuideTree.LogMe();
-#endif
-
- const unsigned uSeqCount = v.Length();
- const unsigned uNodeCount = 2*uSeqCount - 1;
-
- ProgNode *ProgNodes = new ProgNode[uNodeCount];
-
- unsigned uJoin = 0;
- unsigned uTreeNodeIndex = GuideTree.FirstDepthFirstNode();
- SetProgressDesc("Align node");
- do
- {
- if (GuideTree.IsLeaf(uTreeNodeIndex))
- {
- if (uTreeNodeIndex >= uNodeCount)
- Quit("TreeNodeIndex=%u NodeCount=%u\n", uTreeNodeIndex, uNodeCount);
- ProgNode &Node = ProgNodes[uTreeNodeIndex];
- unsigned uId = GuideTree.GetLeafId(uTreeNodeIndex);
- if (uId >= uSeqCount)
- Quit("Seq index out of range");
- const Seq &s = *(v[uId]);
- Node.m_MSA.FromSeq(s);
- Node.m_MSA.SetSeqId(0, uId);
- Node.m_uLength = Node.m_MSA.GetColCount();
- }
- else
- {
- Progress(uJoin, uSeqCount - 1);
- ++uJoin;
-
- const unsigned uMergeNodeIndex = uTreeNodeIndex;
- ProgNode &Parent = ProgNodes[uMergeNodeIndex];
-
- const unsigned uLeft = GuideTree.GetLeft(uTreeNodeIndex);
- const unsigned uRight = GuideTree.GetRight(uTreeNodeIndex);
-
- ProgNode &Node1 = ProgNodes[uLeft];
- ProgNode &Node2 = ProgNodes[uRight];
-
- PWPath Path;
- AlignTwoMSAs(Node1.m_MSA, Node2.m_MSA, Parent.m_MSA, Path);
- Parent.m_uLength = Parent.m_MSA.GetColCount();
-
- Node1.m_MSA.Clear();
- Node2.m_MSA.Clear();
- }
- uTreeNodeIndex = GuideTree.NextDepthFirstNode(uTreeNodeIndex);
- }
- while (NULL_NEIGHBOR != uTreeNodeIndex);
- ProgressStepsDone();
-
- unsigned uRootNodeIndex = GuideTree.GetRootNodeIndex();
- const ProgNode &RootProgNode = ProgNodes[uRootNodeIndex];
- a.Copy(RootProgNode.m_MSA);
-
- delete[] ProgNodes;
- ProgNodes = 0;
- }
Deleted: trunk/packages/muscle/trunk/pwpath.cpp
===================================================================
--- trunk/packages/muscle/trunk/pwpath.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/pwpath.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,386 +0,0 @@
-#include "muscle.h"
-#include "pwpath.h"
-#include "seq.h"
-#include "textfile.h"
-#include "msa.h"
-
-PWPath::PWPath()
- {
- m_uArraySize = 0;
- m_uEdgeCount = 0;
- m_Edges = 0;
- }
-
-PWPath::~PWPath()
- {
- Clear();
- }
-
-void PWPath::Clear()
- {
- delete[] m_Edges;
- m_Edges = 0;
- m_uArraySize = 0;
- m_uEdgeCount = 0;
- }
-
-void PWPath::ExpandPath(unsigned uAdditionalEdgeCount)
- {
- PWEdge *OldPath = m_Edges;
- unsigned uEdgeCount = m_uArraySize + uAdditionalEdgeCount;
-
- m_Edges = new PWEdge[uEdgeCount];
- m_uArraySize = uEdgeCount;
- if (m_uEdgeCount > 0)
- memcpy(m_Edges, OldPath, m_uEdgeCount*sizeof(PWEdge));
- delete[] OldPath;
- }
-
-void PWPath::AppendEdge(const PWEdge &Edge)
- {
- if (0 == m_uArraySize || m_uEdgeCount + 1 == m_uArraySize)
- ExpandPath(200);
-
- m_Edges[m_uEdgeCount] = Edge;
- ++m_uEdgeCount;
- }
-
-void PWPath::AppendEdge(char cType, unsigned uPrefixLengthA, unsigned uPrefixLengthB)
- {
- PWEdge e;
- e.uPrefixLengthA = uPrefixLengthA;
- e.uPrefixLengthB = uPrefixLengthB;
- e.cType = cType;
- AppendEdge(e);
- }
-
-void PWPath::PrependEdge(const PWEdge &Edge)
- {
- if (0 == m_uArraySize || m_uEdgeCount + 1 == m_uArraySize)
- ExpandPath(1000);
- if (m_uEdgeCount > 0)
- memmove(m_Edges + 1, m_Edges, sizeof(PWEdge)*m_uEdgeCount);
- m_Edges[0] = Edge;
- ++m_uEdgeCount;
- }
-
-const PWEdge &PWPath::GetEdge(unsigned uEdgeIndex) const
- {
- assert(uEdgeIndex < m_uEdgeCount);
- return m_Edges[uEdgeIndex];
- }
-
-void PWPath::Validate() const
- {
- const unsigned uEdgeCount = GetEdgeCount();
- if (0 == uEdgeCount)
- return;
- const PWEdge &FirstEdge = GetEdge(0);
- const PWEdge &LastEdge = GetEdge(uEdgeCount - 1);
- unsigned uStartA = FirstEdge.uPrefixLengthA;
- unsigned uStartB = FirstEdge.uPrefixLengthB;
- if (FirstEdge.cType != 'I')
- --uStartA;
- if (FirstEdge.cType != 'D')
- --uStartB;
-
- unsigned uPrefixLengthA = FirstEdge.uPrefixLengthA;
- unsigned uPrefixLengthB = FirstEdge.uPrefixLengthB;
- for (unsigned uEdgeIndex = 1; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
- {
- const PWEdge &Edge = GetEdge(uEdgeIndex);
- switch (Edge.cType)
- {
- case 'M':
- if (uPrefixLengthA + 1 != Edge.uPrefixLengthA)
- Quit("PWPath::Validate MA %u", uPrefixLengthA);
- if (uPrefixLengthB + 1 != Edge.uPrefixLengthB)
- Quit("PWPath::Validate MB %u", uPrefixLengthB);
- ++uPrefixLengthA;
- ++uPrefixLengthB;
- break;
- case 'D':
- if (uPrefixLengthA + 1 != Edge.uPrefixLengthA)
- Quit("PWPath::Validate DA %u", uPrefixLengthA);
- if (uPrefixLengthB != Edge.uPrefixLengthB)
- Quit("PWPath::Validate DB %u", uPrefixLengthB);
- ++uPrefixLengthA;
- break;
- case 'I':
- if (uPrefixLengthA != Edge.uPrefixLengthA)
- Quit("PWPath::Validate IA %u", uPrefixLengthA);
- if (uPrefixLengthB + 1 != Edge.uPrefixLengthB)
- Quit("PWPath::Validate IB %u", uPrefixLengthB);
- ++uPrefixLengthB;
- break;
- }
- }
- }
-
-void PWPath::LogMe() const
- {
- for (unsigned uEdgeIndex = 0; uEdgeIndex < GetEdgeCount(); ++uEdgeIndex)
- {
- const PWEdge &Edge = GetEdge(uEdgeIndex);
- if (uEdgeIndex > 0)
- Log(" ");
- Log("%c%d.%d",
- Edge.cType,
- Edge.uPrefixLengthA,
- Edge.uPrefixLengthB);
- if ((uEdgeIndex > 0 && uEdgeIndex%10 == 0) ||
- uEdgeIndex == GetEdgeCount() - 1)
- Log("\n");
- }
- }
-
-void PWPath::Copy(const PWPath &Path)
- {
- Clear();
- const unsigned uEdgeCount = Path.GetEdgeCount();
- for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
- {
- const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
- AppendEdge(Edge);
- }
- }
-
-void PWPath::FromMSAPair(const MSA &msaA, const MSA &msaB)
- {
- const unsigned uColCount = msaA.GetColCount();
- if (uColCount != msaB.GetColCount())
- Quit("PWPath::FromMSAPair, lengths differ");
-
- Clear();
-
- unsigned uPrefixLengthA = 0;
- unsigned uPrefixLengthB = 0;
- for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
- {
- bool bIsGapA = msaA.IsGapColumn(uColIndex);
- bool bIsGapB = msaB.IsGapColumn(uColIndex);
-
- PWEdge Edge;
- char cType;
- if (!bIsGapA && !bIsGapB)
- {
- cType = 'M';
- ++uPrefixLengthA;
- ++uPrefixLengthB;
- }
- else if (bIsGapA && !bIsGapB)
- {
- cType = 'I';
- ++uPrefixLengthB;
- }
- else if (!bIsGapA && bIsGapB)
- {
- cType = 'D';
- ++uPrefixLengthA;
- }
- else
- {
- assert(bIsGapB && bIsGapA);
- continue;
- }
-
- Edge.cType = cType;
- Edge.uPrefixLengthA = uPrefixLengthA;
- Edge.uPrefixLengthB = uPrefixLengthB;
- AppendEdge(Edge);
- }
- }
-
-// Very similar to HMMPath::FromFile, should consolidate.
-void PWPath::FromFile(TextFile &File)
- {
- Clear();
- char szToken[1024];
- File.GetTokenX(szToken, sizeof(szToken));
- if (0 != strcmp(szToken, "Path"))
- Quit("Invalid path file (Path)");
-
- File.GetTokenX(szToken, sizeof(szToken));
- if (0 != strcmp(szToken, "edges"))
- Quit("Invalid path file (edges)");
-
- File.GetTokenX(szToken, sizeof(szToken));
- if (!IsValidInteger(szToken))
- Quit("Invalid path file (edges value)");
-
- const unsigned uEdgeCount = (unsigned) atoi(szToken);
- unsigned uEdgeIndex = 0;
- for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
- {
- // index
- File.GetTokenX(szToken, sizeof(szToken));
- if (!IsValidInteger(szToken))
- Quit("Invalid path file, invalid index '%s'", szToken);
- unsigned n = (unsigned) atoi(szToken);
- if (n != uEdgeIndex)
- Quit("Invalid path file, expecting edge %u got %u", uEdgeIndex, n);
-
- // type
- File.GetTokenX(szToken, sizeof(szToken));
- if (1 != strlen(szToken))
- Quit("Invalid path file, expecting state, got '%s'", szToken);
- const char cType = szToken[0];
- if ('M' != cType && 'D' != cType && cType != 'I' && 'S' != cType)
- Quit("Invalid path file, expecting state, got '%c'", cType);
-
- // prefix length A
- File.GetTokenX(szToken, sizeof(szToken));
- if (!IsValidInteger(szToken))
- Quit("Invalid path file, bad prefix length A '%s'", szToken);
- const unsigned uPrefixLengthA = (unsigned) atoi(szToken);
-
- // prefix length B
- File.GetTokenX(szToken, sizeof(szToken));
- if (!IsValidInteger(szToken))
- Quit("Invalid path file, bad prefix length B '%s'", szToken);
- const unsigned uPrefixLengthB = (unsigned) atoi(szToken);
-
- PWEdge Edge;
- Edge.cType = cType;
- Edge.uPrefixLengthA = uPrefixLengthA;
- Edge.uPrefixLengthB = uPrefixLengthB;
- AppendEdge(Edge);
- }
- File.GetTokenX(szToken, sizeof(szToken));
- if (0 != strcmp(szToken, "//"))
- Quit("Invalid path file (//)");
- }
-
-void PWPath::ToFile(TextFile &File) const
- {
- const unsigned uEdgeCount = GetEdgeCount();
-
- File.PutString("Path\n");
- File.PutFormat("edges %u\n", uEdgeCount);
- for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
- {
- const PWEdge &Edge = GetEdge(uEdgeIndex);
- File.PutFormat("%u %c %u %u\n",
- uEdgeIndex,
- Edge.cType,
- Edge.uPrefixLengthA,
- Edge.uPrefixLengthB);
- }
- File.PutString("//\n");
- }
-
-void PWPath::AssertEqual(const PWPath &Path) const
- {
- const unsigned uEdgeCount = GetEdgeCount();
- if (uEdgeCount != Path.GetEdgeCount())
- {
- Log("PWPath::AssertEqual, this=\n");
- LogMe();
- Log("\nOther path=\n");
- Path.LogMe();
- Log("\n");
- Quit("PWPath::AssertEqual, Edge count different %u %u\n",
- uEdgeCount, Path.GetEdgeCount());
- }
-
- for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
- {
- const PWEdge &e1 = GetEdge(uEdgeIndex);
- const PWEdge &e2 = Path.GetEdge(uEdgeIndex);
- if (e1.cType != e2.cType || e1.uPrefixLengthA != e2.uPrefixLengthA ||
- e1.uPrefixLengthB != e2.uPrefixLengthB)
- {
- Log("PWPath::AssertEqual, this=\n");
- LogMe();
- Log("\nOther path=\n");
- Path.LogMe();
- Log("\n");
- Log("This edge %c%u.%u, other edge %c%u.%u\n",
- e1.cType, e1.uPrefixLengthA, e1.uPrefixLengthB,
- e2.cType, e2.uPrefixLengthA, e2.uPrefixLengthB);
- Quit("PWPath::AssertEqual, edge %u different\n", uEdgeIndex);
- }
- }
- }
-
-bool PWPath::Equal(const PWPath &Path) const
- {
- const unsigned uEdgeCount = GetEdgeCount();
- if (uEdgeCount != Path.GetEdgeCount())
- return false;
-
- for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
- {
- const PWEdge &e1 = GetEdge(uEdgeIndex);
- const PWEdge &e2 = Path.GetEdge(uEdgeIndex);
- if (e1.cType != e2.cType || e1.uPrefixLengthA != e2.uPrefixLengthA ||
- e1.uPrefixLengthB != e2.uPrefixLengthB)
- return false;
- }
- return true;
- }
-
-unsigned PWPath::GetMatchCount() const
- {
- unsigned uMatchCount = 0;
- const unsigned uEdgeCount = GetEdgeCount();
- for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
- {
- const PWEdge &e = GetEdge(uEdgeIndex);
- if ('M' == e.cType)
- ++uMatchCount;
- }
- return uMatchCount;
- }
-
-unsigned PWPath::GetInsertCount() const
- {
- unsigned uInsertCount = 0;
- const unsigned uEdgeCount = GetEdgeCount();
- for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
- {
- const PWEdge &e = GetEdge(uEdgeIndex);
- if ('I' == e.cType)
- ++uInsertCount;
- }
- return uInsertCount;
- }
-
-unsigned PWPath::GetDeleteCount() const
- {
- unsigned uDeleteCount = 0;
- const unsigned uEdgeCount = GetEdgeCount();
- for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
- {
- const PWEdge &e = GetEdge(uEdgeIndex);
- if ('D' == e.cType)
- ++uDeleteCount;
- }
- return uDeleteCount;
- }
-
-void PWPath::FromStr(const char Str[])
- {
- Clear();
- unsigned uPrefixLengthA = 0;
- unsigned uPrefixLengthB = 0;
- while (char c = *Str++)
- {
- switch (c)
- {
- case 'M':
- ++uPrefixLengthA;
- ++uPrefixLengthB;
- break;
- case 'D':
- ++uPrefixLengthA;
- break;
- case 'I':
- ++uPrefixLengthB;
- break;
- default:
- Quit("PWPath::FromStr, invalid state %c", c);
- }
- AppendEdge(c, uPrefixLengthA, uPrefixLengthB);
- }
- }
Deleted: trunk/packages/muscle/trunk/pwpath.h
===================================================================
--- trunk/packages/muscle/trunk/pwpath.h 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/pwpath.h 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,100 +0,0 @@
-#ifndef PWPath_h
-#define PWPath_h
-
-/***
-Each PWEdge in a PWPath specifies a column in a pair-wise (PW) alignment.
-"Path" is by analogy with the path through an HMM.
-Edge types are:
-
- 'M' LetterA + LetterB
- 'D' LetterA + GapB
- 'I' GapB + LetterA
-
-The mnemomic is Match, Delete, Insert (with respect to A).
-Here is a global alignment of sequences A and B.
-
- A: AMQT-F
- B: -M-TIF
-
-The path for this example is:
-
- Edge cType uPrefixLengthA uPrefixLengthB
- 0 D 1 0
- 1 M 2 1
- 2 D 3 1
- 3 M 4 2
- 4 I 4 3
- 5 M 5 4
-
-Given the starting positions in each alignment (e.g., column zero for
-a global alignment), the prefix length fields are redundant; they are
-included only for convenience and as a sanity check, we are not trying
-to optimize for speed or space here. We use prefix lengths rather than
-column indexes because of the problem of representing the special case
-of a gap in the first position.
-***/
-
-class Seq;
-class MSA;
-class SatchmoParams;
-class PW;
-class TextFile;
-class PWScore;
-
-class PWEdge
- {
-public:
- char cType;
- unsigned uPrefixLengthA;
- unsigned uPrefixLengthB;
-
- bool Equal(const PWEdge &e) const
- {
- return uPrefixLengthA == e.uPrefixLengthA &&
- uPrefixLengthB == e.uPrefixLengthB &&
- cType == e.cType;
- }
- };
-
-class PWPath
- {
-// Disable compiler defaults
-private:
- PWPath &operator=(const PWPath &rhs);
- PWPath(const PWPath &rhs);
-
-public:
- PWPath();
- virtual ~PWPath();
-
-public:
- void Clear();
- void FromStr(const char Str[]);
- void Copy(const PWPath &Path);
- void AppendEdge(const PWEdge &Edge);
- void AppendEdge(char cType, unsigned uPrefixLengthA, unsigned uPrefixLengthB);
- void PrependEdge(const PWEdge &Edge);
- unsigned GetEdgeCount() const { return m_uEdgeCount; }
- const PWEdge &GetEdge(unsigned uEdgeIndex) const;
- void Validate(const PWScore &PWS) const;
- void Validate() const;
- void LogMe() const;
- void FromFile(TextFile &File);
- void ToFile(TextFile &File) const;
- void FromMSAPair(const MSA &msaA, const MSA &msaB);
- void AssertEqual(const PWPath &Path) const;
- bool Equal(const PWPath &Path) const;
- unsigned GetMatchCount() const;
- unsigned GetDeleteCount() const;
- unsigned GetInsertCount() const;
-
-private:
- void ExpandPath(unsigned uAdditionalEdgeCount);
-
-private:
- unsigned m_uEdgeCount;
- unsigned m_uArraySize;
- PWEdge *m_Edges;
- };
-
-#endif // PWPath_h
Deleted: trunk/packages/muscle/trunk/readmx.cpp
===================================================================
--- trunk/packages/muscle/trunk/readmx.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/readmx.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,156 +0,0 @@
-#include "muscle.h"
-#include "textfile.h"
-
-#define TRACE 0
-
-const int MAX_LINE = 4096;
-const int MAX_HEADINGS = 32;
-static char Heading[MAX_HEADINGS];
-static unsigned HeadingCount = 0;
-static float Mx[32][32];
-
-static void LogMx()
- {
- Log("Matrix\n");
- Log(" ");
- for (int i = 0; i < 20; ++i)
- Log(" %c", LetterToChar(i));
- Log("\n");
-
- for (int i = 0; i < 20; ++i)
- {
- Log("%c ", LetterToChar(i));
- for (int j = 0; j < 20; ++j)
- Log("%5.1f", Mx[i][j]);
- Log("\n");
- }
- Log("\n");
- }
-
-static unsigned MxCharToLetter(char c)
- {
- for (unsigned Letter = 0; Letter < HeadingCount; ++Letter)
- if (Heading[Letter] == c)
- return Letter;
- Quit("Letter '%c' has no heading", c);
- return 0;
- }
-
-PTR_SCOREMATRIX ReadMx(TextFile &File)
- {
-// Find column headers
- char Line[MAX_LINE];
- for (;;)
- {
- bool EndOfFile = File.GetLine(Line, sizeof(Line));
- if (EndOfFile)
- Quit("Premature EOF in matrix file");
-
- if (Line[0] == '#')
- continue;
- else if (Line[0] == ' ')
- break;
- else
- Quit("Invalid line in matrix file: '%s'", Line);
- }
-
-// Read column headers
- HeadingCount = 0;
- for (char *p = Line; *p; ++p)
- {
- char c = *p;
- if (!isspace(c))
- Heading[HeadingCount++] = c;
- }
-
- if (HeadingCount > 0 && Heading[HeadingCount-1] == '*')
- --HeadingCount;
-
- if (HeadingCount < 20)
- Quit("Error in matrix file: < 20 headers, line='%s'", Line);
-
-#if TRACE
- {
- Log("ReadMx\n");
- Log("%d headings: ", HeadingCount);
- for (unsigned i = 0; i < HeadingCount; ++i)
- Log("%c", Heading[i]);
- Log("\n");
- }
-#endif
-
-// Zero out matrix
- for (int i = 0; i < MAX_ALPHA; ++i)
- for (int j = 0; j < MAX_ALPHA; ++j)
- Mx[i][j] = 0.0;
-
-// Read data lines
- for (unsigned RowIndex = 0; RowIndex < HeadingCount; ++RowIndex)
- {
- bool EndOfFile = File.GetTrimLine(Line, sizeof(Line));
- if (EndOfFile)
- Quit("Premature EOF in matrix file");
-
-#if TRACE
- Log("Line=%s\n", Line);
-#endif
- if (Line[0] == '#')
- continue;
-
- char c = Line[0];
-#if TRACE
- Log("Row char=%c\n", c);
-#endif
- if (!IsResidueChar(c))
- continue;
-
- unsigned RowLetter = CharToLetter(c);
-#if TRACE
- Log("Row letter = %u\n", RowLetter);
-#endif
-
- char *p = Line + 1;
- char *maxp = p + strlen(Line);
- for (unsigned Col = 0; Col < HeadingCount - 1; ++Col)
- {
- if (p >= maxp)
- Quit("Too few fields in line of matrix file: '%s'", Line);
- while (isspace(*p))
- ++p;
- char *Value = p;
- while (!isspace(*p))
- ++p;
- float v = (float) atof(Value);
- char HeaderChar = Heading[Col];
- if (IsResidueChar(HeaderChar))
- {
- unsigned ColLetter = CharToLetter(HeaderChar);
- Mx[RowLetter][ColLetter] = v;
- }
- p += 1;
- }
- }
-
-// Sanity check for symmetry
- for (int i = 0; i < 20; ++i)
- for (int j = 0; j < i; ++j)
- {
- if (Mx[i][j] != Mx[j][i])
- {
- Warning("Matrix is not symmetrical, %c->%c=%g, %c->%c=%g",
- CharToLetter(i),
- CharToLetter(j),
- Mx[i][j],
- CharToLetter(j),
- CharToLetter(i),
- Mx[j][i]);
- goto ExitLoop;
- }
- }
-ExitLoop:;
-
- if (g_bVerbose)
- LogMx();
-
- return &Mx;
- }
Deleted: trunk/packages/muscle/trunk/realigndiffs.cpp
===================================================================
--- trunk/packages/muscle/trunk/realigndiffs.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/realigndiffs.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,115 +0,0 @@
-#include "muscle.h"
-#include "msa.h"
-#include "tree.h"
-#include "profile.h"
-#include "pwpath.h"
-
-#define TRACE 0
-
-// Progressive alignment according to a diffs tree.
-
-static void MakeNode(const MSA &msaIn, const Tree &Diffs, unsigned uDiffsNodeIndex,
- const unsigned IdToDiffsTreeNodeIndex[], ProgNode &Node)
- {
- const unsigned uSeqCount = msaIn.GetSeqCount();
-
- unsigned *Ids = new unsigned[uSeqCount];
-
- unsigned uSeqsInDiffCount = 0;
- for (unsigned uId = 0; uId < uSeqCount; ++uId)
- {
- if (IdToDiffsTreeNodeIndex[uId] == uDiffsNodeIndex)
- {
- Ids[uSeqsInDiffCount] = uId;
- ++uSeqsInDiffCount;
- }
- }
- if (0 == uSeqsInDiffCount)
- Quit("MakeNode: no seqs in diff");
-
- MSASubsetByIds(msaIn, Ids, uSeqsInDiffCount, Node.m_MSA);
-
-#if DEBUG
- ValidateMuscleIds(Node.m_MSA);
-#endif
-
- DeleteGappedCols(Node.m_MSA);
- delete[] Ids;
- }
-
-void RealignDiffs(const MSA &msaIn, const Tree &Diffs,
- const unsigned IdToDiffsTreeNodeIndex[], MSA &msaOut)
- {
- assert(Diffs.IsRooted());
-
-#if TRACE
- Log("RealignDiffs\n");
- Log("Diff tree:\n");
- Diffs.LogMe();
-#endif
-
- const unsigned uNodeCount = Diffs.GetNodeCount();
- if (uNodeCount%2 == 0)
- Quit("RealignDiffs: Expected odd number of nodes");
-
- const unsigned uMergeCount = (uNodeCount - 1)/2;
-
- ProgNode *ProgNodes = new ProgNode[uNodeCount];
-
- unsigned uJoin = 0;
- SetProgressDesc("Refine tree");
- for (unsigned uDiffsNodeIndex = Diffs.FirstDepthFirstNode();
- NULL_NEIGHBOR != uDiffsNodeIndex;
- uDiffsNodeIndex = Diffs.NextDepthFirstNode(uDiffsNodeIndex))
- {
- if (Diffs.IsLeaf(uDiffsNodeIndex))
- {
- assert(uDiffsNodeIndex < uNodeCount);
- if (uDiffsNodeIndex >= uNodeCount)
- Quit("TreeNodeIndex=%u NodeCount=%u\n", uDiffsNodeIndex, uNodeCount);
-
- ProgNode &Node = ProgNodes[uDiffsNodeIndex];
- MakeNode(msaIn, Diffs, uDiffsNodeIndex, IdToDiffsTreeNodeIndex, Node);
-
- Node.m_uLength = Node.m_MSA.GetColCount();
- }
- else
- {
- Progress(uJoin, uMergeCount);
- ++uJoin;
- const unsigned uMergeNodeIndex = uDiffsNodeIndex;
- ProgNode &Parent = ProgNodes[uMergeNodeIndex];
-
- const unsigned uLeft = Diffs.GetLeft(uDiffsNodeIndex);
- const unsigned uRight = Diffs.GetRight(uDiffsNodeIndex);
-
- ProgNode &Node1 = ProgNodes[uLeft];
- ProgNode &Node2 = ProgNodes[uRight];
-
- PWPath Path;
- AlignTwoMSAs(Node1.m_MSA, Node2.m_MSA, Parent.m_MSA, Path);
-
-#if TRACE
- {
- Log("Combined:\n");
- Parent.m_MSA.LogMe();
- }
-#endif
-
- Node1.m_MSA.Clear();
- Node2.m_MSA.Clear();
- }
- }
- ProgressStepsDone();
-
- unsigned uRootNodeIndex = Diffs.GetRootNodeIndex();
- const ProgNode &RootProgNode = ProgNodes[uRootNodeIndex];
- msaOut.Copy(RootProgNode.m_MSA);
-
-#if DEBUG
- AssertMSAEqIgnoreCaseAndGaps(msaIn, msaOut);
-#endif
-
- delete[] ProgNodes;
- ProgNodes = 0;
- }
Deleted: trunk/packages/muscle/trunk/realigndiffse.cpp
===================================================================
--- trunk/packages/muscle/trunk/realigndiffse.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/realigndiffse.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,142 +0,0 @@
-#include "muscle.h"
-#include "msa.h"
-#include "tree.h"
-#include "profile.h"
-#include "pwpath.h"
-#include "seqvect.h"
-#include "estring.h"
-
-#define TRACE 0
-
-void DeleteProgNode(ProgNode &Node)
- {
- delete[] Node.m_Prof;
- delete[] Node.m_EstringL;
- delete[] Node.m_EstringR;
-
- Node.m_Prof = 0;
- Node.m_EstringL = 0;
- Node.m_EstringR = 0;
- }
-
-static void MakeNode(ProgNode &OldNode, ProgNode &NewNode, bool bSwapLR)
- {
- if (bSwapLR)
- {
- NewNode.m_EstringL = OldNode.m_EstringR;
- NewNode.m_EstringR = OldNode.m_EstringL;
- }
- else
- {
- NewNode.m_EstringL = OldNode.m_EstringL;
- NewNode.m_EstringR = OldNode.m_EstringR;
- }
- NewNode.m_Prof = OldNode.m_Prof;
- NewNode.m_uLength = OldNode.m_uLength;
- NewNode.m_Weight = OldNode.m_Weight;
-
- OldNode.m_Prof = 0;
- OldNode.m_EstringL = 0;
- OldNode.m_EstringR = 0;
- }
-
-void RealignDiffsE(const MSA &msaIn, const SeqVect &v,
- const Tree &NewTree, const Tree &OldTree,
- const unsigned uNewNodeIndexToOldNodeIndex[],
- MSA &msaOut, ProgNode *OldProgNodes)
- {
- assert(OldProgNodes != 0);
-
- const unsigned uNodeCount = NewTree.GetNodeCount();
- if (uNodeCount%2 == 0)
- Quit("RealignDiffs: Expected odd number of nodes");
-
- const unsigned uMergeCount = (uNodeCount - 1)/2;
- ProgNode *NewProgNodes = new ProgNode[uNodeCount];
-
- for (unsigned uNewNodeIndex = 0; uNewNodeIndex < uNodeCount; ++uNewNodeIndex)
- {
- if (NODE_CHANGED == uNewNodeIndexToOldNodeIndex[uNewNodeIndex])
- continue;
-
- unsigned uOldNodeIndex = uNewNodeIndexToOldNodeIndex[uNewNodeIndex];
- assert(uNewNodeIndex < uNodeCount);
- assert(uOldNodeIndex < uNodeCount);
-
- ProgNode &NewNode = NewProgNodes[uNewNodeIndex];
- ProgNode &OldNode = OldProgNodes[uOldNodeIndex];
- bool bSwapLR = false;
- if (!NewTree.IsLeaf(uNewNodeIndex))
- {
- unsigned uNewLeft = NewTree.GetLeft(uNewNodeIndex);
- unsigned uNewRight = NewTree.GetRight(uNewNodeIndex);
- unsigned uOld = uNewNodeIndexToOldNodeIndex[uNewNodeIndex];
- unsigned uOldLeft = OldTree.GetLeft(uOld);
- unsigned uOldRight = OldTree.GetRight(uOld);
- assert(uOldLeft < uNodeCount && uOldRight < uNodeCount);
- if (uOldLeft != uNewNodeIndexToOldNodeIndex[uNewLeft])
- {
- assert(uOldLeft == uNewNodeIndexToOldNodeIndex[uNewRight]);
- bSwapLR = true;
- }
- }
- MakeNode(OldNode, NewNode, bSwapLR);
-#if TRACE
- Log("MakeNode old=%u new=%u swap=%d length=%u weight=%.3g\n",
- uOldNodeIndex, uNewNodeIndex, bSwapLR, NewNode.m_uLength, NewNode.m_Weight);
-#endif
- }
-
- unsigned uJoin = 0;
- SetProgressDesc("Refine tree");
- for (unsigned uNewNodeIndex = NewTree.FirstDepthFirstNode();
- NULL_NEIGHBOR != uNewNodeIndex;
- uNewNodeIndex = NewTree.NextDepthFirstNode(uNewNodeIndex))
- {
- if (NODE_CHANGED != uNewNodeIndexToOldNodeIndex[uNewNodeIndex])
- continue;
-
- Progress(uJoin, uMergeCount - 1);
- ++uJoin;
-
- const unsigned uMergeNodeIndex = uNewNodeIndex;
- ProgNode &Parent = NewProgNodes[uMergeNodeIndex];
-
- const unsigned uLeft = NewTree.GetLeft(uNewNodeIndex);
- const unsigned uRight = NewTree.GetRight(uNewNodeIndex);
-
- ProgNode &Node1 = NewProgNodes[uLeft];
- ProgNode &Node2 = NewProgNodes[uRight];
-
- AlignTwoProfs(
- Node1.m_Prof, Node1.m_uLength, Node1.m_Weight,
- Node2.m_Prof, Node2.m_uLength, Node2.m_Weight,
- Parent.m_Path,
- &Parent.m_Prof, &Parent.m_uLength);
- PathToEstrings(Parent.m_Path, &Parent.m_EstringL, &Parent.m_EstringR);
-
- Parent.m_Weight = Node1.m_Weight + Node2.m_Weight;
-
- delete[] Node1.m_Prof;
- delete[] Node2.m_Prof;
-
- Node1.m_Prof = 0;
- Node2.m_Prof = 0;
- }
-
- ProgressStepsDone();
-
- if (g_bBrenner)
- MakeRootMSABrenner((SeqVect &) v, NewTree, NewProgNodes, msaOut);
- else
- MakeRootMSA(v, NewTree, NewProgNodes, msaOut);
-
-#if DEBUG
- AssertMSAEqIgnoreCaseAndGaps(msaIn, msaOut);
-#endif
-
- for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
- DeleteProgNode(NewProgNodes[uNodeIndex]);
-
- delete[] NewProgNodes;
- }
Deleted: trunk/packages/muscle/trunk/refine.cpp
===================================================================
--- trunk/packages/muscle/trunk/refine.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/refine.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,79 +0,0 @@
-#include "muscle.h"
-#include "textfile.h"
-#include "seqvect.h"
-#include "distfunc.h"
-#include "msa.h"
-#include "tree.h"
-#include "clust.h"
-#include "profile.h"
-#include "clustsetmsa.h"
-
-void Refine()
- {
- SetOutputFileName(g_pstrOutFileName);
- SetInputFileName(g_pstrInFileName);
- SetStartTime();
-
- SetMaxIters(g_uMaxIters);
- SetSeqWeightMethod(g_SeqWeight1);
-
- TextFile fileIn(g_pstrInFileName);
- MSA msa;
- msa.FromFile(fileIn);
-
- const unsigned uSeqCount = msa.GetSeqCount();
- if (0 == uSeqCount)
- Quit("No sequences in input file");
-
- ALPHA Alpha = ALPHA_Undefined;
- switch (g_SeqType)
- {
- case SEQTYPE_Auto:
- Alpha = msa.GuessAlpha();
- break;
-
- case SEQTYPE_Protein:
- Alpha = ALPHA_Amino;
- break;
-
- case SEQTYPE_DNA:
- Alpha = ALPHA_DNA;
- break;
-
- case SEQTYPE_RNA:
- Alpha = ALPHA_RNA;
- break;
-
- default:
- Quit("Invalid SeqType");
- }
- SetAlpha(Alpha);
- msa.FixAlpha();
-
- if (ALPHA_DNA == Alpha || ALPHA_RNA == Alpha)
- SetPPScore(PPSCORE_SPN);
-
- MSA::SetIdCount(uSeqCount);
-
-// Initialize sequence ids.
-// From this point on, ids must somehow propogate from here.
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
- msa.SetSeqId(uSeqIndex, uSeqIndex);
- SetMuscleInputMSA(msa);
-
- Tree GuideTree;
- TreeFromMSA(msa, GuideTree, g_Cluster2, g_Distance2, g_Root2);
- SetMuscleTree(GuideTree);
-
- if (g_bAnchors)
- RefineVert(msa, GuideTree, g_uMaxIters);
- else
- RefineHoriz(msa, GuideTree, g_uMaxIters, false, false);
-
- ValidateMuscleIds(msa);
- ValidateMuscleIds(GuideTree);
-
-// TextFile fileOut(g_pstrOutFileName, true);
-// msa.ToFile(fileOut);
- MuscleOutput(msa);
- }
Deleted: trunk/packages/muscle/trunk/refinehoriz.cpp
===================================================================
--- trunk/packages/muscle/trunk/refinehoriz.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/refinehoriz.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,288 +0,0 @@
-#include "muscle.h"
-#include "tree.h"
-#include "msa.h"
-#include "pwpath.h"
-#include "profile.h"
-#include "scorehistory.h"
-#include "objscore.h"
-
-unsigned g_uRefineHeightSubtree;
-unsigned g_uRefineHeightSubtreeTotal;
-
-#define TRACE 0
-#define DIFFOBJSCORE 0
-
-static bool TryRealign(MSA &msaIn, const Tree &tree, const unsigned Leaves1[],
- unsigned uCount1, const unsigned Leaves2[], unsigned uCount2,
- SCORE *ptrscoreBefore, SCORE *ptrscoreAfter,
- bool bLockLeft, bool bLockRight)
- {
-#if TRACE
- Log("TryRealign, msaIn=\n");
- msaIn.LogMe();
-#endif
-
- const unsigned uSeqCount = msaIn.GetSeqCount();
-
- unsigned *Ids1 = new unsigned[uSeqCount];
- unsigned *Ids2 = new unsigned[uSeqCount];
-
- LeafIndexesToIds(tree, Leaves1, uCount1, Ids1);
- LeafIndexesToIds(tree, Leaves2, uCount2, Ids2);
-
- MSA msa1;
- MSA msa2;
-
- MSASubsetByIds(msaIn, Ids1, uCount1, msa1);
- MSASubsetByIds(msaIn, Ids2, uCount2, msa2);
-
-#if DEBUG
- ValidateMuscleIds(msa1);
- ValidateMuscleIds(msa2);
-#endif
-
-// Computing the objective score may be expensive for
-// large numbers of sequences. As a speed optimization,
-// we check whether the alignment changes. If it does
-// not change, there is no need to compute the objective
-// score. We test for the alignment changing by comparing
-// the Viterbi paths before and after re-aligning.
- PWPath pathBefore;
- pathBefore.FromMSAPair(msa1, msa2);
-
- DeleteGappedCols(msa1);
- DeleteGappedCols(msa2);
-
- if (0 == msa1.GetColCount() || 0 == msa2.GetColCount())
- return false;
-
- MSA msaRealigned;
- PWPath pathAfter;
-
- AlignTwoMSAs(msa1, msa2, msaRealigned, pathAfter, bLockLeft, bLockRight);
-
- bool bAnyChanges = !pathAfter.Equal(pathBefore);
- unsigned uDiffCount1;
- unsigned uDiffCount2;
- static unsigned Edges1[10000];
- static unsigned Edges2[10000];
- DiffPaths(pathBefore, pathAfter, Edges1, &uDiffCount1, Edges2, &uDiffCount2);
-
-#if TRACE
- Log("TryRealign, msa1=\n");
- msa1.LogMe();
- Log("\nmsa2=\n");
- msa2.LogMe();
- Log("\nRealigned (changes %s)=\n", bAnyChanges ? "TRUE" : "FALSE");
- msaRealigned.LogMe();
-#endif
-
- if (!bAnyChanges)
- {
- *ptrscoreBefore = 0;
- *ptrscoreAfter = 0;
- return false;
- }
-
- SetMSAWeightsMuscle(msaIn);
- SetMSAWeightsMuscle(msaRealigned);
-
-#if DIFFOBJSCORE
- const SCORE scoreDiff = DiffObjScore(msaIn, pathBefore, Edges1, uDiffCount1,
- msaRealigned, pathAfter, Edges2, uDiffCount2);
- bool bAccept = (scoreDiff > 0);
- *ptrscoreBefore = 0;
- *ptrscoreAfter = scoreDiff;
- //const SCORE scoreBefore = ObjScoreIds(msaIn, Ids1, uCount1, Ids2, uCount2);
- //const SCORE scoreAfter = ObjScoreIds(msaRealigned, Ids1, uCount1, Ids2, uCount2);
- //Log("Diff = %.3g %.3g\n", scoreDiff, scoreAfter - scoreBefore);
-#else
- const SCORE scoreBefore = ObjScoreIds(msaIn, Ids1, uCount1, Ids2, uCount2);
- const SCORE scoreAfter = ObjScoreIds(msaRealigned, Ids1, uCount1, Ids2, uCount2);
-
- bool bAccept = (scoreAfter > scoreBefore);
-
-#if TRACE
- Log("Score %g -> %g Accept %s\n", scoreBefore, scoreAfter, bAccept ? "TRUE" : "FALSE");
-#endif
-
- *ptrscoreBefore = scoreBefore;
- *ptrscoreAfter = scoreAfter;
-#endif
-
- if (bAccept)
- msaIn.Copy(msaRealigned);
- delete[] Ids1;
- delete[] Ids2;
- return bAccept;
- }
-
-static void RefineHeightParts(MSA &msaIn, const Tree &tree,
- const unsigned InternalNodeIndexes[], bool bReversed, bool bRight,
- unsigned uIter,
- ScoreHistory &History,
- bool *ptrbAnyChanges, bool *ptrbOscillating, bool bLockLeft, bool bLockRight)
- {
- *ptrbOscillating = false;
-
- const unsigned uSeqCount = msaIn.GetSeqCount();
- const unsigned uInternalNodeCount = uSeqCount - 1;
-
- unsigned *Leaves1 = new unsigned[uSeqCount];
- unsigned *Leaves2 = new unsigned[uSeqCount];
-
- const unsigned uRootNodeIndex = tree.GetRootNodeIndex();
- bool bAnyAccepted = false;
- for (unsigned i = 0; i < uInternalNodeCount; ++i)
- {
- const unsigned uInternalNodeIndex = InternalNodeIndexes[i];
- unsigned uNeighborNodeIndex;
- if (tree.IsRoot(uInternalNodeIndex) && !bRight)
- continue;
- else if (bRight)
- uNeighborNodeIndex = tree.GetRight(uInternalNodeIndex);
- else
- uNeighborNodeIndex = tree.GetLeft(uInternalNodeIndex);
-
- g_uTreeSplitNode1 = uInternalNodeIndex;
- g_uTreeSplitNode2 = uNeighborNodeIndex;
-
- unsigned uCount1;
- unsigned uCount2;
-
- GetLeaves(tree, uNeighborNodeIndex, Leaves1, &uCount1);
- GetLeavesExcluding(tree, uRootNodeIndex, uNeighborNodeIndex,
- Leaves2, &uCount2);
-
-#if TRACE
- Log("\nRefineHeightParts node %u\n", uInternalNodeIndex);
- Log("Group1=");
- for (unsigned n = 0; n < uCount1; ++n)
- Log(" %u(%s)", Leaves1[n], tree.GetName(Leaves1[n]));
- Log("\n");
- Log("Group2=");
- for (unsigned n = 0; n < uCount2; ++n)
- Log(" %u(%s)", Leaves2[n], tree.GetName(Leaves2[n]));
- Log("\n");
-#endif
-
- SCORE scoreBefore;
- SCORE scoreAfter;
- bool bAccepted = TryRealign(msaIn, tree, Leaves1, uCount1, Leaves2, uCount2,
- &scoreBefore, &scoreAfter, bLockLeft, bLockRight);
- SetCurrentAlignment(msaIn);
-
- ++g_uRefineHeightSubtree;
- Progress(g_uRefineHeightSubtree, g_uRefineHeightSubtreeTotal);
-
-#if TRACE
- if (uIter > 0)
- Log("Before %g %g\n", scoreBefore,
- History.GetScore(uIter - 1, uInternalNodeIndex, bReversed, bRight));
-#endif
- SCORE scoreMax = scoreAfter > scoreBefore? scoreAfter : scoreBefore;
- bool bRepeated = History.SetScore(uIter, uInternalNodeIndex, bRight, scoreMax);
- if (bRepeated)
- {
- *ptrbOscillating = true;
- break;
- }
-
- if (bAccepted)
- bAnyAccepted = true;
- }
-
- delete[] Leaves1;
- delete[] Leaves2;
-
- *ptrbAnyChanges = bAnyAccepted;
- }
-
-// Return true if any changes made
-bool RefineHoriz(MSA &msaIn, const Tree &tree, unsigned uIters, bool bLockLeft,
- bool bLockRight)
- {
-#if TRACE
- tree.LogMe();
-#endif
-
- if (!tree.IsRooted())
- Quit("RefineHeight: requires rooted tree");
-
- const unsigned uSeqCount = msaIn.GetSeqCount();
- if (uSeqCount < 3)
- return false;
-
- const unsigned uInternalNodeCount = uSeqCount - 1;
- unsigned *InternalNodeIndexes = new unsigned[uInternalNodeCount];
- unsigned *InternalNodeIndexesR = new unsigned[uInternalNodeCount];
-
- GetInternalNodesInHeightOrder(tree, InternalNodeIndexes);
-
- ScoreHistory History(uIters, 2*uSeqCount - 1);
-
- bool bAnyChangesAnyIter = false;
- for (unsigned n = 0; n < uInternalNodeCount; ++n)
- InternalNodeIndexesR[uInternalNodeCount - 1 - n] = InternalNodeIndexes[n];
-
- for (unsigned uIter = 0; uIter < uIters; ++uIter)
- {
- bool bAnyChangesThisIter = false;
- IncIter();
- SetProgressDesc("Refine biparts");
- g_uRefineHeightSubtree = 0;
- g_uRefineHeightSubtreeTotal = uInternalNodeCount*2 - 1;
-
- bool bReverse = (uIter%2 != 0);
- unsigned *Internals;
- if (bReverse)
- Internals = InternalNodeIndexesR;
- else
- Internals = InternalNodeIndexes;
-
- bool bOscillating;
- for (unsigned i = 0; i < 2; ++i)
- {
- bool bAnyChanges = false;
- bool bRight;
- switch (i)
- {
- case 0:
- bRight = true;
- break;
- case 1:
- bRight = false;
- break;
- default:
- Quit("RefineHeight default case");
- }
- RefineHeightParts(msaIn, tree, Internals, bReverse, bRight,
- uIter,
- History,
- &bAnyChanges, &bOscillating, bLockLeft, bLockRight);
- if (bOscillating)
- {
- ProgressStepsDone();
- goto Osc;
- }
- if (bAnyChanges)
- {
- bAnyChangesThisIter = true;
- bAnyChangesAnyIter = true;
- }
- }
-
- ProgressStepsDone();
- if (bOscillating)
- break;
-
- if (!bAnyChangesThisIter)
- break;
- }
-
-Osc:
- delete[] InternalNodeIndexes;
- delete[] InternalNodeIndexesR;
-
- return bAnyChangesAnyIter;
- }
Deleted: trunk/packages/muscle/trunk/refinesubfams.cpp
===================================================================
--- trunk/packages/muscle/trunk/refinesubfams.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/refinesubfams.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,212 +0,0 @@
-#include "muscle.h"
-#include "msa.h"
-#include "tree.h"
-#include "clust.h"
-#include "profile.h"
-#include "pwpath.h"
-
-#define TRACE 0
-
-static void ProgressiveAlignSubfams(const Tree &tree, const unsigned Subfams[],
- unsigned uSubfamCount, const MSA SubfamMSAs[], MSA &msa);
-
-// Identify subfamilies in a tree.
-// Returns array of internal node indexes, one for each subfamily.
-// First try is to select groups by height (which should approximate
-// minimum percent identity), if this gives too many subfamilies then
-// we cut at a point that gives the maximum allowed number of subfams.
-static void GetSubfams(const Tree &tree, double dMaxHeight,
- unsigned uMaxSubfamCount, unsigned **ptrptrSubfams, unsigned *ptruSubfamCount)
- {
- const unsigned uNodeCount = tree.GetNodeCount();
-
- unsigned *Subfams = new unsigned[uNodeCount];
-
- unsigned uSubfamCount;
- ClusterByHeight(tree, dMaxHeight, Subfams, &uSubfamCount);
-
- if (uSubfamCount > uMaxSubfamCount)
- ClusterBySubfamCount(tree, uMaxSubfamCount, Subfams, &uSubfamCount);
-
- *ptrptrSubfams = Subfams;
- *ptruSubfamCount = uSubfamCount;
- }
-
-static void LogSubfams(const Tree &tree, const unsigned Subfams[],
- unsigned uSubfamCount)
- {
- const unsigned uNodeCount = tree.GetNodeCount();
- Log("%u subfamilies found\n", uSubfamCount);
- Log("Subfam Sequence\n");
- Log("------ --------\n");
- unsigned *Leaves = new unsigned[uNodeCount];
- for (unsigned uSubfamIndex = 0; uSubfamIndex < uSubfamCount; ++uSubfamIndex)
- {
- unsigned uSubfamNodeIndex = Subfams[uSubfamIndex];
- unsigned uLeafCount;
- GetLeaves(tree, uSubfamNodeIndex, Leaves, &uLeafCount);
- for (unsigned uLeafIndex = 0; uLeafIndex < uLeafCount; ++uLeafIndex)
- Log("%6u %s\n", uSubfamIndex + 1, tree.GetLeafName(Leaves[uLeafIndex]));
- Log("\n");
- }
- delete[] Leaves;
- }
-
-bool RefineSubfams(MSA &msa, const Tree &tree, unsigned uIters)
- {
- const unsigned uSeqCount = msa.GetSeqCount();
- if (uSeqCount < 3)
- return false;
-
- const double dMaxHeight = 0.6;
- const unsigned uMaxSubfamCount = 16;
- const unsigned uNodeCount = tree.GetNodeCount();
-
- unsigned *Subfams;
- unsigned uSubfamCount;
- GetSubfams(tree, dMaxHeight, uMaxSubfamCount, &Subfams, &uSubfamCount);
- assert(uSubfamCount <= uSeqCount);
-
- if (g_bVerbose)
- LogSubfams(tree, Subfams, uSubfamCount);
-
- MSA *SubfamMSAs = new MSA[uSubfamCount];
- unsigned *Leaves = new unsigned[uSeqCount];
- unsigned *Ids = new unsigned[uSeqCount];
-
- bool bAnyChanges = false;
- for (unsigned uSubfamIndex = 0; uSubfamIndex < uSubfamCount; ++uSubfamIndex)
- {
- unsigned uSubfam = Subfams[uSubfamIndex];
- unsigned uLeafCount;
- GetLeaves(tree, uSubfam, Leaves, &uLeafCount);
- assert(uLeafCount <= uSeqCount);
-
- LeafIndexesToIds(tree, Leaves, uLeafCount, Ids);
-
- MSA &msaSubfam = SubfamMSAs[uSubfamIndex];
- MSASubsetByIds(msa, Ids, uLeafCount, msaSubfam);
- DeleteGappedCols(msaSubfam);
-
-#if TRACE
- Log("Subfam %u MSA=\n", uSubfamIndex);
- msaSubfam.LogMe();
-#endif
-
- if (msaSubfam.GetSeqCount() <= 2)
- continue;
-
- // TODO /////////////////////////////////////////
- // Try using existing tree, may actually hurt to
- // re-estimate, may also be a waste of CPU & mem.
- /////////////////////////////////////////////////
- Tree SubfamTree;
- TreeFromMSA(msaSubfam, SubfamTree, g_Cluster2, g_Distance2, g_Root2);
-
- bool bAnyChangesThisSubfam;
- if (g_bAnchors)
- bAnyChangesThisSubfam = RefineVert(msaSubfam, SubfamTree, uIters);
- else
- bAnyChangesThisSubfam = RefineHoriz(msaSubfam, SubfamTree, uIters, false, false);
-#if TRACE
- Log("Subfam %u Changed %d\n", uSubfamIndex, bAnyChangesThisSubfam);
-#endif
- if (bAnyChangesThisSubfam)
- bAnyChanges = true;
- }
-
- if (bAnyChanges)
- ProgressiveAlignSubfams(tree, Subfams, uSubfamCount, SubfamMSAs, msa);
-
- delete[] Leaves;
- delete[] Subfams;
- delete[] SubfamMSAs;
-
- return bAnyChanges;
- }
-
-static void ProgressiveAlignSubfams(const Tree &tree, const unsigned Subfams[],
- unsigned uSubfamCount, const MSA SubfamMSAs[], MSA &msa)
- {
- const unsigned uNodeCount = tree.GetNodeCount();
-
- bool *Ready = new bool[uNodeCount];
- MSA **MSAs = new MSA *[uNodeCount];
- for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
- {
- Ready[uNodeIndex] = false;
- MSAs[uNodeIndex] = 0;
- }
-
- for (unsigned uSubfamIndex = 0; uSubfamIndex < uSubfamCount; ++uSubfamIndex)
- {
- unsigned uNodeIndex = Subfams[uSubfamIndex];
- Ready[uNodeIndex] = true;
- MSA *ptrMSA = new MSA;
- // TODO: Wasteful copy, needs re-design
- ptrMSA->Copy(SubfamMSAs[uSubfamIndex]);
- MSAs[uNodeIndex] = ptrMSA;
- }
-
- for (unsigned uNodeIndex = tree.FirstDepthFirstNode();
- NULL_NEIGHBOR != uNodeIndex;
- uNodeIndex = tree.NextDepthFirstNode(uNodeIndex))
- {
- if (tree.IsLeaf(uNodeIndex))
- continue;
-
- unsigned uRight = tree.GetRight(uNodeIndex);
- unsigned uLeft = tree.GetLeft(uNodeIndex);
- if (!Ready[uRight] || !Ready[uLeft])
- continue;
-
- MSA *ptrLeft = MSAs[uLeft];
- MSA *ptrRight = MSAs[uRight];
- assert(ptrLeft != 0 && ptrRight != 0);
-
- MSA *ptrParent = new MSA;
-
- PWPath Path;
- AlignTwoMSAs(*ptrLeft, *ptrRight, *ptrParent, Path);
-
- MSAs[uNodeIndex] = ptrParent;
- Ready[uNodeIndex] = true;
- Ready[uLeft] = false;
- Ready[uRight] = false;
-
- delete MSAs[uLeft];
- delete MSAs[uRight];
- MSAs[uLeft] = 0;
- MSAs[uRight] = 0;
- }
-
-#if DEBUG
- {
- unsigned uReadyCount = 0;
- for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
- {
- if (Ready[uNodeIndex])
- {
- assert(tree.IsRoot(uNodeIndex));
- ++uReadyCount;
- assert(0 != MSAs[uNodeIndex]);
- }
- else
- assert(0 == MSAs[uNodeIndex]);
- }
- assert(1 == uReadyCount);
- }
-#endif
-
- const unsigned uRoot = tree.GetRootNodeIndex();
- MSA *ptrRootAlignment = MSAs[uRoot];
-
- msa.Copy(*ptrRootAlignment);
-
- delete ptrRootAlignment;
-
-#if TRACE
- Log("After refine subfamilies, root alignment=\n");
- msa.LogMe();
-#endif
- }
Deleted: trunk/packages/muscle/trunk/refinetree.cpp
===================================================================
--- trunk/packages/muscle/trunk/refinetree.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/refinetree.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,59 +0,0 @@
-#include "muscle.h"
-#include "msa.h"
-#include "tree.h"
-#include "profile.h"
-#include <stdio.h>
-
-void RefineTree(MSA &msa, Tree &tree)
- {
- const unsigned uSeqCount = msa.GetSeqCount();
- if (tree.GetLeafCount() != uSeqCount)
- Quit("Refine tree, tree has different number of nodes");
-
- if (uSeqCount < 3)
- return;
-
-#if DEBUG
- ValidateMuscleIds(msa);
- ValidateMuscleIds(tree);
-#endif
-
- unsigned *IdToDiffsLeafNodeIndex = new unsigned[uSeqCount];
- unsigned uDiffsCount = uSeqCount;
- Tree Tree2;
- for (unsigned uIter = 0; uIter < g_uMaxTreeRefineIters; ++uIter)
- {
- TreeFromMSA(msa, Tree2, g_Cluster2, g_Distance2, g_Root2);
-
-#if DEBUG
- ValidateMuscleIds(Tree2);
-#endif
-
- Tree Diffs;
- DiffTrees(Tree2, tree, Diffs, IdToDiffsLeafNodeIndex);
-
- tree.Copy(Tree2);
-
- const unsigned uNewDiffsNodeCount = Diffs.GetNodeCount();
- const unsigned uNewDiffsCount = (uNewDiffsNodeCount - 1)/2;
-
- if (0 == uNewDiffsCount || uNewDiffsCount >= uDiffsCount)
- {
- ProgressStepsDone();
- break;
- }
- uDiffsCount = uNewDiffsCount;
-
- MSA msa2;
- RealignDiffs(msa, Diffs, IdToDiffsLeafNodeIndex, msa2);
-
-#if DEBUG
- ValidateMuscleIds(msa2);
-#endif
-
- msa.Copy(msa2);
- SetCurrentAlignment(msa);
- }
-
- delete[] IdToDiffsLeafNodeIndex;
- }
Deleted: trunk/packages/muscle/trunk/refinetreee.cpp
===================================================================
--- trunk/packages/muscle/trunk/refinetreee.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/refinetreee.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,51 +0,0 @@
-#include "muscle.h"
-#include "msa.h"
-#include "tree.h"
-#include "profile.h"
-#include <stdio.h>
-
-#define TRACE 0
-
-void RefineTreeE(MSA &msa, const SeqVect &v, Tree &tree, ProgNode *ProgNodes)
- {
- const unsigned uSeqCount = msa.GetSeqCount();
- if (tree.GetLeafCount() != uSeqCount)
- Quit("Refine tree, tree has different number of nodes");
-
- if (uSeqCount < 3)
- return;
-
-#if DEBUG
- ValidateMuscleIds(msa);
- ValidateMuscleIds(tree);
-#endif
-
- const unsigned uNodeCount = tree.GetNodeCount();
- unsigned *uNewNodeIndexToOldNodeIndex= new unsigned[uNodeCount];
-
- Tree Tree2;
- TreeFromMSA(msa, Tree2, g_Cluster2, g_Distance2, g_Root2);
-
-#if DEBUG
- ValidateMuscleIds(Tree2);
-#endif
-
- DiffTreesE(Tree2, tree, uNewNodeIndexToOldNodeIndex);
-
- unsigned uRoot = Tree2.GetRootNodeIndex();
- if (NODE_CHANGED == uNewNodeIndexToOldNodeIndex[uRoot])
- {
- MSA msa2;
- RealignDiffsE(msa, v, Tree2, tree, uNewNodeIndexToOldNodeIndex, msa2, ProgNodes);
- tree.Copy(Tree2);
- msa.Copy(msa2);
-#if DEBUG
- ValidateMuscleIds(msa2);
-#endif
- }
-
- delete[] uNewNodeIndexToOldNodeIndex;
-
- SetCurrentAlignment(msa);
- ProgressStepsDone();
- }
Deleted: trunk/packages/muscle/trunk/refinevert.cpp
===================================================================
--- trunk/packages/muscle/trunk/refinevert.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/refinevert.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,159 +0,0 @@
-#include "muscle.h"
-#include "profile.h"
-#include "msa.h"
-#include "pwpath.h"
-#include "seqvect.h"
-#include "clust.h"
-#include "tree.h"
-
-#define TRACE 0
-
-struct Range
- {
- unsigned m_uBestColLeft;
- unsigned m_uBestColRight;
- };
-
-static void ListVertSavings(unsigned uColCount, unsigned uAnchorColCount,
- const Range *Ranges, unsigned uRangeCount)
- {
- if (!g_bVerbose || !g_bAnchors)
- return;
- double dTotalArea = uColCount*uColCount;
- double dArea = 0.0;
- for (unsigned i = 0; i < uRangeCount; ++i)
- {
- unsigned uLength = Ranges[i].m_uBestColRight - Ranges[i].m_uBestColLeft;
- dArea += uLength*uLength;
- }
- double dPct = (dTotalArea - dArea)*100.0/dTotalArea;
- Log("Anchor columns found %u\n", uAnchorColCount);
- Log("DP area saved by anchors %-4.1f%%\n", dPct);
- }
-
-static void ColsToRanges(const unsigned BestCols[], unsigned uBestColCount,
- unsigned uColCount, Range Ranges[])
- {
-// N best columns produces N+1 vertical blocks.
- const unsigned uRangeCount = uBestColCount + 1;
- for (unsigned uIndex = 0; uIndex < uRangeCount ; ++uIndex)
- {
- unsigned uBestColLeft = 0;
- if (uIndex > 0)
- uBestColLeft = BestCols[uIndex-1];
-
- unsigned uBestColRight = uColCount;
- if (uIndex < uBestColCount)
- uBestColRight = BestCols[uIndex];
-
- Ranges[uIndex].m_uBestColLeft = uBestColLeft;
- Ranges[uIndex].m_uBestColRight = uBestColRight;
- }
- }
-
-// Return true if any changes made
-bool RefineVert(MSA &msaIn, const Tree &tree, unsigned uIters)
- {
- bool bAnyChanges = false;
-
- const unsigned uColCountIn = msaIn.GetColCount();
- const unsigned uSeqCountIn = msaIn.GetSeqCount();
-
- if (uColCountIn < 3 || uSeqCountIn < 3)
- return false;
-
- unsigned *AnchorCols = new unsigned[uColCountIn];
- unsigned uAnchorColCount;
- SetMSAWeightsMuscle(msaIn);
- FindAnchorCols(msaIn, AnchorCols, &uAnchorColCount);
-
- const unsigned uRangeCount = uAnchorColCount + 1;
- Range *Ranges = new Range[uRangeCount];
-
-#if TRACE
- Log("%u ranges\n", uRangeCount);
-#endif
-
- ColsToRanges(AnchorCols, uAnchorColCount, uColCountIn, Ranges);
- ListVertSavings(uColCountIn, uAnchorColCount, Ranges, uRangeCount);
-
-#if TRACE
- {
- Log("Anchor cols: ");
- for (unsigned i = 0; i < uAnchorColCount; ++i)
- Log(" %u", AnchorCols[i]);
- Log("\n");
-
- Log("Ranges:\n");
- for (unsigned i = 0; i < uRangeCount; ++i)
- Log("%4u - %4u\n", Ranges[i].m_uBestColLeft, Ranges[i].m_uBestColRight);
- }
-#endif
-
- delete[] AnchorCols;
-
- MSA msaOut;
- msaOut.SetSize(uSeqCountIn, 0);
-
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCountIn; ++uSeqIndex)
- {
- const char *ptrName = msaIn.GetSeqName(uSeqIndex);
- unsigned uId = msaIn.GetSeqId(uSeqIndex);
- msaOut.SetSeqName(uSeqIndex, ptrName);
- msaOut.SetSeqId(uSeqIndex, uId);
- }
-
- for (unsigned uRangeIndex = 0; uRangeIndex < uRangeCount; ++uRangeIndex)
- {
- MSA msaRange;
-
- const Range &r = Ranges[uRangeIndex];
-
- const unsigned uFromColIndex = r.m_uBestColLeft;
- const unsigned uRangeColCount = r.m_uBestColRight - uFromColIndex;
-
- if (0 == uRangeColCount)
- continue;
- else if (1 == uRangeColCount)
- {
- MSAFromColRange(msaIn, uFromColIndex, 1, msaRange);
- MSAAppend(msaOut, msaRange);
- continue;
- }
- MSAFromColRange(msaIn, uFromColIndex, uRangeColCount, msaRange);
-
-#if TRACE
- Log("\n-------------\n");
- Log("Range %u - %u count=%u\n", r.m_uBestColLeft, r.m_uBestColRight, uRangeColCount);
- Log("Before:\n");
- msaRange.LogMe();
-#endif
-
- bool bLockLeft = (0 != uRangeIndex);
- bool bLockRight = (uRangeCount - 1 != uRangeIndex);
- bool bAnyChangesThisBlock = RefineHoriz(msaRange, tree, uIters, bLockLeft, bLockRight);
- bAnyChanges = (bAnyChanges || bAnyChangesThisBlock);
-
-#if TRACE
- Log("After:\n");
- msaRange.LogMe();
-#endif
-
- MSAAppend(msaOut, msaRange);
-
-#if TRACE
- Log("msaOut after Cat:\n");
- msaOut.LogMe();
-#endif
- }
-
-#if DEBUG
-// Sanity check
- AssertMSAEqIgnoreCaseAndGaps(msaIn, msaOut);
-#endif
-
- delete[] Ranges;
- if (bAnyChanges)
- msaIn.Copy(msaOut);
- return bAnyChanges;
- }
Deleted: trunk/packages/muscle/trunk/refinew.cpp
===================================================================
--- trunk/packages/muscle/trunk/refinew.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/refinew.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,227 +0,0 @@
-#include "muscle.h"
-#include "msa.h"
-#include "seqvect.h"
-#include "textfile.h"
-
-#define MEMDEBUG 0
-
-#if MEMDEBUG
-#include <crtdbg.h>
-#endif
-
-void MUSCLE(SeqVect &v, MSA &msaOut);
-
-// Append msa2 at the end of msa1
-void AppendMSA(MSA &msa1, const MSA &msa2)
- {
- const unsigned uSeqCount = msa1.GetSeqCount();
-
- const unsigned uColCount1 = msa1.GetColCount();
- const unsigned uColCount2 = msa2.GetColCount();
-
- const unsigned uColCountCat = uColCount1 + uColCount2;
-
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
- {
- unsigned uId = msa1.GetSeqId(uSeqIndex);
- unsigned uSeqIndex2;
- bool bFound = msa2.GetSeqIndex(uId, &uSeqIndex2);
- if (bFound)
- {
- for (unsigned uColIndex = 0; uColIndex < uColCount2; ++uColIndex)
- {
- const char c = msa2.GetChar(uSeqIndex2, uColIndex);
- msa1.SetChar(uSeqIndex, uColCount1 + uColIndex, c);
- }
- }
- else
- {
- for (unsigned uColIndex = 0; uColIndex < uColCount2; ++uColIndex)
- msa1.SetChar(uSeqIndex, uColCount1 + uColIndex, '-');
- }
- }
- }
-
-static void SeqFromMSACols(const MSA &msa, unsigned uSeqIndex, unsigned uColFrom,
- unsigned uColTo, Seq &s)
- {
- s.Clear();
- s.SetName(msa.GetSeqName(uSeqIndex));
- s.SetId(msa.GetSeqId(uSeqIndex));
- for (unsigned uColIndex = uColFrom; uColIndex <= uColTo; ++uColIndex)
- {
- char c = msa.GetChar(uSeqIndex, uColIndex);
- if (!IsGapChar(c))
- s.AppendChar(c);
- }
- }
-
-static void SeqVectFromMSACols(const MSA &msa, unsigned uColFrom, unsigned uColTo,
- SeqVect &v)
- {
- v.Clear();
- const unsigned uSeqCount = msa.GetSeqCount();
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
- {
- Seq s;
- SeqFromMSACols(msa, uSeqIndex, uColFrom, uColTo, s);
- v.AppendSeq(s);
- }
- }
-
-void RefineW(const MSA &msaIn, MSA &msaOut)
- {
- const unsigned uSeqCount = msaIn.GetSeqCount();
- const unsigned uColCount = msaIn.GetColCount();
-
-// Reserve same nr seqs, 20% more cols
- const unsigned uReserveColCount = (uColCount*120)/100;
- msaOut.SetSize(uSeqCount, uReserveColCount);
-
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
- {
- msaOut.SetSeqName(uSeqIndex, msaIn.GetSeqName(uSeqIndex));
- msaOut.SetSeqId(uSeqIndex, msaIn.GetSeqId(uSeqIndex));
- }
-
- const unsigned uWindowCount = (uColCount + g_uRefineWindow - 1)/g_uRefineWindow;
- if (0 == g_uWindowTo)
- g_uWindowTo = uWindowCount - 1;
-
-#if MEMDEBUG
- _CrtSetBreakAlloc(1560);
-#endif
-
- if (g_uWindowOffset > 0)
- {
- MSA msaTmp;
- MSAFromColRange(msaIn, 0, g_uWindowOffset, msaOut);
- }
-
- fprintf(stderr, "\n");
- for (unsigned uWindowIndex = g_uWindowFrom; uWindowIndex <= g_uWindowTo; ++uWindowIndex)
- {
- fprintf(stderr, "Window %d of %d \r", uWindowIndex, uWindowCount);
- const unsigned uColFrom = g_uWindowOffset + uWindowIndex*g_uRefineWindow;
- unsigned uColTo = uColFrom + g_uRefineWindow - 1;
- if (uColTo >= uColCount)
- uColTo = uColCount - 1;
- assert(uColTo >= uColFrom);
-
- SeqVect v;
- SeqVectFromMSACols(msaIn, uColFrom, uColTo, v);
-
-#if MEMDEBUG
- _CrtMemState s1;
- _CrtMemCheckpoint(&s1);
-#endif
-
- MSA msaTmp;
- MUSCLE(v, msaTmp);
- AppendMSA(msaOut, msaTmp);
- if (uWindowIndex == g_uSaveWindow)
- {
- MSA msaInTmp;
- unsigned uOutCols = msaOut.GetColCount();
- unsigned un = uColTo - uColFrom + 1;
- MSAFromColRange(msaIn, uColFrom, un, msaInTmp);
-
- char fn[256];
- sprintf(fn, "win%d_inaln.tmp", uWindowIndex);
- TextFile fIn(fn, true);
- msaInTmp.ToFile(fIn);
-
- sprintf(fn, "win%d_inseqs.tmp", uWindowIndex);
- TextFile fv(fn, true);
- v.ToFile(fv);
-
- sprintf(fn, "win%d_outaln.tmp", uWindowIndex);
- TextFile fOut(fn, true);
- msaTmp.ToFile(fOut);
- }
-
-#if MEMDEBUG
- void FreeDPMemSPN();
- FreeDPMemSPN();
-
- _CrtMemState s2;
- _CrtMemCheckpoint(&s2);
-
- _CrtMemState s;
- _CrtMemDifference(&s, &s1, &s2);
-
- _CrtMemDumpStatistics(&s);
- _CrtMemDumpAllObjectsSince(&s1);
- exit(1);
-#endif
-//#if DEBUG
-// AssertMSAEqIgnoreCaseAndGaps(msaInTmp, msaTmp);
-//#endif
- }
- fprintf(stderr, "\n");
-
-// AssertMSAEqIgnoreCaseAndGaps(msaIn, msaOut);//@@uncomment!
- }
-
-void DoRefineW()
- {
- SetOutputFileName(g_pstrOutFileName);
- SetInputFileName(g_pstrInFileName);
- SetStartTime();
-
- SetMaxIters(g_uMaxIters);
- SetSeqWeightMethod(g_SeqWeight1);
-
- TextFile fileIn(g_pstrInFileName);
- MSA msa;
- msa.FromFile(fileIn);
-
- const unsigned uSeqCount = msa.GetSeqCount();
- if (0 == uSeqCount)
- Quit("No sequences in input file");
-
- MSA::SetIdCount(uSeqCount);
-
-// Initialize sequence ids.
-// From this point on, ids must somehow propogate from here.
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
- msa.SetSeqId(uSeqIndex, uSeqIndex);
- SetMuscleInputMSA(msa);
-
- ALPHA Alpha = ALPHA_Undefined;
- switch (g_SeqType)
- {
- case SEQTYPE_Auto:
- Alpha = msa.GuessAlpha();
- break;
-
- case SEQTYPE_Protein:
- Alpha = ALPHA_Amino;
- break;
-
- case SEQTYPE_DNA:
- Alpha = ALPHA_DNA;
- break;
-
- case SEQTYPE_RNA:
- Alpha = ALPHA_RNA;
- break;
-
- default:
- Quit("Invalid SeqType");
- }
- SetAlpha(Alpha);
- msa.FixAlpha();
-
- if (ALPHA_DNA == Alpha || ALPHA_RNA == Alpha)
- SetPPScore(PPSCORE_SPN);
-
- MSA msaOut;
- RefineW(msa, msaOut);
-
-// ValidateMuscleIds(msa);
-
-// TextFile fileOut(g_pstrOutFileName, true);
-// msaOut.ToFile(fileOut);
- MuscleOutput(msaOut);
- }
Deleted: trunk/packages/muscle/trunk/savebest.cpp
===================================================================
--- trunk/packages/muscle/trunk/savebest.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/savebest.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,66 +0,0 @@
-#include "muscle.h"
-#include "msa.h"
-#include "textfile.h"
-#include <time.h>
-
-static MSA *ptrBestMSA;
-static const char *pstrOutputFileName;
-
-void SetOutputFileName(const char *out)
- {
- pstrOutputFileName = out;
- }
-
-void SetCurrentAlignment(MSA &msa)
- {
- ptrBestMSA = &msa;
- }
-
-void SaveCurrentAlignment()
- {
- static bool bCalled = false;
- if (bCalled)
- {
- fprintf(stderr,
- "\nRecursive call to SaveCurrentAlignment, giving up attempt to save.\n");
- exit(EXIT_FatalError);
- }
-
- if (0 == ptrBestMSA)
- {
- fprintf(stderr, "\nAlignment not completed, cannot save.\n");
- Log("Alignment not completed, cannot save.\n");
- exit(EXIT_FatalError);
- }
-
- if (0 == pstrOutputFileName)
- {
- fprintf(stderr, "\nOutput file name not specified, cannot save.\n");
- exit(EXIT_FatalError);
- }
-
- fprintf(stderr, "\nSaving current alignment ...\n");
-
- TextFile fileOut(pstrOutputFileName, true);
- ptrBestMSA->ToFASTAFile(fileOut);
-
- fprintf(stderr, "Current alignment saved to \"%s\".\n", pstrOutputFileName);
- Log("Current alignment saved to \"%s\".\n", pstrOutputFileName);
- }
-
-void CheckMaxTime()
- {
- if (0 == g_ulMaxSecs)
- return;
-
- time_t Now = time(0);
- time_t ElapsedSecs = Now - GetStartTime();
- if (ElapsedSecs <= (time_t) g_ulMaxSecs)
- return;
-
- Log("Max time %s exceeded, elapsed seconds = %ul\n",
- MaxSecsToStr(), ElapsedSecs);
-
- SaveCurrentAlignment();
- exit(EXIT_Success);
- }
Deleted: trunk/packages/muscle/trunk/scoregaps.cpp
===================================================================
--- trunk/packages/muscle/trunk/scoregaps.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/scoregaps.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,201 +0,0 @@
-#include "muscle.h"
-#include "msa.h"
-#include "objscore.h"
-
-#define TRACE 0
-
-struct GAPINFO
- {
- GAPINFO *Next;
- unsigned Start;
- unsigned End;
- };
-
-static GAPINFO **g_Gaps;
-static GAPINFO *g_FreeList;
-static unsigned g_MaxSeqCount;
-static unsigned g_MaxColCount;
-static unsigned g_ColCount;
-static bool *g_ColDiff;
-
-static GAPINFO *NewGapInfo()
- {
- if (0 == g_FreeList)
- {
- const int NEWCOUNT = 256;
- GAPINFO *NewList = new GAPINFO[NEWCOUNT];
- g_FreeList = &NewList[0];
- for (int i = 0; i < NEWCOUNT-1; ++i)
- NewList[i].Next = &NewList[i+1];
- NewList[NEWCOUNT-1].Next = 0;
- }
- GAPINFO *GI = g_FreeList;
- g_FreeList = g_FreeList->Next;
- return GI;
- }
-
-static void FreeGapInfo(GAPINFO *GI)
- {
- GI->Next = g_FreeList;
- g_FreeList = GI;
- }
-
-// TODO: This could be much faster, no need to look
-// at all columns.
-static void FindIntersectingGaps(const MSA &msa, unsigned SeqIndex)
- {
- const unsigned ColCount = msa.GetColCount();
- bool InGap = false;
- bool Intersects = false;
- unsigned Start = uInsane;
- for (unsigned Col = 0; Col <= ColCount; ++Col)
- {
- bool Gap = ((Col != ColCount) && msa.IsGap(SeqIndex, Col));
- if (Gap)
- {
- if (!InGap)
- {
- InGap = true;
- Start = Col;
- }
- if (g_ColDiff[Col])
- Intersects = true;
- }
- else if (InGap)
- {
- InGap = false;
- if (Intersects)
- {
- GAPINFO *GI = NewGapInfo();
- GI->Start = Start;
- GI->End = Col - 1;
- GI->Next = g_Gaps[SeqIndex];
- g_Gaps[SeqIndex] = GI;
- }
- Intersects = false;
- }
- }
- }
-
-static SCORE Penalty(unsigned Length, bool Term)
- {
- if (0 == Length)
- return 0;
- SCORE s1 = g_scoreGapOpen + g_scoreGapExtend*(Length - 1);
-#if DOUBLE_AFFINE
- SCORE s2 = g_scoreGapOpen2 + g_scoreGapExtend2*(Length - 1);
- if (s1 > s2)
- return s1;
- return s2;
-#else
- return s1;
-#endif
- }
-
-//static SCORE ScorePair(unsigned Seq1, unsigned Seq2)
-// {
-//#if TRACE
-// {
-// Log("ScorePair(%d,%d)\n", Seq1, Seq2);
-// Log("Gaps seq 1: ");
-// for (GAPINFO *GI = g_Gaps[Seq1]; GI; GI = GI->Next)
-// Log(" %d-%d", GI->Start, GI->End);
-// Log("\n");
-// Log("Gaps seq 2: ");
-// for (GAPINFO *GI = g_Gaps[Seq2]; GI; GI = GI->Next)
-// Log(" %d-%d", GI->Start, GI->End);
-// Log("\n");
-// }
-//#endif
-// return 0;
-// }
-
-SCORE ScoreGaps(const MSA &msa, const unsigned DiffCols[], unsigned DiffColCount)
- {
-#if TRACE
- {
- Log("ScoreGaps\n");
- Log("DiffCols ");
- for (unsigned i = 0; i < DiffColCount; ++i)
- Log(" %u", DiffCols[i]);
- Log("\n");
- Log("msa=\n");
- msa.LogMe();
- Log("\n");
- }
-#endif
- const unsigned SeqCount = msa.GetSeqCount();
- const unsigned ColCount = msa.GetColCount();
- g_ColCount = ColCount;
-
- if (SeqCount > g_MaxSeqCount)
- {
- delete[] g_Gaps;
- g_MaxSeqCount = SeqCount + 256;
- g_Gaps = new GAPINFO *[g_MaxSeqCount];
- }
- memset(g_Gaps, 0, SeqCount*sizeof(GAPINFO *));
-
- if (ColCount > g_MaxColCount)
- {
- delete[] g_ColDiff;
- g_MaxColCount = ColCount + 256;
- g_ColDiff = new bool[g_MaxColCount];
- }
-
- memset(g_ColDiff, 0, g_ColCount*sizeof(bool));
- for (unsigned i = 0; i < DiffColCount; ++i)
- {
- unsigned Col = DiffCols[i];
- assert(Col < ColCount);
- g_ColDiff[Col] = true;
- }
-
- for (unsigned SeqIndex = 0; SeqIndex < SeqCount; ++SeqIndex)
- FindIntersectingGaps(msa, SeqIndex);
-
-#if TRACE
- {
- Log("\n");
- Log("Intersecting gaps:\n");
- Log(" ");
- for (unsigned Col = 0; Col < ColCount; ++Col)
- Log("%c", g_ColDiff[Col] ? '*' : ' ');
- Log("\n");
- Log(" ");
- for (unsigned Col = 0; Col < ColCount; ++Col)
- Log("%d", Col%10);
- Log("\n");
- for (unsigned Seq = 0; Seq < SeqCount; ++Seq)
- {
- Log("%3d: ", Seq);
- for (unsigned Col = 0; Col < ColCount; ++Col)
- Log("%c", msa.GetChar(Seq, Col));
- Log(" :: ");
- for (GAPINFO *GI = g_Gaps[Seq]; GI; GI = GI->Next)
- Log(" (%d,%d)", GI->Start, GI->End);
- Log(" >%s\n", msa.GetSeqName(Seq));
- }
- Log("\n");
- }
-#endif
-
- SCORE Score = 0;
- for (unsigned Seq1 = 0; Seq1 < SeqCount; ++Seq1)
- {
- const WEIGHT w1 = msa.GetSeqWeight(Seq1);
- for (unsigned Seq2 = Seq1 + 1; Seq2 < SeqCount; ++Seq2)
- {
- const WEIGHT w2 = msa.GetSeqWeight(Seq2);
-// const SCORE Pair = ScorePair(Seq1, Seq2);
- const SCORE Pair = ScoreSeqPairGaps(msa, Seq1, msa, Seq2);
- Score += w1*w2*Pair;
-#if TRACE
- Log("Seq1=%u Seq2=%u ScorePair=%.4g w1=%.4g w2=%.4g Sum=%.4g\n",
- Seq1, Seq2, Pair, w1, w2, Score);
-#endif
- }
- }
-
- return Score;
- }
Deleted: trunk/packages/muscle/trunk/scorehistory.cpp
===================================================================
--- trunk/packages/muscle/trunk/scorehistory.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/scorehistory.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,101 +0,0 @@
-#include "muscle.h"
-#include "scorehistory.h"
-#include <stdio.h>
-
-#define TRACE 0
-
-ScoreHistory::ScoreHistory(unsigned uIters, unsigned uNodeCount)
- {
- m_uNodeCount = uNodeCount;
- m_uIters = uIters;
-
- m_Score = new SCORE *[uIters];
- m_bScoreSet = new bool *[uIters];
- for (unsigned n = 0; n < uIters; ++n)
- {
- m_Score[n] = new SCORE[uNodeCount*2];
- m_bScoreSet[n] = new bool[uNodeCount*2];
- memset(m_bScoreSet[n], 0, uNodeCount*2*sizeof(bool));
- }
- }
-
-ScoreHistory::~ScoreHistory()
- {
- for (unsigned n = 0; n < m_uIters; ++n)
- {
- delete[] m_Score[n];
- delete[] m_bScoreSet[n];
- }
- delete[] m_Score;
- delete[] m_bScoreSet;
- }
-
-bool ScoreHistory::SetScore(unsigned uIter, unsigned uNodeIndex, bool bRight, SCORE Score)
- {
-#if TRACE
- Log("ScoreHistory::SetScore(Iter=%u Node=%u Right=%d Score=%g)\n",
- uIter, uNodeIndex, bRight, Score);
-#endif
- if (uIter >= m_uIters)
- Quit("ScoreHistory::SetScore-1");
- if (uNodeIndex >= m_uNodeCount)
- Quit("ScoreHistory::SetScore-2");
-
- const unsigned uIndex = uNodeIndex*2 + bRight;
- for (unsigned n = 1; n < uIter; ++n)
- {
- const unsigned uPrevIter = n - 1;
- if (!m_bScoreSet[uPrevIter][uIndex])
- {
- LogMe();
- Quit("ScoreHistory::SetScore-3");
- }
- if (m_Score[uPrevIter][uIndex] == Score)
- {
- ProgressStepsDone();
-#if TRACE
- Log("Oscillating\n");
-#endif
- return true;
- }
- }
- m_Score[uIter][uIndex] = Score;
- m_bScoreSet[uIter][uIndex] = true;
- return false;
- }
-
-void ScoreHistory::LogMe() const
- {
- Log("ScoreHistory\n");
- Log("Iter Node Right Score\n");
- Log("---- ---- ----- ---------\n");
- for (unsigned uIter = 0; uIter < m_uIters; ++uIter)
- {
- bool bAnySet = false;
- for (unsigned n = 0; n < m_uNodeCount*2; ++n)
- if (m_bScoreSet[uIter][n])
- {
- bAnySet = true;
- break;
- }
- if (!bAnySet)
- return;
- for (unsigned uNodeIndex = 0; uNodeIndex < m_uNodeCount; ++uNodeIndex)
- {
- const unsigned uBase = 2*uNodeIndex;
- if (m_bScoreSet[uIter][uBase])
- Log("%4u %4u F %9.3f\n", uIter, uNodeIndex, m_Score[uIter][uBase]);
- if (m_bScoreSet[uIter][uBase+1])
- Log("%4u %4u T %9.3f\n", uIter, uNodeIndex, m_Score[uIter][uBase+1]);
- }
- }
- }
-
-SCORE ScoreHistory::GetScore(unsigned uIter, unsigned uNodeIndex,
- bool bReverse, bool bRight) const
- {
- const unsigned uIndex = uNodeIndex*2 + bRight;
- if (!m_bScoreSet[uIter][uIndex])
- Quit("ScoreHistory::GetScore");
- return m_Score[uIter][uIndex];
- }
Deleted: trunk/packages/muscle/trunk/scorehistory.h
===================================================================
--- trunk/packages/muscle/trunk/scorehistory.h 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/scorehistory.h 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,21 +0,0 @@
-#ifndef ScoreHistory_h
-#define ScoreHistory_h
-
-class ScoreHistory
- {
-public:
- ScoreHistory(unsigned uIters, unsigned uInternalNodeCount);
- ~ScoreHistory();
- bool SetScore(unsigned uIter, unsigned uInternalNodeIndex, bool bRight, SCORE Score);
- void LogMe() const;
- SCORE GetScore(unsigned uIter, unsigned uInternalNodeIndex, bool bReversed,
- bool bRight) const;
-
-private:
- SCORE **m_Score;
- bool **m_bScoreSet;
- unsigned m_uIters;
- unsigned m_uNodeCount;
- };
-
-#endif // ScoreHistory_h
Deleted: trunk/packages/muscle/trunk/scorepp.cpp
===================================================================
--- trunk/packages/muscle/trunk/scorepp.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/scorepp.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,104 +0,0 @@
-#include "muscle.h"
-#include "profile.h"
-
-char ConsensusChar(const ProfPos &PP)
- {
- unsigned uMostCommonLetter = 0;
- FCOUNT fcMostCommon = PP.m_fcCounts[0];
- bool bMoreThanOneLetter = false;
- bool bAnyLetter = false;
- for (unsigned uLetter = 0; uLetter < g_AlphaSize; ++uLetter)
- {
- const FCOUNT fc = PP.m_fcCounts[uLetter];
- if (fc > 0)
- {
- if (bAnyLetter)
- bMoreThanOneLetter = true;
- bAnyLetter = true;
- }
- if (fc > fcMostCommon)
- {
- uMostCommonLetter = uLetter;
- fcMostCommon = fc;
- }
- }
- if (!bAnyLetter)
- return '-';
- char c = LetterToChar(uMostCommonLetter);
- if (bMoreThanOneLetter)
- return UnalignChar(c);
- return c;
- }
-
-SCORE ScoreProfPos2LA(const ProfPos &PPA, const ProfPos &PPB)
- {
- SCORE Score = 0;
- for (unsigned n = 0; n < 20; ++n)
- {
- const unsigned uLetter = PPA.m_uSortOrder[n];
- const FCOUNT fcLetter = PPA.m_fcCounts[uLetter];
- if (0 == fcLetter)
- break;
- Score += fcLetter*PPB.m_AAScores[uLetter];
- }
- if (0 == Score)
- return -2.5;
- SCORE logScore = logf(Score);
- return (SCORE) ((logScore - g_scoreCenter)*(PPA.m_fOcc * PPB.m_fOcc));
- }
-
-SCORE ScoreProfPos2NS(const ProfPos &PPA, const ProfPos &PPB)
- {
- SCORE Score = 0;
- for (unsigned n = 0; n < 20; ++n)
- {
- const unsigned uLetter = PPA.m_uSortOrder[n];
- const FCOUNT fcLetter = PPA.m_fcCounts[uLetter];
- if (0 == fcLetter)
- break;
- Score += fcLetter*PPB.m_AAScores[uLetter];
- }
- return Score - g_scoreCenter;
- }
-
-SCORE ScoreProfPos2SP(const ProfPos &PPA, const ProfPos &PPB)
- {
- SCORE Score = 0;
- for (unsigned n = 0; n < 20; ++n)
- {
- const unsigned uLetter = PPA.m_uSortOrder[n];
- const FCOUNT fcLetter = PPA.m_fcCounts[uLetter];
- if (0 == fcLetter)
- break;
- Score += fcLetter*PPB.m_AAScores[uLetter];
- }
- return Score - g_scoreCenter;
- }
-
-SCORE ScoreProfPos2SPN(const ProfPos &PPA, const ProfPos &PPB)
- {
- SCORE Score = 0;
- for (unsigned n = 0; n < 4; ++n)
- {
- const unsigned uLetter = PPA.m_uSortOrder[n];
- const FCOUNT fcLetter = PPA.m_fcCounts[uLetter];
- if (0 == fcLetter)
- break;
- Score += fcLetter*PPB.m_AAScores[uLetter];
- }
- return Score - g_scoreCenter;
- }
-
-SCORE ScoreProfPos2(const ProfPos &PPA, const ProfPos &PPB)
- {
- if (PPSCORE_SP == g_PPScore)
- return ScoreProfPos2NS(PPA, PPB);
- else if (PPSCORE_LE == g_PPScore)
- return ScoreProfPos2LA(PPA, PPB);
- else if (PPSCORE_SV == g_PPScore)
- return ScoreProfPos2SP(PPA, PPB);
- else if (PPSCORE_SPN == g_PPScore)
- return ScoreProfPos2SPN(PPA, PPB);
- Quit("Invalid g_PPScore");
- return 0;
- }
Deleted: trunk/packages/muscle/trunk/seq.cpp
===================================================================
--- trunk/packages/muscle/trunk/seq.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/seq.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,342 +0,0 @@
-#include "muscle.h"
-#include "seq.h"
-#include "textfile.h"
-#include "msa.h"
-//#include <ctype.h>
-
-const size_t MAX_FASTA_LINE = 16000;
-
-void Seq::SetName(const char *ptrName)
- {
- delete[] m_ptrName;
- size_t n = strlen(ptrName) + 1;
- m_ptrName = new char[n];
- strcpy(m_ptrName, ptrName);
- }
-
-void Seq::ToFASTAFile(TextFile &File) const
- {
- File.PutFormat(">%s\n", m_ptrName);
- unsigned uColCount = Length();
- for (unsigned n = 0; n < uColCount; ++n)
- {
- if (n > 0 && n%60 == 0)
- File.PutString("\n");
- File.PutChar(at(n));
- }
- File.PutString("\n");
- }
-
-// Return true on end-of-file
-bool Seq::FromFASTAFile(TextFile &File)
- {
- Clear();
-
- char szLine[MAX_FASTA_LINE];
- bool bEof = File.GetLine(szLine, sizeof(szLine));
- if (bEof)
- return true;
- if ('>' != szLine[0])
- Quit("Expecting '>' in FASTA file %s line %u",
- File.GetFileName(), File.GetLineNr());
-
- size_t n = strlen(szLine);
- if (1 == n)
- Quit("Missing annotation following '>' in FASTA file %s line %u",
- File.GetFileName(), File.GetLineNr());
-
- m_ptrName = new char[n];
- strcpy(m_ptrName, szLine + 1);
-
- TEXTFILEPOS Pos = File.GetPos();
- for (;;)
- {
- bEof = File.GetLine(szLine, sizeof(szLine));
- if (bEof)
- {
- if (0 == size())
- {
- Quit("Empty sequence in FASTA file %s line %u",
- File.GetFileName(), File.GetLineNr());
- return true;
- }
- return false;
- }
- if ('>' == szLine[0])
- {
- if (0 == size())
- Quit("Empty sequence in FASTA file %s line %u",
- File.GetFileName(), File.GetLineNr());
- // Rewind to beginning of this line, it's the start of the
- // next sequence.
- File.SetPos(Pos);
- return false;
- }
- const char *ptrChar = szLine;
- while (char c = *ptrChar++)
- {
- if (isspace(c))
- continue;
- if (IsGapChar(c))
- continue;
- if (!IsResidueChar(c))
- {
- if (isprint(c))
- {
- char w = GetWildcardChar();
- Warning("Invalid residue '%c' in FASTA file %s line %d, replaced by '%c'",
- c, File.GetFileName(), File.GetLineNr(), w);
- c = w;
- }
- else
- Quit("Invalid byte hex %02x in FASTA file %s line %d",
- (unsigned char) c, File.GetFileName(), File.GetLineNr());
- }
- c = toupper(c);
- push_back(c);
- }
- Pos = File.GetPos();
- }
- }
-
-void Seq::ExtractUngapped(MSA &msa) const
- {
- msa.Clear();
- unsigned uColCount = Length();
- msa.SetSize(1, 1);
- unsigned uUngappedPos = 0;
- for (unsigned n = 0; n < uColCount; ++n)
- {
- char c = at(n);
- if (!IsGapChar(c))
- msa.SetChar(0, uUngappedPos++, c);
- }
- msa.SetSeqName(0, m_ptrName);
- }
-
-void Seq::Copy(const Seq &rhs)
- {
- clear();
- const unsigned uLength = rhs.Length();
- for (unsigned uColIndex = 0; uColIndex < uLength; ++uColIndex)
- push_back(rhs.at(uColIndex));
- const char *ptrName = rhs.GetName();
- size_t n = strlen(ptrName) + 1;
- m_ptrName = new char[n];
- strcpy(m_ptrName, ptrName);
- SetId(rhs.GetId());
- }
-
-void Seq::CopyReversed(const Seq &rhs)
- {
- clear();
- const unsigned uLength = rhs.Length();
- const unsigned uBase = rhs.Length() - 1;
- for (unsigned uColIndex = 0; uColIndex < uLength; ++uColIndex)
- push_back(rhs.at(uBase - uColIndex));
- const char *ptrName = rhs.GetName();
- size_t n = strlen(ptrName) + 1;
- m_ptrName = new char[n];
- strcpy(m_ptrName, ptrName);
- }
-
-void Seq::StripGaps()
- {
- for (CharVect::iterator p = begin(); p != end(); )
- {
- char c = *p;
- if (IsGapChar(c))
- erase(p);
- else
- ++p;
- }
- }
-
-void Seq::StripGapsAndWhitespace()
- {
- for (CharVect::iterator p = begin(); p != end(); )
- {
- char c = *p;
- if (isspace(c) || IsGapChar(c))
- erase(p);
- else
- ++p;
- }
- }
-
-void Seq::ToUpper()
- {
- for (CharVect::iterator p = begin(); p != end(); ++p)
- {
- char c = *p;
- if (islower(c))
- *p = toupper(c);
- }
- }
-
-unsigned Seq::GetLetter(unsigned uIndex) const
- {
- assert(uIndex < Length());
- char c = operator[](uIndex);
- return CharToLetter(c);
- }
-
-bool Seq::EqIgnoreCase(const Seq &s) const
- {
- const unsigned n = Length();
- if (n != s.Length())
- return false;
- for (unsigned i = 0; i < n; ++i)
- {
- const char c1 = at(i);
- const char c2 = s.at(i);
- if (IsGapChar(c1))
- {
- if (!IsGapChar(c2))
- return false;
- }
- else
- {
- if (toupper(c1) != toupper(c2))
- return false;
- }
- }
- return true;
- }
-
-bool Seq::Eq(const Seq &s) const
- {
- const unsigned n = Length();
- if (n != s.Length())
- return false;
- for (unsigned i = 0; i < n; ++i)
- {
- const char c1 = at(i);
- const char c2 = s.at(i);
- if (c1 != c2)
- return false;
- }
- return true;
- }
-
-bool Seq::EqIgnoreCaseAndGaps(const Seq &s) const
- {
- const unsigned uThisLength = Length();
- const unsigned uOtherLength = s.Length();
-
- unsigned uThisPos = 0;
- unsigned uOtherPos = 0;
-
- int cThis;
- int cOther;
- for (;;)
- {
- if (uThisPos == uThisLength && uOtherPos == uOtherLength)
- break;
-
- // Set cThis to next non-gap character in this string
- // or -1 if end-of-string.
- for (;;)
- {
- if (uThisPos == uThisLength)
- {
- cThis = -1;
- break;
- }
- else
- {
- cThis = at(uThisPos);
- ++uThisPos;
- if (!IsGapChar(cThis))
- {
- cThis = toupper(cThis);
- break;
- }
- }
- }
-
- // Set cOther to next non-gap character in s
- // or -1 if end-of-string.
- for (;;)
- {
- if (uOtherPos == uOtherLength)
- {
- cOther = -1;
- break;
- }
- else
- {
- cOther = s.at(uOtherPos);
- ++uOtherPos;
- if (!IsGapChar(cOther))
- {
- cOther = toupper(cOther);
- break;
- }
- }
- }
-
- // Compare characters are corresponding ungapped position
- if (cThis != cOther)
- return false;
- }
- return true;
- }
-
-unsigned Seq::GetUngappedLength() const
- {
- unsigned uUngappedLength = 0;
- for (CharVect::const_iterator p = begin(); p != end(); ++p)
- {
- char c = *p;
- if (!IsGapChar(c))
- ++uUngappedLength;
- }
- return uUngappedLength;
- }
-
-void Seq::LogMe() const
- {
- Log(">%s\n", m_ptrName);
- const unsigned n = Length();
- for (unsigned i = 0; i < n; ++i)
- Log("%c", at(i));
- Log("\n");
- }
-
-void Seq::FromString(const char *pstrSeq, const char *pstrName)
- {
- clear();
- const unsigned uLength = (unsigned) strlen(pstrSeq);
- for (unsigned uColIndex = 0; uColIndex < uLength; ++uColIndex)
- push_back(pstrSeq[uColIndex]);
- size_t n = strlen(pstrName) + 1;
- m_ptrName = new char[n];
- strcpy(m_ptrName, pstrName);
- }
-
-bool Seq::HasGap() const
- {
- for (CharVect::const_iterator p = begin(); p != end(); ++p)
- {
- char c = *p;
- if (IsGapChar(c))
- return true;
- }
- return false;
- }
-
-void Seq::FixAlpha()
- {
- for (CharVect::iterator p = begin(); p != end(); ++p)
- {
- char c = *p;
- if (!IsResidueChar(c))
- {
- char w = GetWildcardChar();
- // Warning("Invalid residue '%c', replaced by '%c'", c, w);
- InvalidLetterWarning(c, w);
- *p = w;
- }
- }
- }
Deleted: trunk/packages/muscle/trunk/seq.h
===================================================================
--- trunk/packages/muscle/trunk/seq.h 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/seq.h 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,85 +0,0 @@
-#ifndef Seq_h
-#define Seq_h
-
-#include <vector>
-
-class TextFile;
-class MSA;
-
-typedef std::vector<char> CharVect;
-
-class Seq : public CharVect
- {
-public:
- Seq()
- {
- m_ptrName = 0;
- // Start with moderate size to avoid
- // thrashing the heap.
- reserve(200);
- }
- virtual ~Seq()
- {
- delete[] m_ptrName;
- }
-
-private:
-// Not implemented; prevent use of copy c'tor and assignment.
- Seq(const Seq &);
- Seq &operator=(const Seq &);
-
-public:
- void Clear()
- {
- clear();
- delete[] m_ptrName;
- m_ptrName = 0;
- m_uId = uInsane;
- }
- const char *GetName() const
- {
- return m_ptrName;
- }
- unsigned GetId() const
- {
- if (uInsane == m_uId)
- Quit("Seq::GetId, id not set");
- return m_uId;
- }
- void SetId(unsigned uId) { m_uId = uId; }
-
- bool FromFASTAFile(TextFile &File);
- void ToFASTAFile(TextFile &File) const;
- void ExtractUngapped(MSA &msa) const;
-
- void FromString(const char *pstrSeq, const char *pstrName);
- void Copy(const Seq &rhs);
- void CopyReversed(const Seq &rhs);
- void StripGaps();
- void StripGapsAndWhitespace();
- void ToUpper();
- void SetName(const char *ptrName);
- unsigned GetLetter(unsigned uIndex) const;
- unsigned Length() const { return (unsigned) size(); }
- bool Eq(const Seq &s) const;
- bool EqIgnoreCase(const Seq &s) const;
- bool EqIgnoreCaseAndGaps(const Seq &s) const;
- bool HasGap() const;
- unsigned GetUngappedLength() const;
- void LogMe() const;
- char GetChar(unsigned uIndex) const { return operator[](uIndex); }
- void SetChar(unsigned uIndex, char c) { operator[](uIndex) = c; }
- void AppendChar(char c) { push_back(c); }
- void FixAlpha();
-
-#ifndef _WIN32
- reference at(size_type i) { return operator[](i); }
- const_reference at(size_type i) const { return operator[](i); }
-#endif
-
-private:
- char *m_ptrName;
- unsigned m_uId;
- };
-
-#endif // Seq.h
Deleted: trunk/packages/muscle/trunk/seqvect.cpp
===================================================================
--- trunk/packages/muscle/trunk/seqvect.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/seqvect.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,290 +0,0 @@
-#include "muscle.h"
-#include "seqvect.h"
-#include "textfile.h"
-#include "msa.h"
-
-const size_t MAX_FASTA_LINE = 16000;
-
-SeqVect::~SeqVect()
- {
- Clear();
- }
-
-void SeqVect::Clear()
- {
- for (size_t n = 0; n < size(); ++n)
- delete (*this)[n];
- }
-
-void SeqVect::ToFASTAFile(TextFile &File) const
- {
- unsigned uSeqCount = Length();
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
- {
- Seq *ptrSeq = at(uSeqIndex);
- ptrSeq->ToFASTAFile(File);
- }
- }
-
-void SeqVect::FromFASTAFile(TextFile &File)
- {
- Clear();
-
- FILE *f = File.GetStdioFile();
- for (;;)
- {
- char *Label;
- unsigned uLength;
- char *SeqData = GetFastaSeq(f, &uLength, &Label);
- if (0 == SeqData)
- return;
- Seq *ptrSeq = new Seq;
-
- for (unsigned i = 0; i < uLength; ++i)
- {
- char c = SeqData[i];
- ptrSeq->push_back(c);
- }
-
- ptrSeq->SetName(Label);
- push_back(ptrSeq);
-
- delete[] SeqData;
- delete[] Label;
- }
- }
-
-void SeqVect::PadToMSA(MSA &msa)
- {
- unsigned uSeqCount = Length();
- if (0 == uSeqCount)
- {
- msa.Clear();
- return;
- }
-
- unsigned uLongestSeqLength = 0;
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
- {
- Seq *ptrSeq = at(uSeqIndex);
- unsigned uColCount = ptrSeq->Length();
- if (uColCount > uLongestSeqLength)
- uLongestSeqLength = uColCount;
- }
- msa.SetSize(uSeqCount, uLongestSeqLength);
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
- {
- Seq *ptrSeq = at(uSeqIndex);
- msa.SetSeqName(uSeqIndex, ptrSeq->GetName());
- unsigned uColCount = ptrSeq->Length();
- unsigned uColIndex;
- for (uColIndex = 0; uColIndex < uColCount; ++uColIndex)
- {
- char c = ptrSeq->at(uColIndex);
- msa.SetChar(uSeqIndex, uColIndex, c);
- }
- while (uColIndex < uLongestSeqLength)
- msa.SetChar(uSeqIndex, uColIndex++, '.');
- }
- }
-
-void SeqVect::Copy(const SeqVect &rhs)
- {
- clear();
- unsigned uSeqCount = rhs.Length();
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
- {
- Seq *ptrSeq = rhs.at(uSeqIndex);
- Seq *ptrSeqCopy = new Seq;
- ptrSeqCopy->Copy(*ptrSeq);
- push_back(ptrSeqCopy);
- }
- }
-
-void SeqVect::StripGaps()
- {
- unsigned uSeqCount = Length();
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
- {
- Seq *ptrSeq = at(uSeqIndex);
- ptrSeq->StripGaps();
- }
- }
-
-void SeqVect::StripGapsAndWhitespace()
- {
- unsigned uSeqCount = Length();
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
- {
- Seq *ptrSeq = at(uSeqIndex);
- ptrSeq->StripGapsAndWhitespace();
- }
- }
-
-void SeqVect::ToUpper()
- {
- unsigned uSeqCount = Length();
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
- {
- Seq *ptrSeq = at(uSeqIndex);
- ptrSeq->ToUpper();
- }
- }
-
-bool SeqVect::FindName(const char *ptrName, unsigned *ptruIndex) const
- {
- unsigned uSeqCount = Length();
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
- {
- const Seq *ptrSeq = at(uSeqIndex);
- if (0 == stricmp(ptrSeq->GetName(), ptrName))
- {
- *ptruIndex = uSeqIndex;
- return true;
- }
- }
- return false;
- }
-
-void SeqVect::AppendSeq(const Seq &s)
- {
- Seq *ptrSeqCopy = new Seq;
- ptrSeqCopy->Copy(s);
- push_back(ptrSeqCopy);
- }
-
-void SeqVect::LogMe() const
- {
- unsigned uSeqCount = Length();
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
- {
- const Seq *ptrSeq = at(uSeqIndex);
- ptrSeq->LogMe();
- }
- }
-
-const char *SeqVect::GetSeqName(unsigned uSeqIndex) const
- {
- assert(uSeqIndex < size());
- const Seq *ptrSeq = at(uSeqIndex);
- return ptrSeq->GetName();
- }
-
-unsigned SeqVect::GetSeqId(unsigned uSeqIndex) const
- {
- assert(uSeqIndex < size());
- const Seq *ptrSeq = at(uSeqIndex);
- return ptrSeq->GetId();
- }
-
-unsigned SeqVect::GetSeqIdFromName(const char *Name) const
- {
- const unsigned uSeqCount = GetSeqCount();
- for (unsigned i = 0; i < uSeqCount; ++i)
- {
- if (!strcmp(Name, GetSeqName(i)))
- return GetSeqId(i);
- }
- Quit("SeqVect::GetSeqIdFromName(%s): not found", Name);
- return 0;
- }
-
-Seq &SeqVect::GetSeqById(unsigned uId)
- {
- const unsigned uSeqCount = GetSeqCount();
- for (unsigned i = 0; i < uSeqCount; ++i)
- {
- if (GetSeqId(i) == uId)
- return GetSeq(i);
- }
- Quit("SeqVect::GetSeqIdByUd(%d): not found", uId);
- return (Seq &) *((Seq *) 0);
- }
-
-unsigned SeqVect::GetSeqLength(unsigned uSeqIndex) const
- {
- assert(uSeqIndex < size());
- const Seq *ptrSeq = at(uSeqIndex);
- return ptrSeq->Length();
- }
-
-Seq &SeqVect::GetSeq(unsigned uSeqIndex)
- {
- assert(uSeqIndex < size());
- return *at(uSeqIndex);
- }
-
-const Seq &SeqVect::GetSeq(unsigned uSeqIndex) const
- {
- assert(uSeqIndex < size());
- return *at(uSeqIndex);
- }
-
-void SeqVect::SetSeqId(unsigned uSeqIndex, unsigned uId)
- {
- assert(uSeqIndex < size());
- Seq *ptrSeq = at(uSeqIndex);
- return ptrSeq->SetId(uId);
- }
-
-ALPHA SeqVect::GuessAlpha() const
- {
-// If at least MIN_NUCLEO_PCT of the first CHAR_COUNT non-gap
-// letters belong to the nucleotide alphabet, guess nucleo.
-// Otherwise amino.
- const unsigned CHAR_COUNT = 100;
- const unsigned MIN_NUCLEO_PCT = 95;
-
- const unsigned uSeqCount = GetSeqCount();
- if (0 == uSeqCount)
- return ALPHA_Amino;
-
- unsigned uSeqIndex = 0;
- unsigned uPos = 0;
- unsigned uSeqLength = GetSeqLength(0);
- unsigned uDNACount = 0;
- unsigned uRNACount = 0;
- unsigned uTotal = 0;
- const Seq *ptrSeq = &GetSeq(0);
- for (;;)
- {
- while (uPos >= uSeqLength)
- {
- ++uSeqIndex;
- if (uSeqIndex >= uSeqCount)
- break;
- ptrSeq = &GetSeq(uSeqIndex);
- uSeqLength = ptrSeq->Length();
- uPos = 0;
- }
- if (uSeqIndex >= uSeqCount)
- break;
- char c = ptrSeq->at(uPos++);
- if (IsGapChar(c))
- continue;
- if (IsDNA(c))
- ++uDNACount;
- if (IsRNA(c))
- ++uRNACount;
- ++uTotal;
- if (uTotal >= CHAR_COUNT)
- break;
- }
- if (uTotal != 0 && ((uDNACount*100)/uTotal) >= MIN_NUCLEO_PCT)
- return ALPHA_DNA;
- if (uTotal != 0 && ((uRNACount*100)/uTotal) >= MIN_NUCLEO_PCT)
- return ALPHA_RNA;
- return ALPHA_Amino;
- }
-
-void SeqVect::FixAlpha()
- {
- ClearInvalidLetterWarning();
- unsigned uSeqCount = Length();
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
- {
- Seq *ptrSeq = at(uSeqIndex);
- ptrSeq->FixAlpha();
- }
- ReportInvalidLetters();
- }
Deleted: trunk/packages/muscle/trunk/seqvect.h
===================================================================
--- trunk/packages/muscle/trunk/seqvect.h 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/seqvect.h 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,63 +0,0 @@
-#ifndef SeqVect_h
-#define SeqVect_h
-
-#include <vector>
-#include "seq.h"
-
-typedef std::vector<Seq *> SeqVectBase;
-
-class SeqVect : public SeqVectBase
- {
-public:
- SeqVect() {}
- virtual ~SeqVect();
-
-private:
-// Not implemented; prevent use of copy c'tor and assignment.
- SeqVect(const SeqVect &);
- SeqVect &operator=(const SeqVect &);
-
-public:
- void FromFile(TextFile &File)
- {
- FromFASTAFile(File);
- }
-
- void FromFASTAFile(TextFile &File);
- void ToFASTAFile(TextFile &File) const;
-
- void ToFile(TextFile &File) const
- {
- ToFASTAFile(File);
- }
-
- void PadToMSA(MSA &msa);
- void Copy(const SeqVect &rhs);
- void StripGaps();
- void StripGapsAndWhitespace();
- void ToUpper();
- void Clear();
- unsigned Length() const { return (unsigned) size(); }
- unsigned GetSeqCount() const { return (unsigned) size(); }
- void AppendSeq(const Seq &s);
- bool FindName(const char *ptrName, unsigned *ptruIndex) const;
- void LogMe() const;
- const char *GetSeqName(unsigned uSeqIndex) const;
- unsigned GetSeqId(unsigned uSeqIndex) const;
- unsigned GetSeqIdFromName(const char *Name) const;
- unsigned GetSeqLength(unsigned uSeqIndex) const;
- void SetSeqId(unsigned uSeqIndex, unsigned uId);
- Seq &GetSeq(unsigned uIndex);
- Seq &GetSeqById(unsigned uId);
- const Seq &GetSeq(unsigned uIndex) const;
-
- ALPHA GuessAlpha() const;
- void FixAlpha();
-
-#ifndef _WIN32
- reference at(size_type i) { return operator[](i); }
- const_reference at(size_type i) const { return operator[](i); }
-#endif
- };
-
-#endif // SeqVect_h
Deleted: trunk/packages/muscle/trunk/setblosumweights.cpp
===================================================================
--- trunk/packages/muscle/trunk/setblosumweights.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/setblosumweights.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,131 +0,0 @@
-/***
-Code for implementing HMMer's "BLOSUM weighting" algorithm.
-
-The algorithm was deduced by reverse-engineering the HMMer code.
-
-The HMMer documentation refers to BLOSUM weighting as "Henikoff
-simple filter weighting"
-
-The name BLOSUM implied to me that HMMer would be using a
-substitution probability matrix to compute distances, but this
-turned out not to be the case.
-
-It is notable, not to say puzzling, that the HMMer BLOSUM weighting
-algorithm is guaranteed to produce an integral NIC (number-of-independent-
-counts, also known as effective sequence count). Presumably Eddy must
-have known this, though he doesn't comment on it and he computes & stores
-the value in a float.
-
-Here's the algorithm:
-
-Distances between two sequences are based on the average of a simple
-binary equal (one) / not equal (zero) at each position. The only thing
-that has anything to do with BLOSUM in this calculation is an obscure
-(to me) constant value of 0.62. The sequences are clustered using this
-distance. If the pairwise identity (fraction of identical positions)
-is less than 0.62, they get assigned to disjoint clusters, the final
-number of disjoint clusters is the NIC. This makes some intuitive sense:
-I would interpret this by saying that if a set of sequences are close
-enough they count as one sequence. The weight for each sequence within a
-disjoint cluster is then determined to be 1 / (clustersize), from which it
-follows that the sum of all weights is equal to the number of disjoint
-clusters and is thus guaranteed to be an integer value. It is exactly this
-sum that HMMer uses for the NIC, by default.
-
-The individual BLOSUM sequence weights are not used for anything else in
-HMMer, unless you specify that BLOSUM weighting should override the default
-GSC weighting. GSC weighting uses a different clustering algorithm to
-determine relative weights. The BLOSUM NIC is then distributed over the
-GSC tree according to those relative weights.
-***/
-
-#include "muscle.h"
-#include "msa.h"
-#include "cluster.h"
-#include "distfunc.h"
-
-// Set weights of all sequences in the subtree under given node.
-void MSA::SetBLOSUMSubtreeWeight(const ClusterNode *ptrNode, double dWeight) const
- {
- if (0 == ptrNode)
- return;
-
- const ClusterNode *ptrRight = ptrNode->GetRight();
- const ClusterNode *ptrLeft = ptrNode->GetLeft();
-
-// If leaf, set weight
- if (0 == ptrRight && 0 == ptrLeft)
- {
- unsigned uIndex = ptrNode->GetIndex();
- WEIGHT w = DoubleToWeight(dWeight);
- m_Weights[uIndex] = w;
- return;
- }
-
-// Otherwise, recursively set subtrees
- SetBLOSUMSubtreeWeight(ptrLeft, dWeight);
- SetBLOSUMSubtreeWeight(ptrRight, dWeight);
- }
-
-// Traverse a subtree looking for clusters where all
-// the leaves are sufficiently similar that they
-// should be weighted as a group, i.e. given a weight
-// of 1/N where N is the cluster size. The idea is
-// to avoid sample bias where we have closely related
-// sequences in the input alignment.
-// The weight at a node is the distance between
-// the two closest sequences in the left and right
-// subtrees under that node. "Sufficiently similar"
-// is defined as being where that minimum distance
-// is less than the dMinDist threshold. I don't know
-// why the clustering is done using a minimum rather
-// than a maximum or average, either of which would
-// seem more natural to me.
-// Return value is number of groups under this node.
-// A "group" is the cluster found under a node with a
-// weight less than the minimum.
-unsigned MSA::SetBLOSUMNodeWeight(const ClusterNode *ptrNode, double dMinDist) const
- {
- if (0 == ptrNode)
- return 0;
-
- if (ptrNode->GetWeight() < dMinDist)
- {
- unsigned uClusterSize = ptrNode->GetClusterSize();
- assert(uClusterSize > 0);
- double dWeight = 1.0 / uClusterSize;
- SetBLOSUMSubtreeWeight(ptrNode, dWeight);
- return 1;
- }
-
- const ClusterNode *ptrLeft = ptrNode->GetLeft();
- const ClusterNode *ptrRight = ptrNode->GetRight();
-
- unsigned uLeftGroupCount = SetBLOSUMNodeWeight(ptrLeft, dMinDist);
- unsigned uRightGroupCount = SetBLOSUMNodeWeight(ptrRight, dMinDist);
-
- return uLeftGroupCount + uRightGroupCount;
- }
-
-// Return value is the group count, i.e. the effective number
-// of distinctly different sequences.
-unsigned MSA::CalcBLOSUMWeights(ClusterTree &BlosumCluster) const
- {
-// Build distance matrix
- DistFunc DF;
- unsigned uSeqCount = GetSeqCount();
- DF.SetCount(uSeqCount);
- for (unsigned i = 0; i < uSeqCount; ++i)
- for (unsigned j = i+1; j < uSeqCount; ++j)
- {
- double dDist = GetPctIdentityPair(i, j);
- assert(dDist >= 0.0 && dDist <= 1.0);
- DF.SetDist(i, j, (float) (1.0 - dDist));
- }
-
-// Cluster based on the distance function
- BlosumCluster.Create(DF);
-
-// Return value is HMMer's "effective sequence count".
- return SetBLOSUMNodeWeight(BlosumCluster.GetRoot(), 1.0 - BLOSUM_DIST);
- }
Deleted: trunk/packages/muscle/trunk/setgscweights.cpp
===================================================================
--- trunk/packages/muscle/trunk/setgscweights.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/setgscweights.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,195 +0,0 @@
-/***
-Gerstein/Sonnhammer/Chothia ad hoc sequence weighting.
-The algorithm was deduced by reverse-engineering the
-HMMer code.
-
-I used an alternative representation that I prefer over
-HMMer's. The HMMer code is full of tree manipulations
-that do something to the left child and then the equivalent
-thing to the right child. It was clear that there must be
-a re-formulation that does everything once for each node,
-which would reduce the number of operations expressed
-in the code by a factor of two. This gives a more elegant
-and less error-prone way to code it.
-
-These notes explain the correspondence between my design
-and Eddy's.
-
-HMMer stores a data structure phylo_s for each non-leaf
-node in the cluster tree. This structure contains the
-following fields:
-
- diff Weight of the node
- lblen Left branch length
- rblen Right branch length
-
-The lblen and rblen branch lengths are calculated as:
-
- this.lblen = this.diff - left.diff
- this.rblen = this.diff - right.diff
-
-My code stores one ClusterNode data structure per node
-in the cluster tree, including leaves. I store only the
-weight. I can recover the HMMer branch length fields
-in a trivial O(1) calculation as follows:
-
- lblen = Node.GetWeight() - Node.GetLeft()->GetWeight()
- rblen = Node.GetWeight() - Node.GetRight()->GetWeight()
-
-For the GSC weights calculation, HMMer constructs the
-following vectors, which have entries for all nodes,
-including leaves:
-
- lwt Left weight
- rwt Right weight
-
-The "left weight" is calculated as the sum of the weights in
-all the nodes reachable through the left branch, including
-the node itself. (This is not immediately obvious from the
-code, which does the calculation using branch lengths rather
-than weights, but this is an equivalent, and to my mind clearer,
-statement of what they are). Similarly, the "right weight" is
-the sum of all weights reachable via the right branch. I define
-the "cluster weight" to be the summed weight of all nodes in the
-subtree under the node, including the node itself. I provide
-a function Node.GetClusterWeight() which calculates the cluster
-weight using a O(ln N) recursion through the tree. The lwt and
-rwt values can be recovered as follows:
-
- lwt = Node.GetLeft()->GetClusterWeight()
- + Node.GetWeight()
-
- rwt = Node.GetRight()->GetClusterWeight()
- + Node.GetWeight()
-
-HMMer calculates a further vector fwt as follows.
-
- this.fwt = parent.fwt * parent.lwt / (parent.lwt + parent.rwt)
-
-This applies to nodes reached via a left branch, for nodes reached
-via a right branch:
-
- this.fwt = parent.fwt * parent.rwt / (parent.lwt + parent.rwt)
-
-The values of fwt at the leaf nodes are the final GSC weights.
-We derive the various terms using our equivalents.
-
- parent.lwt = Parent.GetLeft()->GetClusterWeight()
- + Parent.GetWeight()
-
- parent.rwt = Parent.GetRight()->GetClusterWeight()
- + Parent.GetWeight()
-
- parent.lwt + parent.rwt =
- { Parent.GetLeft()->GetClusterWeight()
- + Parent.GetRight()->GetClusterWeight()
- + Parent.GetWeight() }
- + Parent.GetWeight()
-
-We recognize the term {...} as the cluster weight of the
-parent, so
-
- parent.lwt + parent.rwt
- = Parent.GetClusterWeight()
- + Parent.GetWeight()
-
-As you would expect, repeating this exercise for parent.rwt gives
-exactly the same expression.
-
-The GSC weights (fwt) are stored in the Weight2 field of the cluster
-tree, the Weight field stores the original (BLOSUM) weights used
-as input to this algorithm.
-***/
-
-#include "muscle.h"
-#include "msa.h"
-#include "cluster.h"
-#include "distfunc.h"
-
-// Set weights of all sequences in the subtree under given node.
-void MSA::SetSubtreeWeight2(const ClusterNode *ptrNode) const
- {
- if (0 == ptrNode)
- return;
-
- const ClusterNode *ptrRight = ptrNode->GetRight();
- const ClusterNode *ptrLeft = ptrNode->GetLeft();
-
-// If leaf, set weight
- if (0 == ptrRight && 0 == ptrLeft)
- {
- unsigned uIndex = ptrNode->GetIndex();
- double dWeight = ptrNode->GetWeight2();
- WEIGHT w = DoubleToWeight(dWeight);
- m_Weights[uIndex] = w;
- return;
- }
-
-// Otherwise, recursively set subtrees
- SetSubtreeWeight2(ptrLeft);
- SetSubtreeWeight2(ptrRight);
- }
-
-void MSA::SetSubtreeGSCWeight(ClusterNode *ptrNode) const
- {
- if (0 == ptrNode)
- return;
-
- ClusterNode *ptrParent = ptrNode->GetParent();
- double dParentWeight2 = ptrParent->GetWeight2();
- double dParentClusterWeight = ptrParent->GetClusterWeight();
- if (0.0 == dParentClusterWeight)
- {
- double dThisClusterSize = ptrNode->GetClusterSize();
- double dParentClusterSize = ptrParent->GetClusterSize();
- double dWeight2 =
- dParentWeight2*dThisClusterSize/dParentClusterSize;
- ptrNode->SetWeight2(dWeight2);
- }
- else
- {
- // Could cache cluster weights for better performance.
- // We calculate cluster weight of each node twice, so this
- // would give x2 improvement.
- // As weighting is not very expensive, we don't care.
- double dThisClusterWeight = ptrNode->GetClusterWeight();
- double dParentWeight = ptrParent->GetWeight();
-
- double dNum = dThisClusterWeight + dParentWeight;
- double dDenom = dParentClusterWeight + dParentWeight;
- double dWeight2 = dParentWeight2*(dNum/dDenom);
-
- ptrNode->SetWeight2(dWeight2);
- }
-
- SetSubtreeGSCWeight(ptrNode->GetLeft());
- SetSubtreeGSCWeight(ptrNode->GetRight());
- }
-
-void MSA::SetGSCWeights() const
- {
- ClusterTree CT;
- CalcBLOSUMWeights(CT);
-
-// Calculate weights and store in tree.
- ClusterNode *ptrRoot = CT.GetRoot();
- ptrRoot->SetWeight2(1.0);
- SetSubtreeGSCWeight(ptrRoot->GetLeft());
- SetSubtreeGSCWeight(ptrRoot->GetRight());
-
-// Copy weights from tree to MSA.
- SetSubtreeWeight2(ptrRoot);
- }
-
-void MSA::ListWeights() const
- {
- const unsigned uSeqCount = GetSeqCount();
- Log("Weights:\n");
- WEIGHT wTotal = 0;
- for (unsigned n = 0; n < uSeqCount; ++n)
- {
- wTotal += GetSeqWeight(n);
- Log("%6.3f %s\n", GetSeqWeight(n), GetSeqName(n));
- }
- Log("Total weights = %6.3f, should be 1.0\n", wTotal);
- }
Deleted: trunk/packages/muscle/trunk/setnewhandler.cpp
===================================================================
--- trunk/packages/muscle/trunk/setnewhandler.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/setnewhandler.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,22 +0,0 @@
-#include "muscle.h"
-#include <stdio.h>
-#include <new>
-
-const int ONE_MB = 1024*1024;
-const size_t RESERVE_BYTES = 8*ONE_MB;
-static void *EmergencyReserve = 0;
-
-void OnOutOfMemory()
- {
- free(EmergencyReserve);
- fprintf(stderr, "\n*** OUT OF MEMORY ***\n");
- fprintf(stderr, "Memory allocated so far %g MB\n", GetMemUseMB());
- SaveCurrentAlignment();
- exit(EXIT_FatalError);
- }
-
-void SetNewHandler()
- {
- EmergencyReserve = malloc(RESERVE_BYTES);
- std::set_new_handler(OnOutOfMemory);
- }
Deleted: trunk/packages/muscle/trunk/spfast.cpp
===================================================================
--- trunk/packages/muscle/trunk/spfast.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/spfast.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,269 +0,0 @@
-#include "muscle.h"
-#include "profile.h"
-
-#define TRACE 0
-
-enum
- {
- LL = 0,
- LG = 1,
- GL = 2,
- GG = 3,
- };
-
-static char *GapTypeToStr(int GapType)
- {
- switch (GapType)
- {
- case LL: return "LL";
- case LG: return "LG";
- case GL: return "GL";
- case GG: return "GG";
- }
- Quit("Invalid gap type");
- return "?";
- }
-
-static SCORE GapScoreMatrix[4][4];
-
-static void InitGapScoreMatrix()
- {
- const SCORE t = (SCORE) 0.2;
-
- GapScoreMatrix[LL][LL] = 0;
- GapScoreMatrix[LL][LG] = g_scoreGapOpen;
- GapScoreMatrix[LL][GL] = 0;
- GapScoreMatrix[LL][GG] = 0;
-
- GapScoreMatrix[LG][LL] = g_scoreGapOpen;
- GapScoreMatrix[LG][LG] = 0;
- GapScoreMatrix[LG][GL] = g_scoreGapOpen;
- GapScoreMatrix[LG][GG] = t*g_scoreGapOpen; // approximation!
-
- GapScoreMatrix[GL][LL] = 0;
- GapScoreMatrix[GL][LG] = g_scoreGapOpen;
- GapScoreMatrix[GL][GL] = 0;
- GapScoreMatrix[GL][GG] = 0;
-
- GapScoreMatrix[GG][LL] = 0;
- GapScoreMatrix[GG][LG] = t*g_scoreGapOpen; // approximation!
- GapScoreMatrix[GG][GL] = 0;
- GapScoreMatrix[GG][GG] = 0;
-
- for (int i = 0; i < 4; ++i)
- for (int j = 0; j < i; ++j)
- if (GapScoreMatrix[i][j] != GapScoreMatrix[j][i])
- Quit("GapScoreMatrix not symmetrical");
- }
-
-static SCORE SPColBrute(const MSA &msa, unsigned uColIndex)
- {
- SCORE Sum = 0;
- const unsigned uSeqCount = msa.GetSeqCount();
- for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)
- {
- const WEIGHT w1 = msa.GetSeqWeight(uSeqIndex1);
- unsigned uLetter1 = msa.GetLetterEx(uSeqIndex1, uColIndex);
- if (uLetter1 >= 20)
- continue;
- for (unsigned uSeqIndex2 = 0; uSeqIndex2 < uSeqIndex1; ++uSeqIndex2)
- {
- const WEIGHT w2 = msa.GetSeqWeight(uSeqIndex2);
- unsigned uLetter2 = msa.GetLetterEx(uSeqIndex2, uColIndex);
- if (uLetter2 >= 20)
- continue;
- SCORE t = w1*w2*(*g_ptrScoreMatrix)[uLetter1][uLetter2];
-#if TRACE
- Log("Check %c %c w1=%.3g w2=%.3g Mx=%.3g t=%.3g\n",
- LetterToCharAmino(uLetter1),
- LetterToCharAmino(uLetter2),
- w1,
- w2,
- (*g_ptrScoreMatrix)[uLetter1][uLetter2],
- t);
-#endif
- Sum += t;
- }
- }
- return Sum;
- }
-
-static SCORE SPGapFreqs(const FCOUNT Freqs[])
- {
-#if TRACE
- Log("Freqs=");
- for (unsigned i = 0; i < 4; ++i)
- if (Freqs[i] != 0)
- Log(" %s=%.3g", GapTypeToStr(i), Freqs[i]);
- Log("\n");
-#endif
-
- SCORE TotalOffDiag = 0;
- SCORE TotalDiag = 0;
- for (unsigned i = 0; i < 4; ++i)
- {
- const FCOUNT fi = Freqs[i];
- if (0 == fi)
- continue;
- const float *Row = GapScoreMatrix[i];
- SCORE diagt = fi*fi*Row[i];
- TotalDiag += diagt;
-#if TRACE
- Log("SPFGaps %s %s + Mx=%.3g TotalDiag += %.3g\n",
- GapTypeToStr(i),
- GapTypeToStr(i),
- Row[i],
- diagt);
-#endif
- SCORE Sum = 0;
- for (unsigned j = 0; j < i; ++j)
- {
- SCORE t = Freqs[j]*Row[j];
-#if TRACE
- if (Freqs[j] != 0)
- Log("SPFGaps %s %s + Mx=%.3g Sum += %.3g\n",
- GapTypeToStr(i),
- GapTypeToStr(j),
- Row[j],
- fi*t);
-#endif
- Sum += t;
- }
- TotalOffDiag += fi*Sum;
- }
-#if TRACE
- Log("SPFGap TotalOffDiag=%.3g + TotalDiag=%.3g = %.3g\n",
- TotalOffDiag, TotalDiag, TotalOffDiag + TotalDiag);
-#endif
- return TotalOffDiag*2 + TotalDiag;
- }
-
-static SCORE SPFreqs(const FCOUNT Freqs[])
- {
-#if TRACE
- Log("Freqs=");
- for (unsigned i = 0; i < 20; ++i)
- if (Freqs[i] != 0)
- Log(" %c=%.3g", LetterToCharAmino(i), Freqs[i]);
- Log("\n");
-#endif
-
- SCORE TotalOffDiag = 0;
- SCORE TotalDiag = 0;
- for (unsigned i = 0; i < 20; ++i)
- {
- const FCOUNT fi = Freqs[i];
- if (0 == fi)
- continue;
- const float *Row = (*g_ptrScoreMatrix)[i];
- SCORE diagt = fi*fi*Row[i];
- TotalDiag += diagt;
-#if TRACE
- Log("SPF %c %c + Mx=%.3g TotalDiag += %.3g\n",
- LetterToCharAmino(i),
- LetterToCharAmino(i),
- Row[i],
- diagt);
-#endif
- SCORE Sum = 0;
- for (unsigned j = 0; j < i; ++j)
- {
- SCORE t = Freqs[j]*Row[j];
-#if TRACE
- if (Freqs[j] != 0)
- Log("SPF %c %c + Mx=%.3g Sum += %.3g\n",
- LetterToCharAmino(i),
- LetterToCharAmino(j),
- Row[j],
- fi*t);
-#endif
- Sum += t;
- }
- TotalOffDiag += fi*Sum;
- }
-#if TRACE
- Log("SPF TotalOffDiag=%.3g + TotalDiag=%.3g = %.3g\n",
- TotalOffDiag, TotalDiag, TotalOffDiag + TotalDiag);
-#endif
- return TotalOffDiag*2 + TotalDiag;
- }
-
-static SCORE ObjScoreSPCol(const MSA &msa, unsigned uColIndex)
- {
- FCOUNT Freqs[20];
- FCOUNT GapFreqs[4];
-
- memset(Freqs, 0, sizeof(Freqs));
- memset(GapFreqs, 0, sizeof(GapFreqs));
-
- const unsigned uSeqCount = msa.GetSeqCount();
-#if TRACE
- Log("Weights=");
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
- Log(" %u=%.3g", uSeqIndex, msa.GetSeqWeight(uSeqIndex));
- Log("\n");
-#endif
- SCORE SelfOverCount = 0;
- SCORE GapSelfOverCount = 0;
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
- {
- WEIGHT w = msa.GetSeqWeight(uSeqIndex);
-
- bool bGapThisCol = msa.IsGap(uSeqIndex, uColIndex);
- bool bGapPrevCol = (uColIndex == 0 ? false : msa.IsGap(uSeqIndex, uColIndex - 1));
- int GapType = bGapThisCol + 2*bGapPrevCol;
- assert(GapType >= 0 && GapType < 4);
- GapFreqs[GapType] += w;
- SCORE gapt = w*w*GapScoreMatrix[GapType][GapType];
- GapSelfOverCount += gapt;
-
- if (bGapThisCol)
- continue;
- unsigned uLetter = msa.GetLetterEx(uSeqIndex, uColIndex);
- if (uLetter >= 20)
- continue;
- Freqs[uLetter] += w;
- SCORE t = w*w*(*g_ptrScoreMatrix)[uLetter][uLetter];
-#if TRACE
- Log("FastCol compute freqs & SelfOverCount %c w=%.3g M=%.3g SelfOverCount += %.3g\n",
- LetterToCharAmino(uLetter), w, (*g_ptrScoreMatrix)[uLetter][uLetter], t);
-#endif
- SelfOverCount += t;
- }
- SCORE SPF = SPFreqs(Freqs);
- SCORE Col = SPF - SelfOverCount;
-
- SCORE SPFGaps = SPGapFreqs(GapFreqs);
- SCORE ColGaps = SPFGaps - GapSelfOverCount;
-#if TRACE
- Log("SPF=%.3g - SelfOverCount=%.3g = %.3g\n", SPF, SelfOverCount, Col);
- Log("SPFGaps=%.3g - GapsSelfOverCount=%.3g = %.3g\n", SPFGaps, GapSelfOverCount, ColGaps);
-#endif
- return Col + ColGaps;
- }
-
-SCORE ObjScoreSPDimer(const MSA &msa)
- {
- static bool bGapScoreMatrixInit = false;
- if (!bGapScoreMatrixInit)
- InitGapScoreMatrix();
-
- SCORE Total = 0;
- const unsigned uSeqCount = msa.GetSeqCount();
- const unsigned uColCount = msa.GetColCount();
- for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
- {
- SCORE Col = ObjScoreSPCol(msa, uColIndex);
-#if TRACE
- {
- SCORE ColCheck = SPColBrute(msa, uColIndex);
- Log("FastCol=%.3g CheckCol=%.3g\n", Col, ColCheck);
- }
-#endif
- Total += Col;
- }
-#if TRACE
- Log("Total/2 = %.3g (final result from fast)\n", Total/2);
-#endif
- return Total/2;
- }
Deleted: trunk/packages/muscle/trunk/sptest.cpp
===================================================================
--- trunk/packages/muscle/trunk/sptest.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/sptest.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,176 +0,0 @@
-#include "muscle.h"
-#include "objscore.h"
-#include "msa.h"
-#include "textfile.h"
-#include "pwpath.h"
-
-const unsigned INDELS = 1;
-
-static void GetPos(const char Str[], unsigned L, int *pi1, int *pi2)
- {
- int i1;
- for (;;)
- {
- i1 = rand()%(L-2) + 1;
- if (Str[i1] == 'M')
- break;
- }
- int i2;
- for (;;)
- {
- i2 = rand()%(L-2) + 1;
- if (i1 != i2 && Str[i2] == 'M')
- break;
- }
- *pi1 = i1;
- *pi2 = i2;
- }
-
-static void MakePath(unsigned uSeqLength, unsigned uIndelCount, char Str[])
- {
- unsigned uPathLength = uSeqLength + uIndelCount;
- for (unsigned i = 0; i < uPathLength; ++i)
- Str[i] = 'M';
-
- for (unsigned i = 0; i < uIndelCount; ++i)
- {
- int i1, i2;
- GetPos(Str, uPathLength, &i1, &i2);
- Str[i1] = 'D';
- Str[i2] = 'I';
- }
-
- Str[uPathLength] = 0;
- Log("MakePath=%s\n", Str);
- }
-
-void SPTest()
- {
- SetPPScore(PPSCORE_SV);
-
- SetListFileName("c:\\tmp\\muscle.log", false);
-
- TextFile file1("c:\\tmp\\msa1.afa");
- TextFile file2("c:\\tmp\\msa2.afa");
-
- MSA msa1;
- MSA msa2;
-
- msa1.FromFile(file1);
- msa2.FromFile(file2);
-
- Log("msa1=\n");
- msa1.LogMe();
- Log("msa2=\n");
- msa2.LogMe();
-
- const unsigned uColCount = msa1.GetColCount();
- if (msa2.GetColCount() != uColCount)
- Quit("Different lengths");
-
- const unsigned uSeqCount1 = msa1.GetSeqCount();
- const unsigned uSeqCount2 = msa2.GetSeqCount();
- const unsigned uSeqCount = uSeqCount1 + uSeqCount2;
-
- MSA::SetIdCount(uSeqCount);
-
- for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount1; ++uSeqIndex1)
- {
- msa1.SetSeqWeight(uSeqIndex1, 1.0);
- msa1.SetSeqId(uSeqIndex1, uSeqIndex1);
- }
-
- for (unsigned uSeqIndex2 = 0; uSeqIndex2 < uSeqCount2; ++uSeqIndex2)
- {
- msa2.SetSeqWeight(uSeqIndex2, 1.0);
- msa2.SetSeqId(uSeqIndex2, uSeqCount1 + uSeqIndex2);
- }
-
- MSA alnA;
- MSA alnB;
-
- char strPathA[1024];
- char strPathB[1024];
- MakePath(uColCount, INDELS, strPathA);
- MakePath(uColCount, INDELS, strPathB);
-
- PWPath PathA;
- PWPath PathB;
- PathA.FromStr(strPathA);
- PathB.FromStr(strPathB);
-
- Log("PathA=\n");
- PathA.LogMe();
- Log("PathB=\n");
- PathB.LogMe();
-
- AlignTwoMSAsGivenPath(PathA, msa1, msa2, alnA);
- AlignTwoMSAsGivenPath(PathB, msa1, msa2, alnB);
-
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
- {
- alnA.SetSeqWeight(uSeqIndex, 1.0);
- alnB.SetSeqWeight(uSeqIndex, 1.0);
- }
-
- unsigned Seqs1[1024];
- unsigned Seqs2[1024];
-
- for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount1; ++uSeqIndex1)
- Seqs1[uSeqIndex1] = uSeqIndex1;
-
- for (unsigned uSeqIndex2 = 0; uSeqIndex2 < uSeqCount2; ++uSeqIndex2)
- Seqs2[uSeqIndex2] = uSeqCount1 + uSeqIndex2;
-
- MSA msaA1;
- MSA msaA2;
- MSA msaB1;
- MSA msaB2;
- MSAFromSeqSubset(alnA, Seqs1, uSeqCount1, msaA1);
- MSAFromSeqSubset(alnB, Seqs1, uSeqCount1, msaB1);
- MSAFromSeqSubset(alnA, Seqs2, uSeqCount2, msaA2);
- MSAFromSeqSubset(alnB, Seqs2, uSeqCount2, msaB2);
-
- for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount1; ++uSeqIndex1)
- {
- msaA1.SetSeqWeight(uSeqIndex1, 1.0);
- msaB1.SetSeqWeight(uSeqIndex1, 1.0);
- }
-
- for (unsigned uSeqIndex2 = 0; uSeqIndex2 < uSeqCount2; ++uSeqIndex2)
- {
- msaA2.SetSeqWeight(uSeqIndex2, 1.0);
- msaB2.SetSeqWeight(uSeqIndex2, 1.0);
- }
-
- Log("msaA1=\n");
- msaA1.LogMe();
-
- Log("msaB1=\n");
- msaB1.LogMe();
-
- Log("msaA2=\n");
- msaA2.LogMe();
-
- Log("msaB2=\n");
- msaB2.LogMe();
-
- Log("alnA=\n");
- alnA.LogMe();
-
- Log("AlnB=\n");
- alnB.LogMe();
-
- Log("\nSPA\n---\n");
- SCORE SPA = ObjScoreSP(alnA);
- Log("\nSPB\n---\n");
- SCORE SPB = ObjScoreSP(alnB);
-
- Log("\nXPA\n---\n");
- SCORE XPA = ObjScoreXP(msaA1, msaA2);
- Log("\nXPB\n---\n");
- SCORE XPB = ObjScoreXP(msaB1, msaB2);
-
- Log("SPA=%.4g SPB=%.4g Diff=%.4g\n", SPA, SPB, SPA - SPB);
- Log("XPA=%.4g XPB=%.4g Diff=%.4g\n", XPA, XPB, XPA - XPB);
- }
Deleted: trunk/packages/muscle/trunk/stabilize.cpp
===================================================================
--- trunk/packages/muscle/trunk/stabilize.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/stabilize.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,20 +0,0 @@
-#include "muscle.h"
-#include "msa.h"
-
-void Stabilize(const MSA &msa, MSA &msaStable)
- {
- const unsigned uSeqCount = msa.GetSeqCount();
- const unsigned uColCount = msa.GetColCount();
-
- msaStable.SetSize(uSeqCount, uColCount);
- for (unsigned uId = 0; uId < uSeqCount; ++uId)
- {
- const unsigned uSeqIndex = msa.GetSeqIndex(uId);
- msaStable.SetSeqName(uId, msa.GetSeqName(uSeqIndex));
- for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
- {
- const char c = msa.GetChar(uSeqIndex, uColIndex);
- msaStable.SetChar(uId, uColIndex, c);
- }
- }
- }
Deleted: trunk/packages/muscle/trunk/subfam.cpp
===================================================================
--- trunk/packages/muscle/trunk/subfam.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/subfam.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,384 +0,0 @@
-#include "muscle.h"
-#include "tree.h"
-#include "textfile.h" // for test only
-#include "msa.h"
-#include "seqvect.h"
-#include "profile.h"
-#ifndef _MSC_VER
-#include <unistd.h> // for unlink
-#endif
-
-#define TRACE 0
-
-/***
-Find subfamilies from tree by following criteria:
-
-(a) number of leaves <= max,
-(b) is monophyletic, i.e. most recent common ancestor is parent
-of no more than one subfamily.
-***/
-
-static unsigned SubFamRecurse(const Tree &tree, unsigned uNodeIndex, unsigned uMaxLeafCount,
- unsigned SubFams[], unsigned &uSubFamCount)
- {
- if (tree.IsLeaf(uNodeIndex))
- return 1;
-
- unsigned uLeft = tree.GetLeft(uNodeIndex);
- unsigned uRight = tree.GetRight(uNodeIndex);
- unsigned uLeftCount = SubFamRecurse(tree, uLeft, uMaxLeafCount, SubFams, uSubFamCount);
- unsigned uRightCount = SubFamRecurse(tree, uRight, uMaxLeafCount, SubFams, uSubFamCount);
-
- unsigned uLeafCount = uLeftCount + uRightCount;
- if (uLeftCount + uRightCount > uMaxLeafCount)
- {
- if (uLeftCount <= uMaxLeafCount)
- SubFams[uSubFamCount++] = uLeft;
- if (uRightCount <= uMaxLeafCount)
- SubFams[uSubFamCount++] = uRight;
- }
- else if (tree.IsRoot(uNodeIndex))
- {
- if (uSubFamCount != 0)
- Quit("Error in SubFamRecurse");
- SubFams[uSubFamCount++] = uNodeIndex;
- }
-
- return uLeafCount;
- }
-
-void SubFam(const Tree &tree, unsigned uMaxLeafCount, unsigned SubFams[], unsigned *ptruSubFamCount)
- {
- *ptruSubFamCount = 0;
- SubFamRecurse(tree, tree.GetRootNodeIndex(), uMaxLeafCount, SubFams, *ptruSubFamCount);
-
-#if TRACE
- {
- Log("\n");
- Log("Tree:\n");
- tree.LogMe();
- //void DrawTree(const Tree &tree);
- //DrawTree(tree);
- Log("\n");
- Log("%d subfams:\n", *ptruSubFamCount);
- for (unsigned i = 0; i < *ptruSubFamCount; ++i)
- Log(" %d=%d", i, SubFams[i]);
- Log("\n");
- }
-#endif
- }
-
-//unsigned SubFams[9999];
-//unsigned uSubFamCount;
-//
-//static unsigned DistFromRoot(const Tree &tree, unsigned uNodeIndex)
-// {
-// const unsigned uRoot = tree.GetRootNodeIndex();
-// unsigned uDist = 0;
-// while (uNodeIndex != uRoot)
-// {
-// ++uDist;
-// uNodeIndex = tree.GetParent(uNodeIndex);
-// }
-// return uDist;
-// }
-//
-//static void DrawNode(const Tree &tree, unsigned uNodeIndex)
-// {
-// if (!tree.IsLeaf(uNodeIndex))
-// DrawNode(tree, tree.GetLeft(uNodeIndex));
-//
-// unsigned uDist = DistFromRoot(tree, uNodeIndex);
-// for (unsigned i = 0; i < 5*uDist; ++i)
-// Log(" ");
-// Log("%d", uNodeIndex);
-// for (unsigned i = 0; i < uSubFamCount; ++i)
-// if (uNodeIndex == SubFams[i])
-// {
-// Log("*");
-// break;
-// }
-// Log("\n");
-//
-// if (!tree.IsLeaf(uNodeIndex))
-// DrawNode(tree, tree.GetRight(uNodeIndex));
-// }
-//
-//static void DrawTree(const Tree &tree)
-// {
-// unsigned uRoot = tree.GetRootNodeIndex();
-// DrawNode(tree, uRoot);
-// }
-//
-//void TestSubFams(const char *FileName)
-// {
-// Tree tree;
-// TextFile f(FileName);
-// tree.FromFile(f);
-// SubFam(tree, 5, SubFams, &uSubFamCount);
-// DrawTree(tree);
-// }
-
-static void SetInFam(const Tree &tree, unsigned uNodeIndex, bool NodeInSubFam[])
- {
- if (tree.IsLeaf(uNodeIndex))
- return;
- unsigned uLeft = tree.GetLeft(uNodeIndex);
- unsigned uRight = tree.GetRight(uNodeIndex);
- NodeInSubFam[uLeft] = true;
- NodeInSubFam[uRight] = true;
-
- SetInFam(tree, uLeft, NodeInSubFam);
- SetInFam(tree, uRight, NodeInSubFam);
- }
-
-void AlignSubFam(SeqVect &vAll, const Tree &GuideTree, unsigned uNodeIndex,
- MSA &msaOut)
- {
- const unsigned uSeqCount = vAll.GetSeqCount();
-
- const char *InTmp = "asf_in.tmp";
- const char *OutTmp = "asf_out.tmp";
-
- unsigned *Leaves = new unsigned[uSeqCount];
- unsigned uLeafCount;
- GetLeaves(GuideTree, uNodeIndex, Leaves, &uLeafCount);
-
- SeqVect v;
- for (unsigned i = 0; i < uLeafCount; ++i)
- {
- unsigned uLeafNodeIndex = Leaves[i];
- unsigned uId = GuideTree.GetLeafId(uLeafNodeIndex);
- Seq &s = vAll.GetSeqById(uId);
- v.AppendSeq(s);
- }
-
-#if TRACE
- {
- Log("Align subfam[node=%d, size=%d] ", uNodeIndex, uLeafCount);
- for (unsigned i = 0; i < uLeafCount; ++i)
- Log(" %s", v.GetSeqName(i));
- Log("\n");
- }
-#endif
-
- TextFile fIn(InTmp, true);
-
- v.ToFASTAFile(fIn);
- fIn.Close();
-
- char CmdLine[4096];
- sprintf(CmdLine, "probcons %s > %s 2> /dev/null", InTmp, OutTmp);
-// sprintf(CmdLine, "muscle -in %s -out %s -maxiters 1", InTmp, OutTmp);
- system(CmdLine);
-
- TextFile fOut(OutTmp);
- msaOut.FromFile(fOut);
-
- for (unsigned uSeqIndex = 0; uSeqIndex < uLeafCount; ++uSeqIndex)
- {
- const char *Name = msaOut.GetSeqName(uSeqIndex);
- unsigned uId = vAll.GetSeqIdFromName(Name);
- msaOut.SetSeqId(uSeqIndex, uId);
- }
-
- unlink(InTmp);
- unlink(OutTmp);
-
- delete[] Leaves;
- }
-
-void ProgAlignSubFams()
- {
- MSA msaOut;
-
- SetOutputFileName(g_pstrOutFileName);
- SetInputFileName(g_pstrInFileName);
-
- SetMaxIters(g_uMaxIters);
- SetSeqWeightMethod(g_SeqWeight1);
-
- TextFile fileIn(g_pstrInFileName);
- SeqVect v;
- v.FromFASTAFile(fileIn);
- const unsigned uSeqCount = v.Length();
-
- if (0 == uSeqCount)
- Quit("No sequences in input file");
-
- ALPHA Alpha = ALPHA_Undefined;
- switch (g_SeqType)
- {
- case SEQTYPE_Auto:
- Alpha = v.GuessAlpha();
- break;
-
- case SEQTYPE_Protein:
- Alpha = ALPHA_Amino;
- break;
-
- case SEQTYPE_DNA:
- Alpha = ALPHA_DNA;
- break;
-
- case SEQTYPE_RNA:
- Alpha = ALPHA_RNA;
- break;
-
- default:
- Quit("Invalid seq type");
- }
- SetAlpha(Alpha);
- v.FixAlpha();
-
- if (ALPHA_DNA == Alpha || ALPHA_RNA == Alpha)
- {
- SetPPScore(PPSCORE_SPN);
- g_Distance1 = DISTANCE_Kmer4_6;
- }
-
- unsigned uMaxL = 0;
- unsigned uTotL = 0;
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
- {
- unsigned L = v.GetSeq(uSeqIndex).Length();
- uTotL += L;
- if (L > uMaxL)
- uMaxL = L;
- }
-
- SetIter(1);
- g_bDiags = g_bDiags1;
- SetSeqStats(uSeqCount, uMaxL, uTotL/uSeqCount);
-
- MSA::SetIdCount(uSeqCount);
-
-// Initialize sequence ids.
-// From this point on, ids must somehow propagate from here.
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
- v.SetSeqId(uSeqIndex, uSeqIndex);
-
- if (uSeqCount > 1)
- MHackStart(v);
-
- if (0 == uSeqCount)
- {
- msaOut.Clear();
- return;
- }
-
- if (1 == uSeqCount && ALPHA_Amino == Alpha)
- {
- const Seq &s = v.GetSeq(0);
- msaOut.FromSeq(s);
- return;
- }
-
- Tree GuideTree;
- TreeFromSeqVect(v, GuideTree, g_Cluster1, g_Distance1, g_Root1);
- SetMuscleTree(GuideTree);
-
- MSA msa;
- if (g_bLow)
- {
- ProgNode *ProgNodes = 0;
- ProgNodes = ProgressiveAlignE(v, GuideTree, msa);
- delete[] ProgNodes;
- }
- else
- ProgressiveAlign(v, GuideTree, msa);
- SetCurrentAlignment(msa);
- TreeFromMSA(msa, GuideTree, g_Cluster2, g_Distance2, g_Root2);
- SetMuscleTree(GuideTree);
-
- unsigned *SubFams = new unsigned[uSeqCount];
- unsigned uSubFamCount;
- SubFam(GuideTree, g_uMaxSubFamCount, SubFams, &uSubFamCount);
-
- SetProgressDesc("Align node");
- const unsigned uNodeCount = 2*uSeqCount - 1;
-
- ProgNode *ProgNodes = new ProgNode[uNodeCount];
- bool *NodeIsSubFam = new bool[uNodeCount];
- bool *NodeInSubFam = new bool[uNodeCount];
-
- for (unsigned i = 0; i < uNodeCount; ++i)
- {
- NodeIsSubFam[i] = false;
- NodeInSubFam[i] = false;
- }
-
- for (unsigned i = 0; i < uSubFamCount; ++i)
- {
- unsigned uNodeIndex = SubFams[i];
- assert(uNodeIndex < uNodeCount);
- NodeIsSubFam[uNodeIndex] = true;
- SetInFam(GuideTree, uNodeIndex, NodeInSubFam);
- }
-
- unsigned uJoin = 0;
- unsigned uTreeNodeIndex = GuideTree.FirstDepthFirstNode();
- do
- {
- if (NodeIsSubFam[uTreeNodeIndex])
- {
-#if TRACE
- Log("Node %d: align subfam\n", uTreeNodeIndex);
-#endif
- ProgNode &Node = ProgNodes[uTreeNodeIndex];
- AlignSubFam(v, GuideTree, uTreeNodeIndex, Node.m_MSA);
- Node.m_uLength = Node.m_MSA.GetColCount();
- }
- else if (!NodeInSubFam[uTreeNodeIndex])
- {
-#if TRACE
- Log("Node %d: align two subfams\n", uTreeNodeIndex);
-#endif
- Progress(uJoin, uSubFamCount - 1);
- ++uJoin;
-
- const unsigned uMergeNodeIndex = uTreeNodeIndex;
- ProgNode &Parent = ProgNodes[uMergeNodeIndex];
-
- const unsigned uLeft = GuideTree.GetLeft(uTreeNodeIndex);
- const unsigned uRight = GuideTree.GetRight(uTreeNodeIndex);
-
- ProgNode &Node1 = ProgNodes[uLeft];
- ProgNode &Node2 = ProgNodes[uRight];
-
- PWPath Path;
- AlignTwoMSAs(Node1.m_MSA, Node2.m_MSA, Parent.m_MSA, Path);
- Parent.m_uLength = Parent.m_MSA.GetColCount();
-
- Node1.m_MSA.Clear();
- Node2.m_MSA.Clear();
- }
- else
- {
-#if TRACE
- Log("Node %d: in subfam\n", uTreeNodeIndex);
-#endif
- ;
- }
- uTreeNodeIndex = GuideTree.NextDepthFirstNode(uTreeNodeIndex);
- }
- while (NULL_NEIGHBOR != uTreeNodeIndex);
- ProgressStepsDone();
-
- unsigned uRootNodeIndex = GuideTree.GetRootNodeIndex();
- ProgNode &RootProgNode = ProgNodes[uRootNodeIndex];
-
- TextFile fOut(g_pstrOutFileName, true);
- MHackEnd(RootProgNode.m_MSA);
- RootProgNode.m_MSA.ToFile(fOut);
-
- delete[] NodeInSubFam;
- delete[] NodeIsSubFam;
- delete[] ProgNodes;
- delete[] SubFams;
-
- ProgNodes = 0;
- NodeInSubFam = 0;
- NodeIsSubFam = 0;
- SubFams = 0;
- }
Deleted: trunk/packages/muscle/trunk/subfams.cpp
===================================================================
--- trunk/packages/muscle/trunk/subfams.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/subfams.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,65 +0,0 @@
-#include "muscle.h"
-#include "distfunc.h"
-
-const float INFINITY = float(1e29);
-const unsigned NILL = uInsane;
-
-static float *ShortestPathEstimate;
-static unsigned *Predecessor;
-
-static void GetMostDistantPair(DistFunc &DF, unsigned *ptrIndex1, unsigned *ptrIndex2)
- {
- const unsigned uNodeCount = DF.GetCount();
- if (uNodeCount < 2)
- Quit("GetMostDistantPair: < 2 seqs");
-
- float MaxDist = -1;
- unsigned Index1 = uInsane;
- unsigned Index2 = uInsane;
- for (unsigned i = 0; i < uNodeCount; ++i)
- {
- for (unsigned j = i + 1; j < uNodeCount; ++j)
- {
- float d = DF.GetDist(i, j);
- if (d > MaxDist)
- {
- MaxDist = d;
- Index1 = i;
- Index2 = j;
- }
- }
- }
-
- assert(Index1 != uInsane);
- assert(Index2 != uInsane);
-
- *ptrIndex1 = Index1;
- *ptrIndex2 = Index2;
- }
-
-static void InitializeSingleSource(DistFunc &DF, unsigned uIndex)
- {
- const unsigned uNodeCount = 0;
-
- for (unsigned i = 0; i < uNodeCount; ++i)
- {
- ShortestPathEstimate[i] = INFINITY;
- Predecessor[i] = NILL;
- }
- ShortestPathEstimate[uIndex] = 0;
- }
-
-static void Relax(DistFunc &DF, unsigned u, unsigned v)
- {
- float w = DF.GetDist(u, v);
- float d = ShortestPathEstimate[u] + w;
- if (ShortestPathEstimate[v] > d)
- {
- ShortestPathEstimate[v] = d;
- Predecessor[v] = u;
- }
- }
-
-void ShortestPath(DistFunc &DF, unsigned uIndex)
- {
- }
Deleted: trunk/packages/muscle/trunk/sw.cpp
===================================================================
--- trunk/packages/muscle/trunk/sw.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/sw.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,206 +0,0 @@
-#include "muscle.h"
-#include <math.h>
-#include "pwpath.h"
-#include "profile.h"
-#include <stdio.h>
-
-// Textbook Smith-Waterman affine gap implementation.
-
-#define TRACE 0
-
-static const char *LocalScoreToStr(SCORE s)
- {
- static char str[16];
- if (MINUS_INFINITY == s)
- return " *";
- sprintf(str, "%6.2f", s);
- return str;
- }
-
-static void ListDP(const SCORE *DPM_, const ProfPos *PA, const ProfPos *PB,
- unsigned uPrefixCountA, unsigned uPrefixCountB)
- {
- Log(" ");
- for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
- {
- char c = ' ';
- if (uPrefixLengthB > 0)
- c = ConsensusChar(PB[uPrefixLengthB - 1]);
- Log(" %4u:%c", uPrefixLengthB, c);
- }
- Log("\n");
- for (unsigned uPrefixLengthA = 0; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
- {
- char c = ' ';
- if (uPrefixLengthA > 0)
- c = ConsensusChar(PA[uPrefixLengthA - 1]);
- Log("%4u:%c ", uPrefixLengthA, c);
- for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
- Log(" %s", LocalScoreToStr(DPM(uPrefixLengthA, uPrefixLengthB)));
- Log("\n");
- }
- }
-
-SCORE SW(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
- unsigned uLengthB, PWPath &Path)
- {
- assert(uLengthB > 0 && uLengthA > 0);
-
- const unsigned uPrefixCountA = uLengthA + 1;
- const unsigned uPrefixCountB = uLengthB + 1;
-
-// Allocate DP matrices
- const size_t LM = uPrefixCountA*uPrefixCountB;
- SCORE *DPM_ = new SCORE[LM];
- SCORE *DPD_ = new SCORE[LM];
- SCORE *DPI_ = new SCORE[LM];
-
- DPM(0, 0) = 0;
- DPD(0, 0) = MINUS_INFINITY;
- DPI(0, 0) = MINUS_INFINITY;
-
- DPM(1, 0) = MINUS_INFINITY;
- DPD(1, 0) = MINUS_INFINITY;
- DPI(1, 0) = MINUS_INFINITY;
-
- DPM(0, 1) = MINUS_INFINITY;
- DPD(0, 1) = MINUS_INFINITY;
- DPI(0, 1) = MINUS_INFINITY;
-
-// Empty prefix of B is special case
- for (unsigned uPrefixLengthA = 2; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
- {
- // M=LetterA+LetterB, impossible with empty prefix
- DPM(uPrefixLengthA, 0) = MINUS_INFINITY;
-
- // D=LetterA+GapB, never optimal in local alignment with gap penalties
- DPD(uPrefixLengthA, 0) = MINUS_INFINITY;
-
- // I=GapA+LetterB, impossible with empty prefix
- DPI(uPrefixLengthA, 0) = MINUS_INFINITY;
- }
-
-// Empty prefix of A is special case
- for (unsigned uPrefixLengthB = 2; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
- {
- // M=LetterA+LetterB, impossible with empty prefix
- DPM(0, uPrefixLengthB) = MINUS_INFINITY;
-
- // D=LetterA+GapB, impossible with empty prefix
- DPD(0, uPrefixLengthB) = MINUS_INFINITY;
-
- // I=GapA+LetterB, never optimal in local alignment with gap penalties
- DPI(0, uPrefixLengthB) = MINUS_INFINITY;
- }
-
- SCORE scoreMax = MINUS_INFINITY;
- unsigned uPrefixLengthAMax = uInsane;
- unsigned uPrefixLengthBMax = uInsane;
-
-// ============
-// Main DP loop
-// ============
- SCORE scoreGapCloseB = MINUS_INFINITY;
- for (unsigned uPrefixLengthB = 1; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
- {
- const ProfPos &PPB = PB[uPrefixLengthB - 1];
-
- SCORE scoreGapCloseA = MINUS_INFINITY;
- for (unsigned uPrefixLengthA = 1; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
- {
- const ProfPos &PPA = PA[uPrefixLengthA - 1];
-
- {
- // Match M=LetterA+LetterB
- SCORE scoreLL = ScoreProfPos2(PPA, PPB);
-
- SCORE scoreMM = DPM(uPrefixLengthA-1, uPrefixLengthB-1);
- SCORE scoreDM = DPD(uPrefixLengthA-1, uPrefixLengthB-1) + scoreGapCloseA;
- SCORE scoreIM = DPI(uPrefixLengthA-1, uPrefixLengthB-1) + scoreGapCloseB;
-
- SCORE scoreBest;
- if (scoreMM >= scoreDM && scoreMM >= scoreIM)
- scoreBest = scoreMM;
- else if (scoreDM >= scoreMM && scoreDM >= scoreIM)
- scoreBest = scoreDM;
- else
- {
- assert(scoreIM >= scoreMM && scoreIM >= scoreDM);
- scoreBest = scoreIM;
- }
- if (scoreBest < 0)
- scoreBest = 0;
- scoreBest += scoreLL;
- if (scoreBest > scoreMax)
- {
- scoreMax = scoreBest;
- uPrefixLengthAMax = uPrefixLengthA;
- uPrefixLengthBMax = uPrefixLengthB;
- }
- DPM(uPrefixLengthA, uPrefixLengthB) = scoreBest;
- }
-
- {
- // Delete D=LetterA+GapB
- SCORE scoreMD = DPM(uPrefixLengthA-1, uPrefixLengthB) +
- PA[uPrefixLengthA-1].m_scoreGapOpen;
- SCORE scoreDD = DPD(uPrefixLengthA-1, uPrefixLengthB);
-
- SCORE scoreBest;
- if (scoreMD >= scoreDD)
- scoreBest = scoreMD;
- else
- {
- assert(scoreDD >= scoreMD);
- scoreBest = scoreDD;
- }
- DPD(uPrefixLengthA, uPrefixLengthB) = scoreBest;
- }
-
- // Insert I=GapA+LetterB
- {
- SCORE scoreMI = DPM(uPrefixLengthA, uPrefixLengthB-1) +
- PB[uPrefixLengthB - 1].m_scoreGapOpen;
- SCORE scoreII = DPI(uPrefixLengthA, uPrefixLengthB-1);
-
- SCORE scoreBest;
- if (scoreMI >= scoreII)
- scoreBest = scoreMI;
- else
- {
- assert(scoreII > scoreMI);
- scoreBest = scoreII;
- }
- DPI(uPrefixLengthA, uPrefixLengthB) = scoreBest;
- }
-
- scoreGapCloseA = PPA.m_scoreGapClose;
- }
- scoreGapCloseB = PPB.m_scoreGapClose;
- }
-
-#if TRACE
- Log("DPM:\n");
- ListDP(DPM_, PA, PB, uPrefixLengthA, uPrefixLengthB);
- Log("DPD:\n");
- ListDP(DPD_, PA, PB, uPrefixLengthA, uPrefixLengthB);
- Log("DPI:\n");
- ListDP(DPI_, PA, PB, uPrefixLengthA, uPrefixLengthB);
-#endif
-
- assert(scoreMax == DPM(uPrefixLengthAMax, uPrefixLengthBMax));
- TraceBackSW(PA, uLengthA, PB, uLengthB, DPM_, DPD_, DPI_,
- uPrefixLengthAMax, uPrefixLengthBMax, Path);
-
-#if TRACE
- SCORE scorePath = FastScorePath2(PA, uLengthA, PB, uLengthB, Path);
- Path.LogMe();
- Log("Score = %s Path = %s\n", LocalScoreToStr(scoreMax), LocalScoreToStr(scorePath));
-#endif
-
- delete[] DPM_;
- delete[] DPD_;
- delete[] DPI_;
-
- return scoreMax;
- }
Deleted: trunk/packages/muscle/trunk/termgaps.cpp
===================================================================
--- trunk/packages/muscle/trunk/termgaps.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/termgaps.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,36 +0,0 @@
-#include "muscle.h"
-#include "profile.h"
-
-void SetTermGaps(const ProfPos *Prof, unsigned uLength)
- {
- if (0 == uLength)
- return;
-
- ProfPos *First = (ProfPos *) Prof;
- ProfPos *Last = (ProfPos *) (Prof + uLength - 1);
-
- switch (g_TermGaps)
- {
- case TERMGAPS_Full:
- break;
-
- case TERMGAPS_Half:
- // -infinity check for lock left/right
- if (First->m_scoreGapOpen != MINUS_INFINITY)
- First->m_scoreGapOpen = 0;
-
- if (uLength > 1 && Last->m_scoreGapClose != MINUS_INFINITY)
- Last->m_scoreGapClose = 0;
-
- case TERMGAPS_Ext:
- if (First->m_scoreGapOpen != MINUS_INFINITY)
- First->m_scoreGapOpen *= -1;
-
- if (uLength > 1 && Last->m_scoreGapClose != MINUS_INFINITY)
- Last->m_scoreGapClose *= -1;
- break;
-
- default:
- Quit("Invalid g_TermGaps");
- }
- }
Deleted: trunk/packages/muscle/trunk/textfile.cpp
===================================================================
--- trunk/packages/muscle/trunk/textfile.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/textfile.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,345 +0,0 @@
-#include "muscle.h"
-#include "textfile.h"
-#include <errno.h>
-
-TextFile::TextFile(const char szFileName[], bool bWrite)
- {
- FILE *ptrFile = 0;
- if (bWrite)
- {
- if (0 == strcmp(szFileName, "-"))
- ptrFile = stdout;
- else
- ptrFile = fopen(szFileName, "wb");
- }
- else
- {
- if (0 == strcmp(szFileName, "-"))
- ptrFile = stdin;
- else
- ptrFile = fopen(szFileName, "rb");
- }
- if (0 == ptrFile)
- Quit("Cannot open '%s' errno=%d\n", szFileName, errno);
- Init(ptrFile, szFileName);
- }
-
-void TextFile::Init(FILE *ptrFile, const char *ptrFileName)
- {
- m_ptrFile = ptrFile;
- m_ptrName = strdup(ptrFileName);
- m_uLineNr = 1;
- m_uColNr = 0;
- m_bLastCharWasEOL = true;
- m_cPushedBack = -1;
-#if DEBUG
- setbuf(m_ptrFile, 0);
-#endif
- }
-
-TextFile::TextFile(FILE *ptrFile, const char *ptrFileName)
- {
- Init(ptrFile, "-");
- }
-
-TextFile::~TextFile()
- {
- if (m_ptrFile &&
- m_ptrFile != stdin && m_ptrFile != stdout && m_ptrFile != stderr)
- fclose(m_ptrFile);
- free(m_ptrName);
- }
-
-// Get line from file.
-// Return true if end-of-file, quit if line too long.
-bool TextFile::GetLine(char szLine[], unsigned uBytes)
- {
- if (0 == uBytes)
- Quit("TextFile::GetLine, buffer zero size");
-
- memset(szLine, 0, uBytes);
-
- unsigned uBytesCopied = 0;
-
-// Loop until end of line or end of file.
- for (;;)
- {
- char c;
- bool bEof = GetChar(c);
- if (bEof)
- return true;
- if ('\r' == c)
- continue;
- if ('\n' == c)
- return false;
- if (uBytesCopied < uBytes - 1)
- szLine[uBytesCopied++] = (char) c;
- else
- Quit("TextFile::GetLine: input buffer too small, line %u",
- m_uLineNr);
- }
- }
-
-// As GetLine, but trim leading and trailing blanks; skip empty lines
-bool TextFile::GetTrimLine(char szLine[], unsigned uBytes)
- {
- for (;;)
- {
- bool bEOF = GetLine(szLine, uBytes);
- if (bEOF)
- return true;
- TrimBlanks(szLine);
- if (0 != szLine[0])
- break;
- }
- return false;
- }
-
-void TextFile::Rewind()
- {
- fseek(m_ptrFile, 0, SEEK_SET);
- m_uLineNr = 1;
- m_bLastCharWasEOL = true;
- }
-
-void TextFile::PutChar(char c)
- {
- int i = fputc(c, m_ptrFile);
- assert(i == c);
- if ('\n' == c)
- {
- ++m_uLineNr;
- m_uColNr = 1;
- }
- else
- ++m_uColNr;
- }
-
-void TextFile::PutString(const char szLine[])
- {
- int iError = fputs(szLine, m_ptrFile);
- assert(iError >= 0);
- }
-
-void TextFile::PutFormat(const char szFormat[], ...)
- {
- char szStr[4096];
- va_list ArgList;
- va_start(ArgList, szFormat);
- vsprintf(szStr, szFormat, ArgList);
- PutString(szStr);
- }
-
-void TextFile::GetLineX(char szLine[], unsigned uBytes)
- {
- bool bEof = GetLine(szLine, uBytes);
- if (bEof)
- Quit("end-of-file in GetLineX");
- }
-
-bool TextFile::GetToken(char szToken[], unsigned uBytes, const char szCharTokens[])
- {
-// Skip leading white space
- char c;
- for (;;)
- {
- bool bEof = GetChar(c);
- if (bEof)
- return true;
- if (!isspace(c))
- break;
- }
-
-// Check for special case single-character tokens
- if (0 != strchr(szCharTokens, c))
- {
- assert(uBytes >= 2);
- szToken[0] = c;
- szToken[1] = 0;
- return false;
- }
-
-// Loop until token terminated by white space, EOF or special
- unsigned uBytesCopied = 0;
- for (;;)
- {
- if (uBytesCopied < uBytes - 1)
- szToken[uBytesCopied++] = c;
- else
- Quit("TextFile::GetToken: input buffer too small, line %u",
- m_uLineNr);
- bool bEof = GetChar(c);
- if (bEof)
- {
- szToken[uBytesCopied] = 0;
- return true;
- }
- // Check for special case single-character tokens
- if (0 != strchr(szCharTokens, c))
- {
- PushBack(c);
- assert(uBytesCopied > 0 && uBytesCopied < uBytes);
- szToken[uBytesCopied] = 0;
- return false;
- }
- if (isspace(c))
- {
- assert(uBytesCopied > 0 && uBytesCopied < uBytes);
- szToken[uBytesCopied] = 0;
- return false;
- }
- }
- }
-
-void TextFile::GetTokenX(char szToken[], unsigned uBytes, const char szCharTokens[])
- {
- bool bEof = GetToken(szToken, uBytes, szCharTokens);
- if (bEof)
- Quit("End-of-file in GetTokenX");
- }
-
-void TextFile::Skip()
- {
- for (;;)
- {
- char c;
- bool bEof = GetChar(c);
- if (bEof || '\n' == c)
- return;
- assert(isspace(c));
- }
- }
-
-#ifdef _WIN32
-
-TEXTFILEPOS TextFile::GetPos()
- {
- fpos_t p;
- int i = fgetpos(m_ptrFile, &p);
- assert(0 == i);
- assert(p >= 0);
- TEXTFILEPOS Pos;
- Pos.uOffset = (unsigned) p;
- Pos.uLineNr = m_uLineNr;
- Pos.uColNr = m_uColNr;
- return Pos;
- }
-
-void TextFile::SetPos(TEXTFILEPOS Pos)
- {
- fpos_t p = (fpos_t) Pos.uOffset;
- int i = fsetpos(m_ptrFile, &p);
- assert(0 == i);
- m_uLineNr = Pos.uLineNr;
- m_uColNr = Pos.uColNr;
- }
-
-#else
-
-TEXTFILEPOS TextFile::GetPos()
- {
- TEXTFILEPOS Pos;
- Pos.uOffset = ftell(m_ptrFile);
- Pos.uLineNr = m_uLineNr;
- Pos.uColNr = m_uColNr;
- return Pos;
- }
-
-void TextFile::SetPos(TEXTFILEPOS Pos)
- {
- fseek(m_ptrFile, Pos.uOffset, SEEK_SET);
- m_uLineNr = Pos.uLineNr;
- m_uColNr = Pos.uColNr;
- }
-
-#endif
-
-bool TextFile::GetChar(char &c)
- {
- if (-1 != m_cPushedBack)
- {
- c = (char) m_cPushedBack;
- m_cPushedBack = -1;
- return false;
- }
-
- int ic = fgetc(m_ptrFile);
- if (ic < 0)
- {
- if (feof(m_ptrFile))
- {
- // Hack to fix up a non-empty text file that is missing
- // and end-of-line character in the last line.
- if (!m_bLastCharWasEOL && m_uLineNr > 0)
- {
- c = '\n';
- m_bLastCharWasEOL = true;
- return false;
- }
- return true;
- }
- Quit("TextFile::GetChar, error %s", strerror(errno));
- }
- c = (char) ic;
- if ('\n' == c)
- {
- m_bLastCharWasEOL = true;
- ++m_uLineNr;
- m_uColNr = 1;
- }
- else
- {
- m_bLastCharWasEOL = false;
- ++m_uColNr;
- }
- return false;
- }
-
-void TextFile::GetCharX(char &c)
- {
- bool bEof = GetChar(c);
- if (bEof)
- Quit("End-of-file in GetCharX");
- }
-
-void TextFile::GetNonblankChar(char &c)
- {
- do
- {
- bool bEof = GetChar(c);
- if (bEof)
- Quit("End-of-file in GetCharX");
- }
- while (isspace(c));
- }
-
-void TextFile::SkipLine()
- {
- if (m_bLastCharWasEOL)
- return;
- for (;;)
- {
- char c;
- bool bEof = GetChar(c);
- if (bEof)
- Quit("End-of-file in SkipLine");
- if ('\n' == c)
- break;
- }
- }
-
-void TextFile::SkipWhite()
- {
- for (;;)
- {
- char c;
- bool bEof = GetChar(c);
- if (bEof)
- Quit("End-of-file in SkipWhite");
- if (!isspace(c))
- {
- PushBack(c);
- break;
- }
- }
- }
Deleted: trunk/packages/muscle/trunk/textfile.h
===================================================================
--- trunk/packages/muscle/trunk/textfile.h 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/textfile.h 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,69 +0,0 @@
-#ifndef TextFile_h
-#define TextFile_h
-
-#include <stdio.h>
-
-struct TEXTFILEPOS
- {
- unsigned uOffset;
- unsigned uLineNr;
- unsigned uColNr;
- };
-
-const unsigned TextFileBufferSize = 256;
-
-class TextFile
- {
-private:
-// no default c'tor, not implemented
- TextFile();
-
-public:
- virtual ~TextFile();
-
- TextFile(const char szFileName[], bool bWrite = false);
- TextFile(FILE *ptrFile, const char *ptrFileName = "-");
- void Close() { fclose(m_ptrFile); m_ptrFile = 0; }
-
- bool GetLine(char szLine[], unsigned uBytes);
- bool GetTrimLine(char szLine[], unsigned uBytes);
- void GetLineX(char szLine[], unsigned uBytes);
-
- bool GetToken(char szToken[], unsigned uBytes, const char szCharTokens[] = "{}");
- void GetTokenX(char szToken[], unsigned uBytes, const char szCharTokens[] = "{}");
-
- void Skip();
- void SkipLine();
- void SkipWhite();
- void Rewind();
- TEXTFILEPOS GetPos();
- void SetPos(TEXTFILEPOS Pos);
- bool GetChar(char &c);
- void GetCharX(char &c);
- void GetNonblankChar(char &c);
-
- unsigned GetLineNr() { return m_uLineNr; }
-
- void PutString(const char szLine[]);
- void PutFormat(const char szFormat[], ...);
- void PutChar(char c);
-
- const char *GetFileName() { return m_ptrName; }
-
- void PushBack(int c) { m_cPushedBack = c; }
-
- FILE *GetStdioFile() const { return m_ptrFile; }
-
-private:
- void Init(FILE *ptrFile, const char *ptrFileName);
-
-private:
- FILE *m_ptrFile;
- unsigned m_uLineNr;
- unsigned m_uColNr;
- char *m_ptrName;
- bool m_bLastCharWasEOL;
- int m_cPushedBack;
- };
-
-#endif // TextFile_h
Deleted: trunk/packages/muscle/trunk/threewaywt.cpp
===================================================================
--- trunk/packages/muscle/trunk/threewaywt.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/threewaywt.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,342 +0,0 @@
-#include "muscle.h"
-#include "tree.h"
-#include <math.h>
-
-#define TRACE 0
-
-/***
-Sequence weights derived from a tree using Gotoh's
-three-way method.
-
- Gotoh (1995) CABIOS 11(5), 543-51.
-
-Each edge e is assigned a weight w(e).
-
-Consider first a tree with three leaves A,B and C
-having branch lengths a, b and c, as follows.
-
- B
- |
- b
- |
- A---a---R---c---C
-
-The internal node is denoted by R.
-
-Define:
-
- S = (ab + ca + ab)
- x = bc(a + b)(a + c)
- y = a(b + c)FS
-
-Here F is a tunable normalization factor which is
-approximately 1.0. Then the edge weight for AR
-is computed as:
-
- w(AR) = sqrt(x/y)
-
-Similar expressions for the other edges follow by
-symmetry.
-
-For a tree with more than three edges, the weight
-of an edge that ends in a leaf is computed from
-the three-way tree that includes the edge and
-its two neighbors. The weight of an internal edge
-is computed as the product of the weights for that
-edge derived from the two three-way subtrees that
-include that edge.
-
-For example, consider the following tree.
-
- B
- |
- A--R--V--C
- |
- D
-
-Here, w(RV) is computed as the product of the
-two values for w(RV) derived from the three-way
-trees with leaves ABV and RCD respectively.
-
-The calculation is done using "Gotoh lengths",
-not the real edge lengths.
-
-The Gotoh length G of a directed edge is calculated
-recursively as:
-
- G = d + LR/(L + R)
-
-where d is the length of the edge, and L and R are
-the Gotoh lengths of the left and right edges adjoining
-the terminal end of the edge. If the edge terminates on
-a leaf, then G=d.
-
-Pairwise sequence weights are computed as the
-product of edge weights on the path that connects
-their leaves.
-
-If the tree is split into two subtrees by deleting
-a given edge e, then the pairwise weights factorize.
-For operations on profiles formed from the two
-subtrees, it is possible to assign a weight to a
-sequence as the product of edge weights on a path
-from e to its leaf.
-***/
-
-// The xxxUnrooted functions present a rooted tree as
-// if it had been unrooted by deleting the root node.
-static unsigned GetFirstNeighborUnrooted(const Tree &tree, unsigned uNode1,
- unsigned uNode2)
- {
- if (tree.IsRoot(uNode1) || tree.IsRoot(uNode2))
- Quit("GetFirstNeighborUnrooted, should never be called with root");
- if (!tree.IsEdge(uNode1, uNode2))
- {
- if (!tree.IsRoot(tree.GetParent(uNode1)) ||
- !tree.IsRoot(tree.GetParent(uNode2)))
- Quit("GetFirstNeighborUnrooted, not edge");
- const unsigned uRoot = tree.GetRootNodeIndex();
- return tree.GetFirstNeighbor(uNode1, uRoot);
- }
-
- unsigned uNeighbor = tree.GetFirstNeighbor(uNode1, uNode2);
- if (tree.IsRoot(uNeighbor))
- return tree.GetFirstNeighbor(uNeighbor, uNode1);
- return uNeighbor;
- }
-
-static unsigned GetSecondNeighborUnrooted(const Tree &tree, unsigned uNode1,
- unsigned uNode2)
- {
- if (tree.IsRoot(uNode1) || tree.IsRoot(uNode2))
- Quit("GetFirstNeighborUnrooted, should never be called with root");
- if (!tree.IsEdge(uNode1, uNode2))
- {
- if (!tree.IsRoot(tree.GetParent(uNode1)) ||
- !tree.IsRoot(tree.GetParent(uNode2)))
- Quit("GetFirstNeighborUnrooted, not edge");
- const unsigned uRoot = tree.GetRootNodeIndex();
- return tree.GetSecondNeighbor(uNode1, uRoot);
- }
-
- unsigned uNeighbor = tree.GetSecondNeighbor(uNode1, uNode2);
- if (tree.IsRoot(uNeighbor))
- return tree.GetFirstNeighbor(uNeighbor, uNode1);
- return uNeighbor;
- }
-
-static unsigned GetNeighborUnrooted(const Tree &tree, unsigned uNode1,
- unsigned uSub)
- {
- unsigned uNeighbor = tree.GetNeighbor(uNode1, uSub);
- if (tree.IsRoot(uNeighbor))
- return tree.GetFirstNeighbor(uNeighbor, uNode1);
- return uNeighbor;
- }
-
-static unsigned GetNeighborSubscriptUnrooted(const Tree &tree, unsigned uNode1,
- unsigned uNode2)
- {
- if (tree.IsEdge(uNode1, uNode2))
- return tree.GetNeighborSubscript(uNode1, uNode2);
- if (!tree.IsRoot(tree.GetParent(uNode1)) ||
- !tree.IsRoot(tree.GetParent(uNode2)))
- Quit("GetNeighborSubscriptUnrooted, not edge");
- for (unsigned uSub = 0; uSub < 3; ++uSub)
- if (GetNeighborUnrooted(tree, uNode1, uSub) == uNode2)
- return uSub;
- Quit("GetNeighborSubscriptUnrooted, not a neighbor");
- return NULL_NEIGHBOR;
- }
-
-static double GetEdgeLengthUnrooted(const Tree &tree, unsigned uNode1,
- unsigned uNode2)
- {
- if (tree.IsRoot(uNode1) || tree.IsRoot(uNode2))
- Quit("GetEdgeLengthUnrooted, should never be called with root");
- if (!tree.IsEdge(uNode1, uNode2))
- {
- if (!tree.IsRoot(tree.GetParent(uNode1)) ||
- !tree.IsRoot(tree.GetParent(uNode2)))
- Quit("GetEdgeLengthUnrooted, not edge");
-
- const unsigned uRoot = tree.GetRootNodeIndex();
- return tree.GetEdgeLength(uNode1, uRoot) +
- tree.GetEdgeLength(uNode2, uRoot);
- }
- return tree.GetEdgeLength(uNode1, uNode2);
- }
-
-double GetGotohLength(const Tree &tree, unsigned R, unsigned A)
- {
- double dThis = GetEdgeLengthUnrooted(tree, R, A);
-
-// Enforce non-negative edge lengths
- if (dThis < 0)
- dThis = 0;
-
- if (tree.IsLeaf(A))
- return dThis;
-
- const unsigned uFirst = GetFirstNeighborUnrooted(tree, A, R);
- const unsigned uSecond = GetSecondNeighborUnrooted(tree, A, R);
- const double dFirst = GetGotohLength(tree, A, uFirst);
- const double dSecond = GetGotohLength(tree, A, uSecond);
- const double dSum = dFirst + dSecond;
- const double dThird = dSum == 0 ? 0 : (dFirst*dSecond)/dSum;
- return dThis + dThird;
- }
-
-// Return weight of edge A-R in three-way subtree that has
-// leaves A,B,C and internal node R.
-static double GotohWeightThreeWay(const Tree &tree, unsigned A,
- unsigned B, unsigned C, unsigned R)
- {
- const double F = 1.0;
-
- if (tree.IsLeaf(R))
- Quit("GotohThreeWay: R must be internal node");
-
- double a = GetGotohLength(tree, R, A);
- double b = GetGotohLength(tree, R, B);
- double c = GetGotohLength(tree, R, C);
-
- double S = b*c + c*a + a*b;
- double x = b*c*(a + b)*(a + c);
- double y = a*(b + c)*F*S;
-
-// y is zero iff all three branch lengths are zero.
- if (y < 0.001)
- return 1.0;
- return sqrt(x/y);
- }
-
-static double GotohWeightEdge(const Tree &tree, unsigned uNodeIndex1,
- unsigned uNodeIndex2)
- {
- double w1 = 1.0;
- double w2 = 1.0;
- if (!tree.IsLeaf(uNodeIndex1))
- {
- unsigned R = uNodeIndex1;
- unsigned A = uNodeIndex2;
- unsigned B = GetFirstNeighborUnrooted(tree, R, A);
- unsigned C = GetSecondNeighborUnrooted(tree, R, A);
- w1 = GotohWeightThreeWay(tree, A, B, C, R);
- }
- if (!tree.IsLeaf(uNodeIndex2))
- {
- unsigned R = uNodeIndex2;
- unsigned A = uNodeIndex1;
- unsigned B = GetFirstNeighborUnrooted(tree, R, A);
- unsigned C = GetSecondNeighborUnrooted(tree, R, A);
- w2 = GotohWeightThreeWay(tree, A, B, C, R);
- }
- return w1*w2;
- }
-
-void CalcThreeWayEdgeWeights(const Tree &tree, WEIGHT **EdgeWeights)
- {
- const unsigned uNodeCount = tree.GetNodeCount();
- for (unsigned uNodeIndex1 = 0; uNodeIndex1 < uNodeCount; ++uNodeIndex1)
- {
- if (tree.IsRoot(uNodeIndex1))
- continue;
- for (unsigned uSub1 = 0; uSub1 < 3; ++uSub1)
- {
- const unsigned uNodeIndex2 = GetNeighborUnrooted(tree, uNodeIndex1, uSub1);
- if (NULL_NEIGHBOR == uNodeIndex2)
- continue;
-
- // Avoid computing same edge twice in reversed order
- if (uNodeIndex2 < uNodeIndex1)
- continue;
-
- const WEIGHT w = (WEIGHT) GotohWeightEdge(tree, uNodeIndex1, uNodeIndex2);
- const unsigned uSub2 = GetNeighborSubscriptUnrooted(tree, uNodeIndex2, uNodeIndex1);
-#if DEBUG
- {
- assert(uNodeIndex2 == GetNeighborUnrooted(tree, uNodeIndex1, uSub1));
- assert(uNodeIndex1 == GetNeighborUnrooted(tree, uNodeIndex2, uSub2));
- const WEIGHT wRev = (WEIGHT) GotohWeightEdge(tree, uNodeIndex2, uNodeIndex1);
- if (!BTEq(w, wRev))
- Quit("CalcThreeWayWeights: rev check failed %g %g",
- w, wRev);
- }
-#endif
- EdgeWeights[uNodeIndex1][uSub1] = w;
- EdgeWeights[uNodeIndex2][uSub2] = w;
- }
- }
- }
-
-static void SetSeqWeights(const Tree &tree, unsigned uNode1, unsigned uNode2,
- double dPathWeight, WEIGHT *Weights)
- {
- if (tree.IsRoot(uNode1) || tree.IsRoot(uNode2))
- Quit("SetSeqWeights, should never be called with root");
-
- const double dThisLength = GetEdgeLengthUnrooted(tree, uNode1, uNode2);
- if (tree.IsLeaf(uNode2))
- {
- const unsigned Id = tree.GetLeafId(uNode2);
- Weights[Id] = (WEIGHT) (dPathWeight + dThisLength);
- return;
- }
- const unsigned uFirst = GetFirstNeighborUnrooted(tree, uNode2, uNode1);
- const unsigned uSecond = GetSecondNeighborUnrooted(tree, uNode2, uNode1);
- dPathWeight *= dThisLength;
- SetSeqWeights(tree, uNode2, uFirst, dPathWeight, Weights);
- SetSeqWeights(tree, uNode2, uSecond, dPathWeight, Weights);
- }
-
-void CalcThreeWayWeights(const Tree &tree, unsigned uNode1, unsigned uNode2,
- WEIGHT *Weights)
- {
-#if TRACE
- Log("CalcThreeWayEdgeWeights\n");
- tree.LogMe();
-#endif
-
- if (tree.IsRoot(uNode1))
- uNode1 = tree.GetFirstNeighbor(uNode1, uNode2);
- else if (tree.IsRoot(uNode2))
- uNode2 = tree.GetFirstNeighbor(uNode2, uNode1);
- const unsigned uNodeCount = tree.GetNodeCount();
- WEIGHT **EdgeWeights = new WEIGHT *[uNodeCount];
- for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
- EdgeWeights[uNodeIndex] = new WEIGHT[3];
-
- CalcThreeWayEdgeWeights(tree, EdgeWeights);
-
-#if TRACE
- {
- Log("Node1 Node2 Length Gotoh EdgeWt\n");
- Log("----- ----- ------ ------ ------\n");
- for (unsigned uNodeIndex1 = 0; uNodeIndex1 < uNodeCount; ++uNodeIndex1)
- {
- if (tree.IsRoot(uNodeIndex1))
- continue;
- for (unsigned uSub1 = 0; uSub1 < 3; ++uSub1)
- {
- const unsigned uNodeIndex2 = GetNeighborUnrooted(tree, uNodeIndex1, uSub1);
- if (NULL_NEIGHBOR == uNodeIndex2)
- continue;
- if (uNodeIndex2 < uNodeIndex1)
- continue;
- const WEIGHT ew = EdgeWeights[uNodeIndex1][uSub1];
- const double d = GetEdgeLengthUnrooted(tree, uNodeIndex1, uNodeIndex2);
- const double g = GetGotohLength(tree, uNodeIndex1, uNodeIndex2);
- Log("%5u %5u %6.3f %6.3f %6.3f\n", uNodeIndex1, uNodeIndex2, d, g, ew);
- }
- }
- }
-#endif
-
- SetSeqWeights(tree, uNode1, uNode2, 0.0, Weights);
- SetSeqWeights(tree, uNode2, uNode1, 0.0, Weights);
-
- for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
- delete[] EdgeWeights[uNodeIndex];
- delete[] EdgeWeights;
- }
Deleted: trunk/packages/muscle/trunk/timing.h
===================================================================
--- trunk/packages/muscle/trunk/timing.h 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/timing.h 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,24 +0,0 @@
-#if WIN32
-
-typedef unsigned __int64 TICKS;
-
-#pragma warning(disable:4035)
-inline TICKS GetClockTicks()
- {
- _asm
- {
- _emit 0x0f
- _emit 0x31
- }
- }
-
-#define StartTimer() __int64 t1__ = GetClockTicks()
-
-#define GetElapsedTicks() (GetClockTicks() - t1__)
-
-static double TicksToSecs(TICKS t)
- {
- return (__int64) t/2.5e9;
- }
-
-#endif // WIN32
Deleted: trunk/packages/muscle/trunk/traceback.cpp
===================================================================
--- trunk/packages/muscle/trunk/traceback.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/traceback.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,208 +0,0 @@
-#include "muscle.h"
-#include "profile.h"
-#include "pwpath.h"
-#include <math.h>
-
-#define TRACE 0
-
-#define EQ(a, b) (fabs(a-b) < 0.1)
-
-SCORE TraceBack(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
- unsigned uLengthB, const SCORE *DPM_, const SCORE *DPD_, const SCORE *DPI_,
- PWPath &Path)
- {
-#if TRACE
- Log("\n");
- Log("TraceBack LengthA=%u LengthB=%u\n", uLengthA, uLengthB);
-#endif
- assert(uLengthB > 0 && uLengthA > 0);
-
- const unsigned uPrefixCountA = uLengthA + 1;
- const unsigned uPrefixCountB = uLengthB + 1;
-
- Path.Clear();
-
- unsigned uPrefixLengthA = uLengthA;
- unsigned uPrefixLengthB = uLengthB;
-
- const SCORE scoreM = DPM(uPrefixLengthA, uPrefixLengthB);
- SCORE scoreD = DPD(uPrefixLengthA, uPrefixLengthB);
- SCORE scoreI = DPI(uPrefixLengthA, uPrefixLengthB);
-
- const ProfPos &LastPPA = PA[uLengthA - 1];
- const ProfPos &LastPPB = PB[uLengthB - 1];
-
- scoreD += LastPPA.m_scoreGapClose;
- scoreI += LastPPB.m_scoreGapClose;
-
- char cEdgeType = cInsane;
- SCORE scoreMax;
- if (scoreM >= scoreD && scoreM >= scoreI)
- {
- scoreMax = scoreM;
- cEdgeType = 'M';
- }
- else if (scoreD >= scoreM && scoreD >= scoreI)
- {
- scoreMax = scoreD;
- cEdgeType = 'D';
- }
- else
- {
- assert(scoreI >= scoreM && scoreI >= scoreD);
- scoreMax = scoreI;
- cEdgeType = 'I';
- }
-
- for (;;)
- {
- if ('S' == cEdgeType)
- break;
-
- PWEdge Edge;
- Edge.cType = cEdgeType;
- Edge.uPrefixLengthA = uPrefixLengthA;
- Edge.uPrefixLengthB = uPrefixLengthB;
- Path.PrependEdge(Edge);
-
- char cPrevEdgeType;
- unsigned uPrevPrefixLengthA = uPrefixLengthA;
- unsigned uPrevPrefixLengthB = uPrefixLengthB;
-
- switch (cEdgeType)
- {
- case 'M':
- {
- assert(uPrefixLengthA > 0);
- assert(uPrefixLengthB > 0);
- const ProfPos &PPA = PA[uPrefixLengthA - 1];
- const ProfPos &PPB = PB[uPrefixLengthB - 1];
-
- const SCORE Score = DPM(uPrefixLengthA, uPrefixLengthB);
- const SCORE scoreMatch = ScoreProfPos2(PPA, PPB);
-
- SCORE scoreSM;
- if (1 == uPrefixLengthA && 1 == uPrefixLengthB)
- scoreSM = scoreMatch;
- else
- scoreSM = MINUS_INFINITY;
-
- SCORE scoreMM = MINUS_INFINITY;
- SCORE scoreDM = MINUS_INFINITY;
- SCORE scoreIM = MINUS_INFINITY;
- if (uPrefixLengthA > 1 && uPrefixLengthB > 1)
- scoreMM = DPM(uPrefixLengthA-1, uPrefixLengthB-1) + scoreMatch;
- if (uPrefixLengthA > 1)
- {
- SCORE scoreTransDM = PA[uPrefixLengthA-2].m_scoreGapClose;
- scoreDM = DPD(uPrefixLengthA-1, uPrefixLengthB-1) + scoreTransDM + scoreMatch;
- }
- if (uPrefixLengthB > 1)
- {
- SCORE scoreTransIM = PB[uPrefixLengthB-2].m_scoreGapClose;
- scoreIM = DPI(uPrefixLengthA-1, uPrefixLengthB-1) + scoreTransIM + scoreMatch;
- }
-
- if (EQ(scoreMM, Score))
- cPrevEdgeType = 'M';
- else if (EQ(scoreDM, Score))
- cPrevEdgeType = 'D';
- else if (EQ(scoreIM, Score))
- cPrevEdgeType = 'I';
- else if (EQ(scoreSM, Score))
- cPrevEdgeType = 'S';
- else
- Quit("TraceBack: failed to match M score=%g M=%g D=%g I=%g S=%g",
- Score, scoreMM, scoreDM, scoreIM, scoreSM);
-
- --uPrevPrefixLengthA;
- --uPrevPrefixLengthB;
- break;
- }
-
- case 'D':
- {
- assert(uPrefixLengthA > 0);
- const SCORE Score = DPD(uPrefixLengthA, uPrefixLengthB);
-
- SCORE scoreMD = MINUS_INFINITY;
- SCORE scoreDD = MINUS_INFINITY;
- SCORE scoreSD = MINUS_INFINITY;
- if (uPrefixLengthB == 0)
- {
- if (uPrefixLengthA == 1)
- scoreSD = PA[0].m_scoreGapOpen;
- else
- scoreSD = DPD(uPrefixLengthA - 1, 0);
- }
- if (uPrefixLengthA > 1)
- {
- const ProfPos &PPA = PA[uPrefixLengthA - 1];
- SCORE scoreTransMD = PPA.m_scoreGapOpen;
- scoreMD = DPM(uPrefixLengthA-1, uPrefixLengthB) + scoreTransMD;
- scoreDD = DPD(uPrefixLengthA-1, uPrefixLengthB);
- }
-
- if (EQ(Score, scoreMD))
- cPrevEdgeType = 'M';
- else if (EQ(Score, scoreDD))
- cPrevEdgeType = 'D';
- else if (EQ(Score, scoreSD))
- cPrevEdgeType = 'S';
- else
- Quit("TraceBack: failed to match D");
-
- --uPrevPrefixLengthA;
- break;
- }
-
- case 'I':
- {
- assert(uPrefixLengthB > 0);
- const SCORE Score = DPI(uPrefixLengthA, uPrefixLengthB);
-
- SCORE scoreMI = MINUS_INFINITY;
- SCORE scoreII = MINUS_INFINITY;
- SCORE scoreSI = MINUS_INFINITY;
- if (uPrefixLengthA == 0)
- {
- if (uPrefixLengthB == 1)
- scoreSI = PB[0].m_scoreGapOpen;
- else
- scoreSI = DPI(0, uPrefixLengthB - 1);
- }
- if (uPrefixLengthB > 1)
- {
- const ProfPos &PPB = PB[uPrefixLengthB - 1];
- SCORE scoreTransMI = PPB.m_scoreGapOpen;
- scoreMI = DPM(uPrefixLengthA, uPrefixLengthB-1) + scoreTransMI;
- scoreII = DPI(uPrefixLengthA, uPrefixLengthB-1);
- }
-
- if (EQ(Score, scoreMI))
- cPrevEdgeType = 'M';
- else if (EQ(Score, scoreII))
- cPrevEdgeType = 'I';
- else if (EQ(Score, scoreSI))
- cPrevEdgeType = 'S';
- else
- Quit("TraceBack: failed to match I");
-
- --uPrevPrefixLengthB;
- break;
- }
-
- default:
- assert(false);
- }
-#if TRACE
- Log("Edge %c%c%u.%u", cPrevEdgeType, cEdgeType, uPrefixLengthA, uPrefixLengthB);
- Log("\n");
-#endif
- cEdgeType = cPrevEdgeType;
- uPrefixLengthA = uPrevPrefixLengthA;
- uPrefixLengthB = uPrevPrefixLengthB;
- }
-
- return scoreMax;
- }
Deleted: trunk/packages/muscle/trunk/tracebackopt.cpp
===================================================================
--- trunk/packages/muscle/trunk/tracebackopt.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/tracebackopt.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,73 +0,0 @@
-#include "muscle.h"
-#include "pwpath.h"
-
-// Expand a compressed trace-back matrix into an explicit alignment path.
-// The walk starts at cell (uLengthA, uLengthB) and ends at (0, 0). Each entry
-// iDelta = TraceBack[a][b] is interpreted as:
-//   iDelta == 0   one match ('M') edge;
-//   iDelta > 0    a run of iDelta delete ('D') edges, each consuming one
-//                 position of A;
-//   iDelta < 0    a run of -iDelta insert ('I') edges, each consuming one
-//                 position of B.
-// A gap run that does not end at (0, 0) is followed by one match edge.
-// Edges are prepended, so Path is built from its last edge to its first.
-void TraceBackToPath(int **TraceBack, unsigned uLengthA,
-  unsigned uLengthB, PWPath &Path)
-    {
-    Path.Clear();
-
-    PWEdge Edge;
-    Edge.uPrefixLengthA = uLengthA;
-    Edge.uPrefixLengthB = uLengthB;
-
-    while (Edge.uPrefixLengthA != 0 || Edge.uPrefixLengthB != 0)
-        {
-        const int iDelta = TraceBack[Edge.uPrefixLengthA][Edge.uPrefixLengthB];
-#if TRACE
-        Log("TraceBack[%u][%u] = %d\n",
-          Edge.uPrefixLengthA, Edge.uPrefixLengthB, iDelta);
-#endif
-        if (iDelta != 0)
-            {
-            if (iDelta > 0)
-                {
-                // Run of deletes: step back through A only.
-                Edge.cType = 'D';
-                for (int iRemaining = iDelta; iRemaining > 0; --iRemaining)
-                    {
-                    assert(Edge.uPrefixLengthA > 0);
-
-                    Path.PrependEdge(Edge);
-                    --(Edge.uPrefixLengthA);
-                    }
-                }
-            else
-                {
-                // Run of inserts: step back through B only.
-                Edge.cType = 'I';
-                for (int iRemaining = iDelta; iRemaining < 0; ++iRemaining)
-                    {
-                    assert(Edge.uPrefixLengthB > 0);
-
-                    Path.PrependEdge(Edge);
-                    --(Edge.uPrefixLengthB);
-                    }
-                }
-
-            // A gap run may finish the walk; otherwise a match follows it.
-            if (0 == Edge.uPrefixLengthA && 0 == Edge.uPrefixLengthB)
-                break;
-            }
-
-        assert(Edge.uPrefixLengthA > 0);
-        assert(Edge.uPrefixLengthB > 0);
-
-        Edge.cType = 'M';
-        Path.PrependEdge(Edge);
-        --(Edge.uPrefixLengthA);
-        --(Edge.uPrefixLengthB);
-        }
-
-#if TRACE
-    Log("TraceBackToPath ");
-    Path.LogMe();
-#endif
-    }
Deleted: trunk/packages/muscle/trunk/tracebacksw.cpp
===================================================================
--- trunk/packages/muscle/trunk/tracebacksw.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/tracebacksw.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,186 +0,0 @@
-#include "muscle.h"
-#include "profile.h"
-#include "pwpath.h"
-#include <math.h>
-
-#define TRACE 0
-
-// Two scores are treated as equal when they differ by less than 0.1.
-// Both arguments are parenthesized so that expression arguments such as
-// EQ(x, a + b) expand with the intended precedence.
-#define EQ(a, b) (fabs((a) - (b)) < 0.1)
-/*
- * TraceBackSW: rebuild the alignment path that ends in the match cell
- * (uPrefixLengthAMax, uPrefixLengthBMax) of the DP matrices.
- *
- * Starting from that cell in state 'M', each iteration prepends the current
- * edge to Path and then determines which predecessor state ('M', 'D', 'I', or
- * 'S' for start) produced the current cell's score. It does so by recomputing
- * every candidate score and comparing it with EQ. The walk stops as soon as
- * the predecessor is 'S'; Quit() is called when no candidate matches.
- *
- * PA, PB            profile positions of A and B; their gap-open and
- *                   gap-close scores and ScoreProfPos2() are read here
- * uLengthA/B        profile lengths, both asserted to be > 0
- * DPM_, DPD_, DPI_  DP score matrices, accessed only through the DPM/DPD/DPI
- *                   macros, which are not defined in this part of the file;
- *                   presumably they index these arrays using uPrefixCountA/B
- *                   (TODO confirm against the macro definitions)
- * Path              output; cleared first, then edges are prepended
- */
-void TraceBackSW(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
- unsigned uLengthB, const SCORE *DPM_, const SCORE *DPD_, const SCORE *DPI_,
- unsigned uPrefixLengthAMax, unsigned uPrefixLengthBMax, PWPath &Path)
- {
-#if TRACE
- Log("\n");
- Log("TraceBackSW LengthA=%u LengthB=%u PLAMax=%u PLBMax=%u\n",
- uLengthA, uLengthB, uPrefixLengthAMax, uPrefixLengthBMax);
-#endif
- assert(uLengthB > 0 && uLengthA > 0);
-// Not referenced by name below; presumably consumed by the DPM/DPD/DPI macros (TODO confirm).
- const unsigned uPrefixCountA = uLengthA + 1;
- const unsigned uPrefixCountB = uLengthB + 1;
-
- Path.Clear();
-
- unsigned uPrefixLengthA = uPrefixLengthAMax;
- unsigned uPrefixLengthB = uPrefixLengthBMax;
-
- SCORE scoreMax = DPM(uPrefixLengthA, uPrefixLengthB); // not read again in the visible code
- char cEdgeType = 'M'; // the path always ends with a match edge
-
- for (;;)
- {
- if ('S' == cEdgeType) // reached the start state: path is complete
- break;
-
- PWEdge Edge;
- Edge.cType = cEdgeType;
- Edge.uPrefixLengthA = uPrefixLengthA;
- Edge.uPrefixLengthB = uPrefixLengthB;
- Path.PrependEdge(Edge);
-
- char cPrevEdgeType; // set by every case below (Quit() is called otherwise)
- unsigned uPrevPrefixLengthA = uPrefixLengthA;
- unsigned uPrevPrefixLengthB = uPrefixLengthB;
-
- switch (cEdgeType)
- {
- case 'M':
- {
- assert(uPrefixLengthA > 0);
- assert(uPrefixLengthB > 0);
- const ProfPos &PPA = PA[uPrefixLengthA - 1];
- const ProfPos &PPB = PB[uPrefixLengthB - 1];
-
- const SCORE Score = DPM(uPrefixLengthA, uPrefixLengthB);
- const SCORE scoreMatch = ScoreProfPos2(PPA, PPB);
-
- SCORE scoreSM; // start directly into the first cell (1,1)
- if (1 == uPrefixLengthA && 1 == uPrefixLengthB)
- scoreSM = scoreMatch;
- else
- scoreSM = MINUS_INFINITY;
-
- SCORE scoreMM = MINUS_INFINITY;
- SCORE scoreDM = MINUS_INFINITY;
- SCORE scoreIM = MINUS_INFINITY;
- if (uPrefixLengthA > 1 && uPrefixLengthB > 1)
- {
- SCORE scoreTrans = DPM(uPrefixLengthA-1, uPrefixLengthB-1);
- scoreMM = scoreTrans + scoreMatch;
- }
- if (uPrefixLengthA > 1)
- {
- SCORE scoreTransDM = PA[uPrefixLengthA-2].m_scoreGapClose;
- scoreDM = DPD(uPrefixLengthA-1, uPrefixLengthB-1) + scoreTransDM + scoreMatch;
- }
- if (uPrefixLengthB > 1)
- {
- SCORE scoreTransIM = PB[uPrefixLengthB-2].m_scoreGapClose;
- scoreIM = DPI(uPrefixLengthA-1, uPrefixLengthB-1) + scoreTransIM + scoreMatch;
- }
-// Candidates are tested in the fixed order M, D, I, S; the first one that matches wins.
- if (EQ(scoreMM, Score))
- cPrevEdgeType = 'M';
- else if (EQ(scoreDM, Score))
- cPrevEdgeType = 'D';
- else if (EQ(scoreIM, Score))
- cPrevEdgeType = 'I';
- else if (EQ(scoreSM, Score))
- cPrevEdgeType = 'S';
- else if (EQ(scoreMatch, Score)) // cell score is the bare match score: path starts at this cell
- cPrevEdgeType = 'S';
- else
- Quit("TraceBack2: failed to match M score=%g M=%g D=%g I=%g S=%g",
- Score, scoreMM, scoreDM, scoreIM, scoreSM);
-
- --uPrevPrefixLengthA; // a match consumes one position of A and one of B
- --uPrevPrefixLengthB;
- break;
- }
-
- case 'D':
- {
- assert(uPrefixLengthA > 0);
- const SCORE Score = DPD(uPrefixLengthA, uPrefixLengthB);
-
- SCORE scoreMD = MINUS_INFINITY;
- SCORE scoreDD = MINUS_INFINITY;
- SCORE scoreSD = MINUS_INFINITY;
- if (uPrefixLengthB == 0) // first column: the start state is a candidate predecessor
- {
- if (uPrefixLengthA == 1)
- scoreSD = PA[0].m_scoreGapOpen;
- else
- scoreSD = DPD(uPrefixLengthA - 1, 0);
- }
- if (uPrefixLengthA > 1)
- {
- const ProfPos &PPA = PA[uPrefixLengthA - 1];
- SCORE scoreTransMD = PPA.m_scoreGapOpen;
- scoreMD = DPM(uPrefixLengthA-1, uPrefixLengthB) + scoreTransMD;
- scoreDD = DPD(uPrefixLengthA-1, uPrefixLengthB);
- }
-
- if (EQ(Score, scoreMD))
- cPrevEdgeType = 'M';
- else if (EQ(Score, scoreDD))
- cPrevEdgeType = 'D';
- else if (EQ(Score, scoreSD))
- cPrevEdgeType = 'S';
- else
- Quit("TraceBack2: failed to match D");
-
- --uPrevPrefixLengthA; // a delete consumes one position of A only
- break;
- }
-
- case 'I':
- {
- assert(uPrefixLengthB > 0);
- const SCORE Score = DPI(uPrefixLengthA, uPrefixLengthB);
-
- SCORE scoreMI = MINUS_INFINITY;
- SCORE scoreII = MINUS_INFINITY;
- SCORE scoreSI = MINUS_INFINITY;
- if (uPrefixLengthA == 0) // first row: the start state is a candidate predecessor
- {
- if (uPrefixLengthB == 1)
- scoreSI = PB[0].m_scoreGapOpen;
- else
- scoreSI = DPI(0, uPrefixLengthB - 1);
- }
- if (uPrefixLengthB > 1)
- {
- const ProfPos &PPB = PB[uPrefixLengthB - 1];
- SCORE scoreTransMI = PPB.m_scoreGapOpen;
- scoreMI = DPM(uPrefixLengthA, uPrefixLengthB-1) + scoreTransMI;
- scoreII = DPI(uPrefixLengthA, uPrefixLengthB-1);
- }
-
- if (EQ(Score, scoreMI))
- cPrevEdgeType = 'M';
- else if (EQ(Score, scoreII))
- cPrevEdgeType = 'I';
- else if (EQ(Score, scoreSI))
- cPrevEdgeType = 'S';
- else
- Quit("TraceBack2: failed to match I");
-
- --uPrevPrefixLengthB; // an insert consumes one position of B only
- break;
- }
-
- default:
- assert(false); // cEdgeType is only ever assigned 'M', 'D', 'I' or 'S'
- }
-#if TRACE
- Log("Edge %c%c%u.%u", cPrevEdgeType, cEdgeType, uPrefixLengthA, uPrefixLengthB);
- Log("\n");
-#endif
- cEdgeType = cPrevEdgeType;
- uPrefixLengthA = uPrevPrefixLengthA;
- uPrefixLengthB = uPrevPrefixLengthB;
- }
- }
Deleted: trunk/packages/muscle/trunk/tree.h
===================================================================
--- trunk/packages/muscle/trunk/tree.h 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/tree.h 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,339 +0,0 @@
-#ifndef tree_h
-#define tree_h
-
-#include <limits.h>
-
-class Clust;
-
-const unsigned NULL_NEIGHBOR = UINT_MAX;
-/*
- * Token kinds used while reading a tree file. The first group is what
- * Tree::GetToken hands back to its caller; the second group is declared
- * here but, per the note below, never returned by Tree::GetToken.
- */
-enum NEWICK_TOKEN_TYPE
- {
- NTT_Unknown,
-
-// Returned from Tree::GetToken:
- NTT_Lparen,
- NTT_Rparen,
- NTT_Colon,
- NTT_Comma,
- NTT_Semicolon,
- NTT_String,
-
-// Following are never returned from Tree::GetToken:
- NTT_SingleQuotedString,
- NTT_DoubleQuotedString,
- NTT_Comment
- };
-
-// Tree stores a tree (rooted or unrooted) as parallel per-node arrays: up to
-// three neighbors per node, each with an optional edge length, plus an
-// optional height, an id and a name. In a rooted tree neighbor 1 is the
-// parent and neighbors 2 and 3 are the left and right children (see
-// GetParent, GetLeft and GetRight). NULL_NEIGHBOR marks an unused neighbor
-// slot. Names are released with free() in Clear(); the arrays themselves are
-// released with delete[].
-class Tree
- {
-public:
-// Creates an empty tree: no nodes, no arrays allocated, not rooted.
- Tree()
- {
- m_uNodeCount = 0;
- m_uCacheCount = 0;
- m_uNeighbor1 = 0;
- m_uNeighbor2 = 0;
- m_uNeighbor3 = 0;
- m_dEdgeLength1 = 0;
- m_dEdgeLength2 = 0;
- m_dEdgeLength3 = 0;
- m_dHeight = 0;
- m_bHasEdgeLength1 = 0;
- m_bHasEdgeLength2 = 0;
- m_bHasEdgeLength3 = 0;
- m_bHasHeight = 0;
- m_ptrName = 0;
- m_Ids = 0;
-// Give the scalar state defined values too, so that IsRooted() and
-// GetLeafCount() do not read indeterminate members on a fresh tree.
- m_bRooted = false;
- m_uRootNodeIndex = 0;
- }
- virtual ~Tree()
- {
- Clear();
- }
-
-// Releases all node data and returns the tree to the empty state.
-// Safe to call repeatedly: every array pointer is reset after delete[].
- void Clear()
- {
- for (unsigned n = 0; n < m_uNodeCount; ++n)
- free(m_ptrName[n]);
-
- m_uNodeCount = 0;
- m_uCacheCount = 0;
-
- delete[] m_uNeighbor1;
- delete[] m_uNeighbor2;
- delete[] m_uNeighbor3;
- delete[] m_dEdgeLength1;
- delete[] m_dEdgeLength2;
- delete[] m_dEdgeLength3;
- delete[] m_bHasEdgeLength1;
- delete[] m_bHasEdgeLength2;
- delete[] m_bHasEdgeLength3;
- delete[] m_ptrName;
- delete[] m_Ids;
- delete[] m_bHasHeight;
- delete[] m_dHeight;
-
- m_uNeighbor1 = 0;
- m_uNeighbor2 = 0;
- m_uNeighbor3 = 0;
- m_dEdgeLength1 = 0;
- m_dEdgeLength2 = 0;
- m_dEdgeLength3 = 0;
-// These three were previously left dangling, so a second Clear() (for
-// example the one run by the destructor) deleted them twice.
- m_bHasEdgeLength1 = 0;
- m_bHasEdgeLength2 = 0;
- m_bHasEdgeLength3 = 0;
- m_ptrName = 0;
- m_Ids = 0;
- m_uRootNodeIndex = 0;
- m_bHasHeight = 0;
- m_dHeight = 0;
-
- m_bRooted = false;
- }
-
-// Creation and manipulation
- void CreateRooted();
- void CreateUnrooted(double dEdgeLength);
-
- void FromFile(TextFile &File);
- void FromClust(Clust &C);
-
- void Copy(const Tree &tree);
-
- void Create(unsigned uLeafCount, unsigned uRoot, const unsigned Left[],
- const unsigned Right[], const float LeftLength[], const float RightLength[],
- const unsigned LeafIds[], char *LeafNames[]);
- unsigned AppendBranch(unsigned uExistingNodeIndex);
- void SetLeafName(unsigned uNodeIndex, const char *ptrName);
- void SetLeafId(unsigned uNodeIndex, unsigned uId);
- void SetEdgeLength(unsigned uNodeIndex1, unsigned uNodeIndex2,
- double dLength);
-
- void RootUnrootedTree(unsigned uNodeIndex1, unsigned uNodeIndex2);
- void RootUnrootedTree(ROOT Method);
- void UnrootByDeletingRoot();
-
-// Saving to file
- void ToFile(TextFile &File) const;
-
-// Accessor functions
- unsigned GetNodeCount() const
- {
- return m_uNodeCount;
- }
-
-// Leaf count derived from the node count: (N + 1)/2 for a rooted tree,
-// (N + 2)/2 for an unrooted one.
- unsigned GetLeafCount() const
- {
- if (m_bRooted)
- {
- assert(m_uNodeCount%2 == 1);
- return (m_uNodeCount + 1)/2;
- }
- else
- {
- assert(m_uNodeCount%2 == 0);
- return (m_uNodeCount + 2)/2;
- }
- }
-
- unsigned GetNeighbor(unsigned uNodeIndex, unsigned uNeighborSubscript) const;
-
- unsigned GetNeighbor1(unsigned uNodeIndex) const
- {
- assert(uNodeIndex < m_uNodeCount);
- return m_uNeighbor1[uNodeIndex];
- }
-
- unsigned GetNeighbor2(unsigned uNodeIndex) const
- {
- assert(uNodeIndex < m_uNodeCount);
- return m_uNeighbor2[uNodeIndex];
- }
-
- unsigned GetNeighbor3(unsigned uNodeIndex) const
- {
- assert(uNodeIndex < m_uNodeCount);
- return m_uNeighbor3[uNodeIndex];
- }
-
-// Rooted trees only: the parent is stored in neighbor slot 1.
- unsigned GetParent(unsigned uNodeIndex) const
- {
- assert(m_bRooted && uNodeIndex < m_uNodeCount);
- return m_uNeighbor1[uNodeIndex];
- }
-
- bool IsRooted() const
- {
- return m_bRooted;
- }
-
-// Rooted trees only: the left child is stored in neighbor slot 2.
- unsigned GetLeft(unsigned uNodeIndex) const
- {
- assert(m_bRooted && uNodeIndex < m_uNodeCount);
- return m_uNeighbor2[uNodeIndex];
- }
-
-// Rooted trees only: the right child is stored in neighbor slot 3.
- unsigned GetRight(unsigned uNodeIndex) const
- {
- assert(m_bRooted && uNodeIndex < m_uNodeCount);
- return m_uNeighbor3[uNodeIndex];
- }
-
- const char *GetName(unsigned uNodeIndex) const
- {
- assert(uNodeIndex < m_uNodeCount);
- return m_ptrName[uNodeIndex];
- }
-
- unsigned GetRootNodeIndex() const
- {
- assert(m_bRooted);
- return m_uRootNodeIndex;
- }
-
-// Number of neighbor slots of the node that are not NULL_NEIGHBOR (0..3).
- unsigned GetNeighborCount(unsigned uNodeIndex) const
- {
- const unsigned n1 = m_uNeighbor1[uNodeIndex];
- const unsigned n2 = m_uNeighbor2[uNodeIndex];
- const unsigned n3 = m_uNeighbor3[uNodeIndex];
- return (NULL_NEIGHBOR != n1) + (NULL_NEIGHBOR != n2) + (NULL_NEIGHBOR != n3);
- }
-
-// A node is a leaf when it has exactly one neighbor; the only node of a
-// one-node tree also counts as a leaf.
- bool IsLeaf(unsigned uNodeIndex) const
- {
- assert(uNodeIndex < m_uNodeCount);
- if (1 == m_uNodeCount)
- return true;
- return 1 == GetNeighborCount(uNodeIndex);
- }
-
- bool IsRoot(unsigned uNodeIndex) const
- {
- return IsRooted() && m_uRootNodeIndex == uNodeIndex;
- }
-
- unsigned GetLeafId(unsigned uNodeIndex) const;
- unsigned GetLeafNodeIndex(const char *ptrName) const;
- bool IsEdge(unsigned uNodeIndex1, unsigned uNodeIndex2) const;
- bool HasEdgeLength(unsigned uNodeIndex1, unsigned uNodeIndex2) const;
- double GetEdgeLength(unsigned uNodeIndex1, unsigned uNodeIndex2) const;
- const char *GetLeafName(unsigned uNodeIndex) const;
- unsigned GetNeighborSubscript(unsigned uNodeIndex, unsigned uNeighborIndex) const;
- double GetNodeHeight(unsigned uNodeIndex) const;
-
-// Depth-first traversal
- unsigned FirstDepthFirstNode() const;
- unsigned NextDepthFirstNode(unsigned uNodeIndex) const;
-
- unsigned FirstDepthFirstNodeR() const;
- unsigned NextDepthFirstNodeR(unsigned uNodeIndex) const;
-
-// Equivalent of GetLeft/Right in unrooted tree, works in rooted tree too.
- unsigned GetFirstNeighbor(unsigned uNodeIndex, unsigned uNeighborIndex) const;
- unsigned GetSecondNeighbor(unsigned uNodeIndex, unsigned uNeighborIndex) const;
-
-// Getting parent node in unrooted tree defined iff leaf
- unsigned GetLeafParent(unsigned uNodeIndex) const;
-
-// Misc
- const char *NTTStr(NEWICK_TOKEN_TYPE NTT) const;
- void FindCenterByLongestSpan(unsigned *ptrNodeIndex1,
- unsigned *ptrNodeIndex2) const;
- void PruneTree(const Tree &tree, unsigned Subfams[],
- unsigned uSubfamCount);
- unsigned LeafIndexToNodeIndex(unsigned uLeafIndex) const;
-
-// Debugging & trouble-shooting support
- void Validate() const;
- void ValidateNode(unsigned uNodeIndex) const;
- void AssertAreNeighbors(unsigned uNodeIndex1, unsigned uNodeIndex2) const;
- void LogMe() const;
-
-private:
- unsigned UnrootFromFile();
-// Same as GetToken, but also logs the token type and text.
- NEWICK_TOKEN_TYPE GetTokenVerbose(TextFile &File, char szToken[],
- unsigned uBytes) const
- {
- NEWICK_TOKEN_TYPE NTT = GetToken(File, szToken, uBytes);
- Log("GetToken %10.10s %s\n", NTTStr(NTT), szToken);
- return NTT;
- }
-
- void InitCache(unsigned uCacheCount);
- void ExpandCache();
- NEWICK_TOKEN_TYPE GetToken(TextFile &File, char szToken[], unsigned uBytes) const;
- bool GetGroupFromFile(TextFile &File, unsigned uNodeIndex, double *ptrdEdgeLength);
- unsigned GetLeafCountUnrooted(unsigned uNodeIndex1, unsigned uNodeIndex2,
- double *ptrdTotalDistance) const;
- void ToFileNodeRooted(TextFile &File, unsigned uNodeIndex) const;
- void ToFileNodeUnrooted(TextFile &File, unsigned uNodeIndex, unsigned uParent) const;
- void OrientParent(unsigned uNodeIndex, unsigned uParentNodeIndex);
- double FromClustNode(const Clust &C, unsigned uClustNodeIndex, unsigned uPhyNodeIndex);
- unsigned GetAnyNonLeafNode() const;
-
-// Yuck. Data is made public for the convenience of Tree::Copy.
-// There has to be a better way.
-public:
- unsigned m_uNodeCount;
- unsigned m_uCacheCount;
- unsigned *m_uNeighbor1;
- unsigned *m_uNeighbor2;
- unsigned *m_uNeighbor3;
- double *m_dEdgeLength1;
- double *m_dEdgeLength2;
- double *m_dEdgeLength3;
- double *m_dHeight;
- bool *m_bHasEdgeLength1;
- bool *m_bHasEdgeLength2;
- bool *m_bHasEdgeLength3;
- bool *m_bHasHeight;
- unsigned *m_Ids;
- char **m_ptrName;
- bool m_bRooted;
- unsigned m_uRootNodeIndex;
- };
-
-// Iteration state passed to PhyEnumBiParts / PhyEnumBiPartsR.
-// A freshly constructed state is marked uninitialized and holds no edge
-// (both node indexes are NULL_NEIGHBOR).
-struct PhyEnumEdgeState
-    {
-    bool m_bInit;
-    unsigned m_uNodeIndex1;
-    unsigned m_uNodeIndex2;
-
-    PhyEnumEdgeState()
-      : m_bInit(false),
-        m_uNodeIndex1(NULL_NEIGHBOR),
-        m_uNodeIndex2(NULL_NEIGHBOR)
-        {
-        }
-    };
-
-const unsigned NODE_CHANGED = (unsigned) (~0);
-
-extern bool PhyEnumBiParts(const Tree &tree, PhyEnumEdgeState &ES,
- unsigned Leaves1[], unsigned *ptruCount1,
- unsigned Leaves2[], unsigned *ptruCount2);
-extern bool PhyEnumBiPartsR(const Tree &tree, PhyEnumEdgeState &ES,
- unsigned Leaves1[], unsigned *ptruCount1,
- unsigned Leaves2[], unsigned *ptruCount2);
-extern void ClusterByHeight(const Tree &tree, double dMaxHeight, unsigned Subtrees[],
- unsigned *ptruSubtreeCount);
-void ClusterBySubfamCount(const Tree &tree, unsigned uSubfamCount,
- unsigned Subfams[], unsigned *ptruSubfamCount);
-void GetLeaves(const Tree &tree, unsigned uNodeIndex, unsigned Leaves[],
- unsigned *ptruLeafCount);
-void GetLeavesExcluding(const Tree &tree, unsigned uNodeIndex,
- unsigned uExclude, unsigned Leaves[], unsigned *ptruCount);
-void GetInternalNodesInHeightOrder(const Tree &tree, unsigned NodeIndexes[]);
-void ApplyMinEdgeLength(Tree &tree, double dMinEdgeLength);
-void LeafIndexesToLeafNames(const Tree &tree, const unsigned Leaves[], unsigned uCount,
- char *Names[]);
-void LeafIndexesToIds(const Tree &tree, const unsigned Leaves[], unsigned uCount,
- unsigned Ids[]);
-void MSASeqSubset(const MSA &msaIn, char *Names[], unsigned uSeqCount,
- MSA &msaOut);
-void DiffTrees(const Tree &Tree1, const Tree &Tree2, Tree &Diffs,
- unsigned IdToDiffsLeafNodeIndex[]);
-void DiffTreesE(const Tree &NewTree, const Tree &OldTree,
- unsigned NewNodeIndexToOldNodeIndex[]);
-void FindRoot(const Tree &tree, unsigned *ptruNode1, unsigned *ptruNode2,
- double *ptrdLength1, double *ptrdLength2,
- ROOT RootMethod);
-void FixRoot(Tree &tree, ROOT RootMethod);
-
-#endif // tree_h
Deleted: trunk/packages/muscle/trunk/treefrommsa.cpp
===================================================================
--- trunk/packages/muscle/trunk/treefrommsa.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/treefrommsa.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,55 +0,0 @@
-#include "muscle.h"
-#include "msa.h"
-#include "tree.h"
-#include "clust.h"
-#include "clustsetmsa.h"
-#include "distcalc.h"
-
-// Build a tree from an alignment through the Clust class: the alignment and
-// the distance measure are wrapped in a ClustSetMSA, clustered with the
-// requested method, and the clustering is converted with Tree::FromClust.
-static void TreeFromMSA_NJ(const MSA &msa, Tree &tree, CLUSTER Cluster,
-  DISTANCE Distance)
-    {
-    MSADist DistFunction(Distance);
-    ClustSetMSA LeafSet(msa, DistFunction);
-
-    Clust Clustering;
-    Clustering.Create(LeafSet, Cluster);
-
-    tree.FromClust(Clustering);
-    }
-
-// Build a tree from an alignment with UPGMA2. The CLUSTER method selects the
-// linkage handed to UPGMA2; any other method is reported through Quit().
-static void TreeFromMSA_UPGMA(const MSA &msa, Tree &tree, CLUSTER Cluster,
-  DISTANCE Distance)
-    {
-    LINKAGE Linkage = LINKAGE_Undefined;
-    if (CLUSTER_UPGMA == Cluster)
-        Linkage = LINKAGE_Avg;
-    else if (CLUSTER_UPGMAMin == Cluster)
-        Linkage = LINKAGE_Min;
-    else if (CLUSTER_UPGMAMax == Cluster)
-        Linkage = LINKAGE_Max;
-    else if (CLUSTER_UPGMB == Cluster)
-        Linkage = LINKAGE_Biased;
-    else
-        Quit("TreeFromMSA_UPGMA, CLUSTER_%u not supported", Cluster);
-
-    DistCalcMSA DistSource;
-    DistSource.Init(msa, Distance);
-    UPGMA2(DistSource, tree, Linkage);
-    }
-
-// Build a tree from an alignment: neighbor joining goes through the Clust
-// based builder, every other method through the UPGMA builder. The result is
-// then passed to FixRoot with the requested rooting method.
-void TreeFromMSA(const MSA &msa, Tree &tree, CLUSTER Cluster,
-  DISTANCE Distance, ROOT Root)
-    {
-    const bool bNeighborJoining = (CLUSTER_NeighborJoining == Cluster);
-    if (!bNeighborJoining)
-        TreeFromMSA_UPGMA(msa, tree, Cluster, Distance);
-    else
-        TreeFromMSA_NJ(msa, tree, Cluster, Distance);
-
-    FixRoot(tree, Root);
-    }
Deleted: trunk/packages/muscle/trunk/types.h
===================================================================
--- trunk/packages/muscle/trunk/types.h 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/types.h 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,117 +0,0 @@
-#ifndef types_h
-#define types_h
-
-typedef unsigned char byte;
-typedef unsigned short ushort;
-
-typedef float SCOREMATRIX[32][32];
-typedef SCOREMATRIX *PTR_SCOREMATRIX;
-
-class MSA;
-class Seq;
-class ClusterTree;
-class DistFunc;
-class TextFile;
-class PWPath;
-class Tree;
-class SeqVect;
-class DistCalc;
-
-struct ProgNode;
-struct ProfPos;
-
-#if SINGLE_AFFINE
-/* Compress M, D and I trace-back matrices into 4 bits.
- * The M field takes values 0..2 in the two low bits; the D and I fields
- * take one bit each (0x04 and 0x08). In each name BIT_XY the second letter
- * is the matrix the field belongs to.
- * NOTE(review): the BIT_x? constants look like the per-field masks --
- * confirm at the use sites.
- */
-enum
- {
- BIT_MM = 0x00,
- BIT_DM = 0x01,
- BIT_IM = 0x02,
- BIT_xM = 0x03, // covers both bits used by the M field
-
- BIT_DD = 0x00,
- BIT_MD = 0x04,
- // ID not allowed
- BIT_xD = 0x04, // same bit as BIT_MD
-
- BIT_II = 0x00,
- BIT_MI = 0x08,
- // DI not allowed
- BIT_xI = 0x08, // same bit as BIT_MI
- };
-
-#endif
-
-#if DOUBLE_AFFINE
-/* Compress M, D, E, I and J trace-back matrices into 7 bits.
- * The M field takes values 0..4 in the three low bits; the D, E, I and J
- * fields take one bit each (0x08, 0x10, 0x20 and 0x40). In each name
- * BIT_XY the second letter is the matrix the field belongs to.
- * NOTE(review): the BIT_x? constants look like the per-field masks --
- * confirm at the use sites.
- */
-enum
- {
- BIT_MM = 0x00,
- BIT_DM = 0x01,
- BIT_EM = 0x02,
- BIT_IM = 0x03,
- BIT_JM = 0x04,
- BIT_xM = 0x07, // covers all three bits used by the M field
-
- BIT_DD = 0x00,
- BIT_MD = 0x08,
- // [EIJ]D not allowed
- BIT_xD = 0x08,
-
- BIT_EE = 0x00,
- BIT_ME = 0x10,
- // [DIJ]E not allowed
- BIT_xE = 0x10,
-
- BIT_II = 0x00,
- BIT_MI = 0x20,
- // [EDJ]I not allowed
- BIT_xI = 0x20,
-
- BIT_JJ = 0x00,
- BIT_MJ = 0x40,
- // [EDI]J not allowed
- BIT_xJ = 0x40,
- };
-#endif
-// Status codes with fixed values 0..3; presumably used as process exit codes (TODO confirm at the call sites).
-enum EXIT
- {
- EXIT_Success = 0,
- EXIT_NotStarted = 1,
- EXIT_FatalError = 2,
- EXIT_Except = 3,
- };
-// Result of comparing a node with an older tree. NODECMP_Undefined and NODECMP_Same share the value 0, so they cannot be told apart.
-enum NODECMP
- {
- NODECMP_Undefined = 0,
- NODECMP_Same = 0, // equivalent to node in old tree
- NODECMP_Diff = 1, // equivalent & parent is changed
- NODECMP_Changed = 2 // no equivalent node in old tree
- };
-
-// Declare enums using macro hacks (see enums.h).
-#define s(t) enum t { t##_Undefined = 0,
-#define c(t, x) t##_##x,
-#define e(t) };
-#include "enums.h"
-
-// Declare conversion function XXXToStr(XXX x)
-// for each enum type XXX.
-#define s(t) const char *t##ToStr(t x);
-#define c(t, x) /* empty */
-#define e(t) /* empty */
-#include "enums.h"
-
-// Declare conversion function StrToXXX(const char *Str)
-// for each enum type XXX.
-#define s(t) t StrTo##t(const char *Str);
-#define c(t, x) /* empty */
-#define e(t) /* empty */
-#include "enums.h"
-
-const char *BoolToStr(bool b);
-const char *SecsToStr(unsigned long Secs);
-
-#endif // types_h
Deleted: trunk/packages/muscle/trunk/typetostr.cpp
===================================================================
--- trunk/packages/muscle/trunk/typetostr.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/typetostr.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,58 +0,0 @@
-#include "muscle.h"
-#include <stdio.h>
-
-// Format a duration given in seconds as "HH:MM:SS". Hours use more than two
-// digits when needed. The result points to a static buffer that is
-// overwritten by the next call, so use or copy it before calling again.
-const char *SecsToStr(unsigned long Secs)
- {
-// Room for the largest 64-bit hour count plus ":MM:SS" and the terminator.
- static char Str[32];
-
- const unsigned long hh = Secs/(60*60);
- const unsigned long mm = (Secs/60)%60;
- const unsigned long ss = Secs%60;
-
-// %lu matches the unsigned long arguments; the former %d did not.
- sprintf(Str, "%02lu:%02lu:%02lu", hh, mm, ss);
- return Str;
- }
-
-// Text form of a boolean: "True" or "False".
-const char *BoolToStr(bool b)
-    {
-    if (b)
-        return "True";
-    return "False";
-    }
-
-// Format a score with "%8g". Scores at or below MINUS_INFINITY are shown as
-// a star instead of a number.
-// The text is written into one of 16 rotating static slots, so up to 16
-// results can be alive at once (for example several calls inside a single
-// printf-like argument list).
-const char *ScoreToStr(SCORE Score)
-    {
-    enum { SLOT_COUNT = 16, SLOT_BYTES = 16 };
-    static char Slots[SLOT_COUNT][SLOT_BYTES];
-    static int iSlot = 0;
-
-    if (Score <= MINUS_INFINITY)
-        return " *";
-
-    // Advance to the next slot before writing.
-    iSlot = (iSlot + 1)%SLOT_COUNT;
-    char *pSlot = Slots[iSlot];
-    sprintf(pSlot, "%8g", Score);
-    return pSlot;
-    }
-
-// Left-justified version of ScoreToStr: formats with "%.3g" and shows scores
-// at or below MINUS_INFINITY as "*".
-// Uses its own set of 16 rotating static slots, so up to 16 results can be
-// alive at once.
-const char *ScoreToStrL(SCORE Score)
-    {
-    enum { SLOT_COUNT = 16, SLOT_BYTES = 16 };
-    static char Slots[SLOT_COUNT][SLOT_BYTES];
-    static int iSlot = 0;
-
-    if (Score <= MINUS_INFINITY)
-        return "*";
-
-    // Advance to the next slot before writing.
-    iSlot = (iSlot + 1)%SLOT_COUNT;
-    char *pSlot = Slots[iSlot];
-    sprintf(pSlot, "%.3g", Score);
-    return pSlot;
-    }
-
-// Weights are formatted exactly like scores; see ScoreToStr for the format
-// and for the lifetime of the returned text.
-const char *WeightToStr(WEIGHT w)
-    {
-    const char *pszText = ScoreToStr(w);
-    return pszText;
-    }
Deleted: trunk/packages/muscle/trunk/unixio.h
===================================================================
--- trunk/packages/muscle/trunk/unixio.h 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/unixio.h 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,11 +0,0 @@
-#ifdef WIN32
-#include <fcntl.h>
-#include <io.h>
-#else
-#include <fcntl.h>
-#include <unistd.h>
-#endif
-
-#if !defined(WIN32) && !defined(O_BINARY)
-#define O_BINARY 0
-#endif
Deleted: trunk/packages/muscle/trunk/upgma2.cpp
===================================================================
--- trunk/packages/muscle/trunk/upgma2.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/upgma2.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,395 +0,0 @@
-#include "muscle.h"
-#include "tree.h"
-#include "distcalc.h"
-
-// UPGMA clustering in O(N^2) time and space.
-
-#define TRACE 0
-// Function-like helpers. An argument can be evaluated more than once (MIN, MAX), so pass only side-effect-free expressions.
-#define MIN(x, y) ((x) < (y) ? (x) : (y))
-#define MAX(x, y) ((x) > (y) ? (x) : (y))
-#define AVG(x, y) (((x) + (y))/2)
-
-static unsigned g_uLeafCount;
-static unsigned g_uTriangleSize;
-static unsigned g_uInternalNodeCount;
-static unsigned g_uInternalNodeIndex;
-
-// Triangular distance matrix is g_Dist, which is allocated
-// as a one-dimensional vector of length g_uTriangleSize.
-// TriangleSubscript(i,j) maps row,column=i,j to the subscript
-// into this vector.
-// Row / column coordinates are a bit messy.
-// Initially they are leaf indexes 0..N-1.
-// But each time we create a new node (=new cluster, new subtree),
-// we re-use one of the two rows that become available (the children
-// of the new node). This saves memory.
-// We keep track of this through the g_uNodeIndex vector.
-static dist_t *g_Dist;
-
-// Distance to nearest neighbor in row i of distance matrix.
-// Subscript is distance matrix row.
-static dist_t *g_MinDist;
-
-// Nearest neighbor to row i of distance matrix.
-// Subscript is distance matrix row.
-static unsigned *g_uNearestNeighbor;
-
-// Node index of row i in distance matrix.
-// Node indexes are 0..N-1 for leaves, N..2N-2 for internal nodes.
-// Subscript is distance matrix row.
-static unsigned *g_uNodeIndex;
-
-// The following vectors are defined on internal nodes,
-// subscripts are internal node index 0..N-2.
-// For g_uLeft/Right, value is the node index 0 .. 2N-2
-// because a child can be internal or leaf.
-static unsigned *g_uLeft;
-static unsigned *g_uRight;
-static dist_t *g_Height;
-static dist_t *g_LeftLength;
-static dist_t *g_RightLength;
-
-// Map a pair of distance-matrix rows to the subscript of their entry in the
-// one-dimensional triangular vector g_Dist. The order of the two indexes
-// does not matter: the larger one selects the triangle row, the smaller one
-// the offset within it. The callers in this file only pass two different
-// indexes.
-static inline unsigned TriangleSubscript(unsigned uIndex1, unsigned uIndex2)
-    {
-#if DEBUG
-    if (uIndex1 >= g_uLeafCount || uIndex2 >= g_uLeafCount)
-        Quit("TriangleSubscript(%u,%u) %u", uIndex1, uIndex2, g_uLeafCount);
-#endif
-    const bool bFirstIsLarger = (uIndex1 >= uIndex2);
-    const unsigned uHigh = bFirstIsLarger ? uIndex1 : uIndex2;
-    const unsigned uLow = bFirstIsLarger ? uIndex2 : uIndex1;
-
-    const unsigned uSubscript = uLow + (uHigh*(uHigh - 1))/2;
-    assert(uSubscript < (g_uLeafCount*(g_uLeafCount - 1))/2);
-    return uSubscript;
-    }
-/*
- * ListState: debugging aid that logs the current clustering state in three
- * tables: the distance matrix for the rows still in use, the per-row
- * nearest-neighbor table, and the internal nodes 0..g_uInternalNodeIndex.
- * Rows whose g_uNodeIndex entry is uInsane are skipped.
- * In this file it is only called from code under #if TRACE.
- * NOTE(review): the last loop reads entries up to and including
- * g_uInternalNodeIndex, so that index must be a valid internal-node
- * subscript whenever this is called.
- */
-static void ListState()
- {
- Log("Dist matrix\n");
- Log(" ");
- for (unsigned i = 0; i < g_uLeafCount; ++i) // column headings: node index of each live row
- {
- if (uInsane == g_uNodeIndex[i])
- continue;
- Log(" %5u", g_uNodeIndex[i]);
- }
- Log("\n");
-
- for (unsigned i = 0; i < g_uLeafCount; ++i)
- {
- if (uInsane == g_uNodeIndex[i])
- continue;
- Log("%5u ", g_uNodeIndex[i]);
- for (unsigned j = 0; j < g_uLeafCount; ++j)
- {
- if (uInsane == g_uNodeIndex[j])
- continue;
- if (i == j) // no diagonal entry is stored in the triangular vector
- Log(" ");
- else
- {
- unsigned v = TriangleSubscript(i, j);
- Log("%5.2g ", g_Dist[v]);
- }
- }
- Log("\n");
- }
-
- Log("\n");
- Log(" i Node NrNb Dist\n");
- Log("----- ----- ----- --------\n");
- for (unsigned i = 0; i < g_uLeafCount; ++i)
- {
- if (uInsane == g_uNodeIndex[i])
- continue;
- Log("%5u %5u %5u %8.3f\n",
- i,
- g_uNodeIndex[i],
- g_uNearestNeighbor[i],
- g_MinDist[i]);
- }
-
- Log("\n");
- Log(" Node L R Height LLength RLength\n");
- Log("----- ----- ----- ------ ------- -------\n");
- for (unsigned i = 0; i <= g_uInternalNodeIndex; ++i)
- Log("%5u %5u %5u %6.2g %6.2g %6.2g\n",
- i,
- g_uLeft[i],
- g_uRight[i],
- g_Height[i],
- g_LeftLength[i],
- g_RightLength[i]);
- }
-/*
- * UPGMA2: build a tree from pairwise distances by agglomerative clustering.
- *
- * DC       supplies the leaf count, ids, names and distances
- * tree     output, filled in through Tree::Create
- * Linkage  how the distance from a merged cluster to another row is derived
- *          from the two children's distances dL and dR: average, minimum,
- *          maximum, or the g_dSUEFF-weighted blend of average and minimum
- *
- * Each of the g_uLeafCount - 1 iterations picks the row with the smallest
- * nearest-neighbor distance (Lmin), joins it with its nearest neighbor
- * (Rmin) into a new internal node of height d(Lmin,Rmin)/2, stores the new
- * cluster's distances in Lmin's row and retires Rmin's row.
- *
- * All working storage lives in file-scope statics that are allocated here
- * and released before returning.
- * NOTE(review): with fewer than two leaves the unsigned expressions
- * g_uLeafCount - 1 and g_uLeafCount - 2 wrap around; callers presumably
- * guarantee at least two leaves -- confirm.
- */
-void UPGMA2(const DistCalc &DC, Tree &tree, LINKAGE Linkage)
- {
- g_uLeafCount = DC.GetCount();
-
- g_uTriangleSize = (g_uLeafCount*(g_uLeafCount - 1))/2;
- g_uInternalNodeCount = g_uLeafCount - 1;
-
- g_Dist = new dist_t[g_uTriangleSize];
-
- g_uNodeIndex = new unsigned[g_uLeafCount];
- g_uNearestNeighbor = new unsigned[g_uLeafCount];
- g_MinDist = new dist_t[g_uLeafCount];
- unsigned *Ids = new unsigned [g_uLeafCount];
- char **Names = new char *[g_uLeafCount];
-
- g_uLeft = new unsigned[g_uInternalNodeCount];
- g_uRight = new unsigned[g_uInternalNodeCount];
- g_Height = new dist_t[g_uInternalNodeCount];
- g_LeftLength = new dist_t[g_uInternalNodeCount];
- g_RightLength = new dist_t[g_uInternalNodeCount];
-
- for (unsigned i = 0; i < g_uLeafCount; ++i)
- {
- g_MinDist[i] = BIG_DIST;
- g_uNodeIndex[i] = i; // initially row i holds leaf i
- g_uNearestNeighbor[i] = uInsane;
- Ids[i] = DC.GetId(i);
- Names[i] = strsave(DC.GetName(i)); // released with free() at the end of this function
- }
-
- for (unsigned i = 0; i < g_uInternalNodeCount; ++i)
- {
- g_uLeft[i] = uInsane;
- g_uRight[i] = uInsane;
- g_LeftLength[i] = BIG_DIST;
- g_RightLength[i] = BIG_DIST;
- g_Height[i] = BIG_DIST;
- }
-
-// Compute initial NxN triangular distance matrix.
-// Store minimum distance for each full (not triangular) row.
-// Loop from 1, not 0, because "row" is 0, 1 ... i-1,
-// so nothing to do when i=0.
- for (unsigned i = 1; i < g_uLeafCount; ++i)
- {
- dist_t *Row = g_Dist + TriangleSubscript(i, 0);
- DC.CalcDistRange(i, Row);
- for (unsigned j = 0; j < i; ++j)
- {
- const dist_t d = Row[j];
- if (d < g_MinDist[i])
- {
- g_MinDist[i] = d;
- g_uNearestNeighbor[i] = j;
- }
- if (d < g_MinDist[j]) // the same distance also belongs to the full row of j
- {
- g_MinDist[j] = d;
- g_uNearestNeighbor[j] = i;
- }
- }
- }
-
-#if TRACE
- Log("Initial state:\n");
- ListState();
-#endif
-
- for (g_uInternalNodeIndex = 0; g_uInternalNodeIndex < g_uLeafCount - 1;
- ++g_uInternalNodeIndex)
- {
-#if TRACE
- Log("\n");
- Log("Internal node index %5u\n", g_uInternalNodeIndex);
- Log("-------------------------\n");
-#endif
-
- // Find nearest neighbors
- unsigned Lmin = uInsane;
- unsigned Rmin = uInsane;
- dist_t dtMinDist = BIG_DIST;
- for (unsigned j = 0; j < g_uLeafCount; ++j)
- {
- if (uInsane == g_uNodeIndex[j]) // row already merged away
- continue;
-
- dist_t d = g_MinDist[j];
- if (d < dtMinDist)
- {
- dtMinDist = d;
- Lmin = j;
- Rmin = g_uNearestNeighbor[j];
- assert(uInsane != Rmin);
- assert(uInsane != g_uNodeIndex[Rmin]);
- }
- }
-
- assert(Lmin != uInsane);
- assert(Rmin != uInsane);
- assert(dtMinDist != BIG_DIST);
-
-#if TRACE
- Log("Nearest neighbors Lmin %u[=%u] Rmin %u[=%u] dist %.3g\n",
- Lmin,
- g_uNodeIndex[Lmin],
- Rmin,
- g_uNodeIndex[Rmin],
- dtMinDist);
-#endif
-
- // Compute distances to new node
- // New node overwrites row currently assigned to Lmin
- dist_t dtNewMinDist = BIG_DIST;
- unsigned uNewNearestNeighbor = uInsane;
- for (unsigned j = 0; j < g_uLeafCount; ++j)
- {
- if (j == Lmin || j == Rmin)
- continue;
- if (uInsane == g_uNodeIndex[j])
- continue;
-
- const unsigned vL = TriangleSubscript(Lmin, j);
- const unsigned vR = TriangleSubscript(Rmin, j);
- const dist_t dL = g_Dist[vL];
- const dist_t dR = g_Dist[vR];
- dist_t dtNewDist;
-
- switch (Linkage)
- {
- case LINKAGE_Avg:
- dtNewDist = AVG(dL, dR);
- break;
-
- case LINKAGE_Min:
- dtNewDist = MIN(dL, dR);
- break;
-
- case LINKAGE_Max:
- dtNewDist = MAX(dL, dR);
- break;
-
- case LINKAGE_Biased:
- dtNewDist = g_dSUEFF*AVG(dL, dR) + (1 - g_dSUEFF)*MIN(dL, dR);
- break;
-
- default:
- Quit("UPGMA2: Invalid LINKAGE_%u", Linkage);
- }
-
- // Nasty special case.
- // If nearest neighbor of j is Lmin or Rmin, then make the new
- // node (which overwrites the row currently occupied by Lmin)
- // the nearest neighbor. This situation can occur when there are
- // equal distances in the matrix. If we don't make this fix,
- // the nearest neighbor pointer for j would become invalid.
- // (We don't need to test for == Lmin, because in that case
- // the net change needed is zero due to the change in row
- // numbering).
- if (g_uNearestNeighbor[j] == Rmin)
- g_uNearestNeighbor[j] = Lmin;
-
-#if TRACE
- Log("New dist to %u = (%u/%.3g + %u/%.3g)/2 = %.3g\n",
- j, Lmin, dL, Rmin, dR, dtNewDist);
-#endif
- g_Dist[vL] = dtNewDist; // only the Lmin entry is updated; the Rmin row is retired below
- if (dtNewDist < dtNewMinDist)
- {
- dtNewMinDist = dtNewDist;
- uNewNearestNeighbor = j;
- }
- }
-
- assert(g_uInternalNodeIndex < g_uLeafCount - 1 || BIG_DIST != dtNewMinDist);
- assert(g_uInternalNodeIndex < g_uLeafCount - 1 || uInsane != uNewNearestNeighbor);
-// NOTE(review): the loop condition already guarantees g_uInternalNodeIndex < g_uLeafCount - 1, so both assertions above always hold.
- const unsigned v = TriangleSubscript(Lmin, Rmin);
- const dist_t dLR = g_Dist[v];
- const dist_t dHeightNew = dLR/2;
- const unsigned uLeft = g_uNodeIndex[Lmin];
- const unsigned uRight = g_uNodeIndex[Rmin];
- const dist_t HeightLeft =
- uLeft < g_uLeafCount ? 0 : g_Height[uLeft - g_uLeafCount]; // leaves have height 0
- const dist_t HeightRight =
- uRight < g_uLeafCount ? 0 : g_Height[uRight - g_uLeafCount];
-
- g_uLeft[g_uInternalNodeIndex] = uLeft;
- g_uRight[g_uInternalNodeIndex] = uRight;
- g_LeftLength[g_uInternalNodeIndex] = dHeightNew - HeightLeft;
- g_RightLength[g_uInternalNodeIndex] = dHeightNew - HeightRight;
- g_Height[g_uInternalNodeIndex] = dHeightNew;
-
- // Row for left child overwritten by row for new node
- g_uNodeIndex[Lmin] = g_uLeafCount + g_uInternalNodeIndex;
- g_uNearestNeighbor[Lmin] = uNewNearestNeighbor;
- g_MinDist[Lmin] = dtNewMinDist;
-
- // Delete row for right child
- g_uNodeIndex[Rmin] = uInsane;
-
-#if TRACE
- Log("\nInternalNodeIndex=%u Lmin=%u Rmin=%u\n",
- g_uInternalNodeIndex, Lmin, Rmin);
- ListState();
-#endif
- }
-
- unsigned uRoot = g_uLeafCount - 2; // subscript of the internal node created by the last iteration
- tree.Create(g_uLeafCount, uRoot, g_uLeft, g_uRight, g_LeftLength, g_RightLength,
- Ids, Names);
-
-#if TRACE
- tree.LogMe();
-#endif
-
- delete[] g_Dist;
-
- delete[] g_uNodeIndex;
- delete[] g_uNearestNeighbor;
- delete[] g_MinDist;
- delete[] g_Height;
-
- delete[] g_uLeft;
- delete[] g_uRight;
- delete[] g_LeftLength;
- delete[] g_RightLength;
-
- for (unsigned i = 0; i < g_uLeafCount; ++i)
- free(Names[i]);
- delete[] Names;
- delete[] Ids;
- }
-
-// Fixed five-item DistCalc used by Test() to drive UPGMA2: distances come
-// from a hard-coded symmetric 5x5 table, ids equal the item index and every
-// item is called "name".
-class DistCalcTest : public DistCalc
-    {
-    virtual unsigned GetCount() const
-        {
-        return 5;
-        }
-    virtual unsigned GetId(unsigned i) const
-        {
-        return i;
-        }
-    virtual const char *GetName(unsigned i) const
-        {
-        return "name";
-        }
-    // Copies the first i entries of table row i into Dist.
-    virtual void CalcDistRange(unsigned i, dist_t Dist[]) const
-        {
-        static dist_t TestDist[5][5] =
-            {
-            {  0,  2, 14, 14, 20 },
-            {  2,  0, 14, 14, 20 },
-            { 14, 14,  0,  4, 20 },
-            { 14, 14,  4,  0, 20 },
-            { 20, 20, 20, 20,  0 },
-            };
-        const dist_t *TableRow = TestDist[i];
-        for (unsigned uColumn = 0; uColumn < i; ++uColumn)
-            Dist[uColumn] = TableRow[uColumn];
-        }
-    };
-// Ad-hoc driver: sets the log file, then runs UPGMA2 with average linkage on the fixed DistCalcTest data. NOTE(review): the log path is a hard-coded Windows location.
-void Test()
- {
- SetListFileName("c:\\tmp\\lobster.log", false);
- DistCalcTest DC;
- Tree tree;
- UPGMA2(DC, tree, LINKAGE_Avg);
- }
Deleted: trunk/packages/muscle/trunk/usage.cpp
===================================================================
--- trunk/packages/muscle/trunk/usage.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/usage.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,47 +0,0 @@
-#include "muscle.h"
-#include <stdio.h>
-
-void Credits()
- {
- static bool Displayed = false;
- if (Displayed)
- return;
-
- fprintf(stderr, "\n" MUSCLE_LONG_VERSION "\n\n");
- fprintf(stderr, "http://www.drive5.com/muscle\n");
- fprintf(stderr, "This software is donated to the public domain.\n");
- fprintf(stderr, "Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.\n\n");
- Displayed = true;
- }
-
-void Usage()
- {
- Credits();
- fprintf(stderr,
-"\n"
-"Basic usage\n"
-"\n"
-" muscle -in <inputfile> -out <outputfile>\n"
-"\n"
-"Common options (for a complete list please see the User Guide):\n"
-"\n"
-" -in <inputfile> Input file in FASTA format (default stdin)\n"
-" -out <outputfile> Output alignment in FASTA format (default stdout)\n"
-" -diags Find diagonals (faster for similar sequences)\n"
-" -maxiters <n> Maximum number of iterations (integer, default 16)\n"
-" -maxhours <h> Maximum time to iterate in hours (default no limit)\n"
-" -maxmb <m> Maximum memory to allocate in Mb (default 80%% of RAM)\n"
-" -html Write output in HTML format (default FASTA)\n"
-" -msf Write output in GCG MSF format (default FASTA)\n"
-" -clw Write output in CLUSTALW format (default FASTA)\n"
-" -clwstrict As -clw, with 'CLUSTAL W (1.81)' header\n"
-" -log[a] <logfile> Log to file (append if -loga, overwrite if -log)\n"
-" -quiet Do not write progress messages to stderr\n"
-" -stable Output sequences in input order (default is -group)\n"
-" -group Group sequences by similarity (this is the default)\n"
-" -version Display version information and exit\n"
-"\n"
-"Without refinement (very fast, avg accuracy similar to T-Coffee): -maxiters 2\n"
-"Fastest possible (amino acids): -maxiters 1 -diags -sv -distance1 kbit20_3\n"
-"Fastest possible (nucleotides): -maxiters 1 -diags\n");
- }
Deleted: trunk/packages/muscle/trunk/validateids.cpp
===================================================================
--- trunk/packages/muscle/trunk/validateids.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/validateids.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,105 +0,0 @@
-#include "muscle.h"
-#include "msa.h"
-#include "tree.h"
-#include "seqvect.h"
-
-#if DEBUG
-static SeqVect *g_ptrMuscleSeqVect = 0;
-static MSA MuscleInputMSA;
-
-void SetMuscleInputMSA(MSA &msa)
- {
- MuscleInputMSA.Copy(msa);
- }
-
-void SetMuscleSeqVect(SeqVect &v)
- {
- g_ptrMuscleSeqVect = &v;
- }
-
-void ValidateMuscleIdsSeqVect(const MSA &msa)
- {
- const unsigned uSeqCount = msa.GetSeqCount();
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
- {
- const unsigned uId = msa.GetSeqId(uSeqIndex);
- const char *ptrNameMSA = msa.GetSeqName(uSeqIndex);
- const char *ptrName = g_ptrMuscleSeqVect->GetSeqName(uId);
- if (0 != strcmp(ptrNameMSA, ptrName))
- Quit("ValidateMuscleIdsSeqVect, names don't match");
- }
- }
-
-void ValidateMuscleIdsMSA(const MSA &msa)
- {
- const unsigned uSeqCount = msa.GetSeqCount();
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
- {
- const unsigned uId = msa.GetSeqId(uSeqIndex);
- const char *ptrNameMSA = msa.GetSeqName(uSeqIndex);
- const char *ptrName = MuscleInputMSA.GetSeqName(uId);
- if (0 != strcmp(ptrNameMSA, ptrName))
- {
- Log("Input MSA:\n");
- MuscleInputMSA.LogMe();
- Log("MSA being tested:\n");
- msa.LogMe();
- Log("Id=%u\n", uId);
- Log("Input name=%s\n", ptrName);
- Log("Test name=%s\n", ptrNameMSA);
- Quit("ValidateMuscleIdsMSA, names don't match");
- }
- }
- }
-
-void ValidateMuscleIds(const MSA &msa)
- {
- if (0 != g_ptrMuscleSeqVect)
- ValidateMuscleIdsSeqVect(msa);
- else if (0 != MuscleInputMSA.GetSeqCount())
- ValidateMuscleIdsMSA(msa);
- else
- Quit("ValidateMuscleIds, ptrMuscleSeqVect=0 && 0 == MuscleInputMSA.SeqCount()");
-
- }
-
-void ValidateMuscleIdsSeqVect(const Tree &tree)
- {
- const unsigned uNodeCount = tree.GetNodeCount();
- for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
- {
- if (!tree.IsLeaf(uNodeIndex))
- continue;
- const unsigned uId = tree.GetLeafId(uNodeIndex);
- const char *ptrNameTree = tree.GetLeafName(uNodeIndex);
- const char *ptrName = g_ptrMuscleSeqVect->GetSeqName(uId);
- if (0 != strcmp(ptrNameTree, ptrName))
- Quit("ValidateMuscleIds: names don't match");
- }
- }
-
-void ValidateMuscleIdsMSA(const Tree &tree)
- {
- const unsigned uNodeCount = tree.GetNodeCount();
- for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
- {
- if (!tree.IsLeaf(uNodeIndex))
- continue;
- const unsigned uId = tree.GetLeafId(uNodeIndex);
- const char *ptrNameTree = tree.GetLeafName(uNodeIndex);
- const char *ptrName = MuscleInputMSA.GetSeqName(uId);
- if (0 != strcmp(ptrNameTree, ptrName))
- Quit("ValidateMuscleIds: names don't match");
- }
- }
-
-void ValidateMuscleIds(const Tree &tree)
- {
- if (0 != g_ptrMuscleSeqVect)
- ValidateMuscleIdsSeqVect(tree);
- else if (0 != MuscleInputMSA.GetSeqCount())
- ValidateMuscleIdsMSA(tree);
- else
- Quit("ValidateMuscleIds, ptrMuscleSeqVect=0 && 0 == MuscleInputMSA.SeqCount");
- }
-#endif
Deleted: trunk/packages/muscle/trunk/vtml2.cpp
===================================================================
--- trunk/packages/muscle/trunk/vtml2.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/vtml2.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,145 +0,0 @@
-#include "muscle.h"
-
-// Note: We use 32x32 arrays rather than 20x20 as this may give the compiler
-// optimizer an opportunity to make subscript arithmetic more efficient
-// (multiplying by 32 is same as shifting left by 5 bits).
-
-#define v(x) ((float) x)
-#define ROW(A, C, D, E, F, G, H, I, K, L, M, N, P, Q, R, S, T, V, W, Y) \
- { v(A), v(C), v(D), v(E), v(F), v(G), v(H), v(I), v(K), v(L), v(M), v(N), v(P), v(Q), \
- v(R), v(S), v(T), v(V), v(W), v(Y) },
-
-
-// A C D E F G H I K L
-// M N P Q R S T V W Y
-// VTML200
-float VTML_LA[32][32] =
- {
-ROW( 2.25080, 1.31180, 0.82704, 0.88740, 0.55520, 1.09860, 0.71673, 0.80805, 0.81213, 0.68712,
- 0.79105, 0.86777, 0.99328, 0.86644, 0.72821, 1.33924, 1.20373, 1.05956, 0.38107, 0.54373) // A
-
-ROW( 1.31180,15.79469, 0.39862, 0.42329, 0.49882, 0.65541, 0.67100, 0.97185, 0.46414, 0.55673,
- 0.90230, 0.63236, 0.54479, 0.47895, 0.56465, 1.18490, 0.99069, 1.21604, 0.28988, 0.91338) // C
-
-ROW( 0.82704, 0.39862, 4.18833, 2.06850, 0.25194, 0.90937, 1.01617, 0.32860, 1.03391, 0.31300,
- 0.42498, 1.80888, 0.81307, 1.20043, 0.63712, 1.03001, 0.88191, 0.43557, 0.26313, 0.37947) // D
-
-ROW( 0.88740, 0.42329, 2.06850, 3.08354, 0.33456, 0.77183, 0.94536, 0.43151, 1.35989, 0.45579,
- 0.53423, 1.15745, 0.82832, 1.66752, 0.84500, 0.98693, 0.88132, 0.54047, 0.24519, 0.52025) // E
-
-ROW( 0.55520, 0.49882, 0.25194, 0.33456, 6.08351, 0.30140, 1.02191, 1.10969, 0.37069, 1.50587,
- 1.41207, 0.42850, 0.41706, 0.48113, 0.41970, 0.56867, 0.57172, 0.91256, 2.02494, 3.44675) // F
-
-ROW( 1.09860, 0.65541, 0.90937, 0.77183, 0.30140, 5.62829, 0.64191, 0.28432, 0.67874, 0.30549,
- 0.37739, 1.01012, 0.60851, 0.65996, 0.63660, 1.03448, 0.68435, 0.40728, 0.36034, 0.35679) // G
-
-ROW( 0.71673, 0.67100, 1.01617, 0.94536, 1.02191, 0.64191, 6.05494, 0.50783, 1.03822, 0.60887,
- 0.55685, 1.28619, 0.72275, 1.41503, 1.24635, 0.93344, 0.83543, 0.54817, 0.81780, 1.81552) // H
-
-ROW( 0.80805, 0.97185, 0.32860, 0.43151, 1.10969, 0.28432, 0.50783, 3.03766, 0.49310, 1.88886,
- 1.75039, 0.44246, 0.44431, 0.53213, 0.48153, 0.55603, 0.88168, 2.37367, 0.68494, 0.70035) // I
-
-ROW( 0.81213, 0.46414, 1.03391, 1.35989, 0.37069, 0.67874, 1.03822, 0.49310, 2.72883, 0.52739,
- 0.68244, 1.15671, 0.82911, 1.51333, 2.33521, 0.93858, 0.92730, 0.55467, 0.39944, 0.52549) // K
-
-ROW( 0.68712, 0.55673, 0.31300, 0.45579, 1.50587, 0.30549, 0.60887, 1.88886, 0.52739, 3.08540,
- 2.14480, 0.43539, 0.53630, 0.62771, 0.53025, 0.53468, 0.69924, 1.50372, 0.82822, 0.89854) // L
-
-ROW( 0.79105, 0.90230, 0.42498, 0.53423, 1.41207, 0.37739, 0.55685, 1.75039, 0.68244, 2.14480,
- 4.04057, 0.55603, 0.48415, 0.76770, 0.66775, 0.62409, 0.87759, 1.42742, 0.52278, 0.72067) // M
-
-ROW( 0.86777, 0.63236, 1.80888, 1.15745, 0.42850, 1.01012, 1.28619, 0.44246, 1.15671, 0.43539,
- 0.55603, 3.36000, 0.69602, 1.13490, 0.98603, 1.31366, 1.11252, 0.50603, 0.35810, 0.68349) // N
-
-ROW( 0.99328, 0.54479, 0.81307, 0.82832, 0.41706, 0.60851, 0.72275, 0.44431, 0.82911, 0.53630,
- 0.48415, 0.69602, 7.24709, 0.90276, 0.74827, 1.03719, 0.83014, 0.56795, 0.37867, 0.33127) // P
-
-ROW( 0.86644, 0.47895, 1.20043, 1.66752, 0.48113, 0.65996, 1.41503, 0.53213, 1.51333, 0.62771,
- 0.76770, 1.13490, 0.90276, 2.86937, 1.50116, 0.99561, 0.93103, 0.61085, 0.29926, 0.51971) // Q
-
-ROW( 0.72821, 0.56465, 0.63712, 0.84500, 0.41970, 0.63660, 1.24635, 0.48153, 2.33521, 0.53025,
- 0.66775, 0.98603, 0.74827, 1.50116, 4.28698, 0.84662, 0.80673, 0.51422, 0.47569, 0.59592) // R
-
-ROW( 1.33924, 1.18490, 1.03001, 0.98693, 0.56867, 1.03448, 0.93344, 0.55603, 0.93858, 0.53468,
- 0.62409, 1.31366, 1.03719, 0.99561, 0.84662, 2.13816, 1.52911, 0.67767, 0.45129, 0.66767) // S
-
-ROW( 1.20373, 0.99069, 0.88191, 0.88132, 0.57172, 0.68435, 0.83543, 0.88168, 0.92730, 0.69924,
- 0.87759, 1.11252, 0.83014, 0.93103, 0.80673, 1.52911, 2.58221, 0.98702, 0.31541, 0.57954) // T
-
-ROW( 1.05956, 1.21604, 0.43557, 0.54047, 0.91256, 0.40728, 0.54817, 2.37367, 0.55467, 1.50372,
- 1.42742, 0.50603, 0.56795, 0.61085, 0.51422, 0.67767, 0.98702, 2.65580, 0.43419, 0.63805) // V
-
-ROW( 0.38107, 0.28988, 0.26313, 0.24519, 2.02494, 0.36034, 0.81780, 0.68494, 0.39944, 0.82822,
- 0.52278, 0.35810, 0.37867, 0.29926, 0.47569, 0.45129, 0.31541, 0.43419,31.39564, 2.51433) // W
-
-ROW( 0.54373, 0.91338, 0.37947, 0.52025, 3.44675, 0.35679, 1.81552, 0.70035, 0.52549, 0.89854,
- 0.72067, 0.68349, 0.33127, 0.51971, 0.59592, 0.66767, 0.57954, 0.63805, 2.51433, 7.50693) // Y
- };
-
-const float VTML_SP_CENTER = (float) 22.0;
-
-#undef ROW
-#undef v
-#define v(x) ((float) (x + VTML_SP_CENTER))
-#define ROW(A, C, D, E, F, G, H, I, K, L, M, N, P, Q, R, S, T, V, W, Y, X) \
- { v(A), v(C), v(D), v(E), v(F), v(G), v(H), v(I), v(K), v(L), v(M), v(N), v(P), v(Q), \
- v(R), v(S), v(T), v(V), v(W), v(Y), v(X) },
-
-// VTML 240
-float VTML_SP[32][32] =
- {
-// A C D E F G H I K L M N P Q R S T V W Y X
-ROW( 58, 23, -12, -7, -44, 10, -23, -14, -14, -27, -17, -8, 1, -9, -22, 23, 15, 5, -74, -45, 0) // A
-ROW( 23, 224, -67, -63, -50, -30, -29, 1, -56, -41, -6, -33, -44, -53, -43, 15, 2, 18, -93, -6, 0) // C
-ROW( -12, -67, 111, 59,-104, -4, 4, -84, 6, -88, -65, 48, -13, 18, -29, 5, -7, -63,-105, -73, 0) // D
-ROW( -7, -63, 59, 85, -83, -17, -1, -63, 25, -60, -47, 15, -12, 40, -8, 1, -7, -47,-108, -51, 0) // E
-ROW( -44, -50,-104, -83, 144, -93, 4, 12, -74, 36, 30, -64, -67, -56, -65, -43, -41, -3, 63, 104, 0) // F
-ROW( 10, -30, -4, -17, -93, 140, -32, -95, -27, -91, -75, 4, -36, -29, -32, 5, -26, -68, -80, -79, 0) // G
-ROW( -23, -29, 4, -1, 4, -32, 137, -50, 6, -37, -42, 21, -23, 27, 19, -4, -12, -44, -13, 48, 0) // H
-ROW( -14, 1, -84, -63, 12, -95, -50, 86, -53, 53, 47, -62, -60, -47, -55, -43, -8, 69, -27, -24, 0) // I
-ROW( -14, -56, 6, 25, -74, -27, 6, -53, 75, -48, -30, 13, -12, 34, 68, -3, -4, -44, -71, -49, 0) // K
-ROW( -27, -41, -88, -60, 36, -91, -37, 53, -48, 88, 62, -63, -48, -36, -48, -47, -25, 36, -11, -4, 0) // L
-ROW( -17, -6, -65, -47, 30, -75, -42, 47, -30, 62, 103, -45, -54, -21, -31, -35, -9, 31, -46, -20, 0) // M
-ROW( -8, -33, 48, 15, -64, 4, 21, -62, 13, -63, -45, 89, -25, 12, 2, 22, 10, -51, -79, -29, 0) // N
-ROW( 1, -44, -13, -12, -67, -36, -23, -60, -12, -48, -54, -25, 160, -6, -20, 5, -12, -42, -76, -83, 0) // P
-ROW( -9, -53, 18, 40, -56, -29, 27, -47, 34, -36, -21, 12, -6, 75, 34, 1, -4, -37, -92, -48, 0) // Q
-ROW( -22, -43, -29, -8, -65, -32, 19, -55, 68, -48, -31, 2, -20, 34, 113, -10, -14, -49, -58, -39, 0) // R
-ROW( 23, 15, 5, 1, -43, 5, -4, -43, -3, -47, -35, 22, 5, 1, -10, 53, 32, -28, -62, -31, 0) // S
-ROW( 15, 2, -7, -7, -41, -26, -12, -8, -4, -25, -9, 10, -12, -4, -14, 32, 68, 0, -87, -40, 0) // T
-ROW( 5, 18, -63, -47, -3, -68, -44, 69, -44, 36, 31, -51, -42, -37, -49, -28, 0, 74, -61, -32, 0) // V
-ROW( -74, -93,-105,-108, 63, -80, -13, -27, -71, -11, -46, -79, -76, -92, -58, -62, -87, -61, 289, 81, 0) // W
-ROW( -45, -6, -73, -51, 104, -79, 48, -24, -49, -4, -20, -29, -83, -48, -39, -31, -40, -32, 81, 162, 0) // Y
-ROW( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) // X
- };
-
-#undef v
-#define v(x) ((float) (x))
-#define RNC(A, C, D, E, F, G, H, I, K, L, M, N, P, Q, R, S, T, V, W, Y, X) \
- { v(A), v(C), v(D), v(E), v(F), v(G), v(H), v(I), v(K), v(L), v(M), v(N), v(P), v(Q), \
- v(R), v(S), v(T), v(V), v(W), v(Y), v(X) },
-
-float VTML_SPNoCenter[32][32] =
- {
-// A C D E F G H I K L M N P Q R S T V W Y X
-RNC( 58, 23, -12, -7, -44, 10, -23, -14, -14, -27, -17, -8, 1, -9, -22, 23, 15, 5, -74, -45, 0) // A
-RNC( 23, 224, -67, -63, -50, -30, -29, 1, -56, -41, -6, -33, -44, -53, -43, 15, 2, 18, -93, -6, 0) // C
-RNC( -12, -67, 111, 59,-104, -4, 4, -84, 6, -88, -65, 48, -13, 18, -29, 5, -7, -63,-105, -73, 0) // D
-RNC( -7, -63, 59, 85, -83, -17, -1, -63, 25, -60, -47, 15, -12, 40, -8, 1, -7, -47,-108, -51, 0) // E
-RNC( -44, -50,-104, -83, 144, -93, 4, 12, -74, 36, 30, -64, -67, -56, -65, -43, -41, -3, 63, 104, 0) // F
-RNC( 10, -30, -4, -17, -93, 140, -32, -95, -27, -91, -75, 4, -36, -29, -32, 5, -26, -68, -80, -79, 0) // G
-RNC( -23, -29, 4, -1, 4, -32, 137, -50, 6, -37, -42, 21, -23, 27, 19, -4, -12, -44, -13, 48, 0) // H
-RNC( -14, 1, -84, -63, 12, -95, -50, 86, -53, 53, 47, -62, -60, -47, -55, -43, -8, 69, -27, -24, 0) // I
-RNC( -14, -56, 6, 25, -74, -27, 6, -53, 75, -48, -30, 13, -12, 34, 68, -3, -4, -44, -71, -49, 0) // K
-RNC( -27, -41, -88, -60, 36, -91, -37, 53, -48, 88, 62, -63, -48, -36, -48, -47, -25, 36, -11, -4, 0) // L
-RNC( -17, -6, -65, -47, 30, -75, -42, 47, -30, 62, 103, -45, -54, -21, -31, -35, -9, 31, -46, -20, 0) // M
-RNC( -8, -33, 48, 15, -64, 4, 21, -62, 13, -63, -45, 89, -25, 12, 2, 22, 10, -51, -79, -29, 0) // N
-RNC( 1, -44, -13, -12, -67, -36, -23, -60, -12, -48, -54, -25, 160, -6, -20, 5, -12, -42, -76, -83, 0) // P
-RNC( -9, -53, 18, 40, -56, -29, 27, -47, 34, -36, -21, 12, -6, 75, 34, 1, -4, -37, -92, -48, 0) // Q
-RNC( -22, -43, -29, -8, -65, -32, 19, -55, 68, -48, -31, 2, -20, 34, 113, -10, -14, -49, -58, -39, 0) // R
-RNC( 23, 15, 5, 1, -43, 5, -4, -43, -3, -47, -35, 22, 5, 1, -10, 53, 32, -28, -62, -31, 0) // S
-RNC( 15, 2, -7, -7, -41, -26, -12, -8, -4, -25, -9, 10, -12, -4, -14, 32, 68, 0, -87, -40, 0) // T
-RNC( 5, 18, -63, -47, -3, -68, -44, 69, -44, 36, 31, -51, -42, -37, -49, -28, 0, 74, -61, -32, 0) // V
-RNC( -74, -93,-105,-108, 63, -80, -13, -27, -71, -11, -46, -79, -76, -92, -58, -62, -87, -61, 289, 81, 0) // W
-RNC( -45, -6, -73, -51, 104, -79, 48, -24, -49, -4, -20, -29, -83, -48, -39, -31, -40, -32, 81, 162, 0) // Y
-RNC( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) // X
- };
Deleted: trunk/packages/muscle/trunk/writescorefile.cpp
===================================================================
--- trunk/packages/muscle/trunk/writescorefile.cpp 2008-01-12 07:15:13 UTC (rev 1123)
+++ trunk/packages/muscle/trunk/writescorefile.cpp 2008-01-12 07:30:41 UTC (rev 1124)
@@ -1,69 +0,0 @@
-#include "muscle.h"
-#include "msa.h"
-#include <errno.h>
-
-extern float VTML_SP[32][32];
-extern float NUC_SP[32][32];
-
-static double GetColScore(const MSA &msa, unsigned uCol)
- {
- const unsigned uSeqCount = msa.GetSeqCount();
- unsigned uPairCount = 0;
- double dSum = 0.0;
- for (unsigned uSeq1 = 0; uSeq1 < uSeqCount; ++uSeq1)
- {
- if (msa.IsGap(uSeq1, uCol))
- continue;
- unsigned uLetter1 = msa.GetLetterEx(uSeq1, uCol);
- if (uLetter1 >= g_AlphaSize)
- continue;
- for (unsigned uSeq2 = uSeq1 + 1; uSeq2 < uSeqCount; ++uSeq2)
- {
- if (msa.IsGap(uSeq2, uCol))
- continue;
- unsigned uLetter2 = msa.GetLetterEx(uSeq2, uCol);
- if (uLetter2 >= g_AlphaSize)
- continue;
- double Score;
- switch (g_Alpha)
- {
- case ALPHA_Amino:
- Score = VTML_SP[uLetter1][uLetter2];
- break;
- case ALPHA_DNA:
- case ALPHA_RNA:
- Score = NUC_SP[uLetter1][uLetter2];
- break;
- default:
- Quit("GetColScore: invalid alpha=%d", g_Alpha);
- }
- dSum += Score;
- ++uPairCount;
- }
- }
- if (0 == uPairCount)
- return 0;
- return dSum / uPairCount;
- }
-
-void WriteScoreFile(const MSA &msa)
- {
- FILE *f = fopen(g_pstrScoreFileName, "w");
- if (0 == f)
- Quit("Cannot open score file '%s' errno=%d", g_pstrScoreFileName, errno);
-
- const unsigned uColCount = msa.GetColCount();
- const unsigned uSeqCount = msa.GetSeqCount();
- for (unsigned uCol = 0; uCol < uColCount; ++uCol)
- {
- double Score = GetColScore(msa, uCol);
- fprintf(f, "%10.3f ", Score);
- for (unsigned uSeq = 0; uSeq < uSeqCount; ++uSeq)
- {
- char c = msa.GetChar(uSeq, uCol);
- fprintf(f, "%c", c);
- }
- fprintf(f, "\n");
- }
- fclose(f);
- }
More information about the debian-med-commit
mailing list