[med-svn] [paml] 01/03: New upstream version 4.9e+dfsg

Andreas Tille tille at debian.org
Fri Mar 17 20:54:20 UTC 2017


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository paml.

commit b308d0b83b7d3a7f6225aef2b47cc6d262ae2ece
Author: Andreas Tille <tille at debian.org>
Date:   Fri Mar 17 21:28:26 2017 +0100

    New upstream version 4.9e+dfsg
---
 README.txt                                      |   11 +-
 doc/pamlHistory.txt                             |   29 +
 doc/{pamlHistory.txt => pamlHistory.txt~}       |   23 +
 examples/DatingSoftBound/BF.Clock23.xlsx        |  Bin 0 -> 20563 bytes
 examples/DatingSoftBound/README.BayesFactor.txt |   26 +
 examples/DatingSoftBound/{ => bf1}/mcmctree.ctl |   17 +-
 examples/DatingSoftBound/{ => bf2}/mcmctree.ctl |   13 +-
 examples/DatingSoftBound/mcmctree.ctl           |    5 +-
 examples/DatingSoftBound/mtCDNApri.trees        |    4 +
 src/BFdriver.c                                  |   74 +
 src/README.txt                                  |    3 +-
 src/baseml.c                                    |    6 +-
 src/chi2.c                                      |   14 +-
 src/codeml.c                                    |    2 +-
 src/evolver.c                                   | 2630 +++++++++++------------
 src/mcmctree.c                                  |   89 +-
 src/paml.h                                      |    9 +-
 src/pamp.c                                      | 1290 +++++------
 src/tools.c                                     |  223 +-
 src/treesub.c                                   |  715 +++---
 src/yn00.c                                      | 1814 ++++++++--------
 21 files changed, 3647 insertions(+), 3350 deletions(-)

diff --git a/README.txt b/README.txt
index ec5fab2..8f293ab 100644
--- a/README.txt
+++ b/README.txt
@@ -14,7 +14,16 @@ following, where the archive can be downloaded:
 
 	http://abacus.gene.ucl.ac.uk/software/paml.html
 
-PAML is distributed free of charge for academic use only.
+Copyright notice and disclaimer
+
+The software package is provided "as is" without warranty of any
+kind. In no event shall the author or his employer be held responsible
+for any damage resulting from the use of this software, including but
+not limited to the frustration that you may experience in using the
+package.  The program package, including source codes, example data
+sets, executables, and this documentation, is maintained by Ziheng
+Yang and distributed under the GNU GPL v3.
+
 
 Ziheng Yang
 Department of Biology                   Phone: (+44) (0)20 7679 4379
diff --git a/doc/pamlHistory.txt b/doc/pamlHistory.txt
index 87fb1c2..9eca86c 100644
--- a/doc/pamlHistory.txt
+++ b/doc/pamlHistory.txt
@@ -9,6 +9,35 @@ https://groups.google.com/forum/#!forum/pamlsoftware.
 
 
 
+Version 4.9e, March 2017
+
+Edited the readme files to change the license to GPL.
+
+
+
+Version 4.9d, February 2017
+
+(*) mcmctree.  A bug was introduced which causes the program to read
+the fossil calibration information in the tree file incorrectly, if
+lower (minimum) bounds are specified using the symbol '>'.  If you use
+the notation "B()" or "L()", the information is read correctly.  This
+bug was introduced in version 4.9b and exists also in 4.9c.  Versions
+4.9a and earlier were correct.
+
+(*) mcmctree.  Changed the default prior for rates for loci to
+gamma-Dirichlet (dos Reis 2014), and updated the documentation as
+well.  It was set to the conditional i.i.d. prior (Zhu et al. 2015).
+
+(*) mcmctree.  Added Bayes factor calculation.  A program called
+BFdriver is included in the release, as well as a pdf document in the
+folder examples/DatingSoftBound/BFdriverDOC.pdf.  We suggest that you 
+use the exact likelihood calculation when you use this option, since the 
+normal approximation is unreliable when the power posterior is close to 
+the prior (when beta is small).
+
+
+
+
 Version 4.9c, September 2016
 
 (*) Added GPL license statement in various places.
diff --git a/doc/pamlHistory.txt b/doc/pamlHistory.txt~
similarity index 96%
copy from doc/pamlHistory.txt
copy to doc/pamlHistory.txt~
index 87fb1c2..f6a3c20 100644
--- a/doc/pamlHistory.txt
+++ b/doc/pamlHistory.txt~
@@ -9,6 +9,29 @@ https://groups.google.com/forum/#!forum/pamlsoftware.
 
 
 
+Version 4.9d, February 2017
+
+(*) mcmctree.  A bug was introduced which causes the program to read
+the fossil calibration information in the tree file incorrectly, if
+lower (minimum) bounds are specified using the symbol '>'.  If you use
+the notation "B()" or "L()", the information is read correctly.  This
+bug was introduced in version 4.9b and exists also in 4.9c.  Versions
+4.9a and earlier were correct.
+
+(*) mcmctree.  Changed the default prior for rates for loci to
+gamma-Dirichlet (dos Reis 2014), and updated the documentation as
+well.  It was set to the conditional i.i.d. prior (Zhu et al. 2015).
+
+(*) mcmctree.  Added Bayes factor calculation.  A program called
+BFdriver is included in the release, as well as a pdf document in the
+folder examples/DatingSoftBound/BFdriverDOC.pdf.  We suggest that you 
+use the exact likelihood calculation when you use this option, since the 
+normal approximation is unreliable when the power posterior is close to 
+the prior (when beta is small).
+
+
+
+
 Version 4.9c, September 2016
 
 (*) Added GPL license statement in various places.
diff --git a/examples/DatingSoftBound/BF.Clock23.xlsx b/examples/DatingSoftBound/BF.Clock23.xlsx
new file mode 100644
index 0000000..bc9216b
Binary files /dev/null and b/examples/DatingSoftBound/BF.Clock23.xlsx differ
diff --git a/examples/DatingSoftBound/README.BayesFactor.txt b/examples/DatingSoftBound/README.BayesFactor.txt
new file mode 100644
index 0000000..23ce8b9
--- /dev/null
+++ b/examples/DatingSoftBound/README.BayesFactor.txt
@@ -0,0 +1,26 @@
+Bayes factor calculation using MCMCTREE
+Ziheng Yang
+
+5 February 2017
+
+Read the document BFDriverDOC.pdf, and replace bpp with MCMCtree. 
+
+The folder bf1/ is for running the marginal likelihood calculation for clock=2
+The folder bf2/ is for running the marginal likelihood calculation for clock=3
+
+
+    cd bf1
+    ../../../bin/BFdriver mcmctree.ctl 16
+
+Check and edit the commands file, and specify the full path for mcmctree if necessary, e.g.,
+     echo "../../../bin/mcmctree mcmctree.b$I.ctl > log.b$I.txt" > tmp.sh
+
+Then run the commands to submit jobs onto the queue.
+
+    source commands
+
+
+Do the same thing in bf2/ for clock=3, and collect results into the Excel file: BF.Clock23.xlsx.
+Note that the last line in the file commands is a grep command for retrieving the results after 
+all jobs are finished:
+grep BFbeta log.b*.txt
diff --git a/examples/DatingSoftBound/mcmctree.ctl b/examples/DatingSoftBound/bf1/mcmctree.ctl
similarity index 67%
copy from examples/DatingSoftBound/mcmctree.ctl
copy to examples/DatingSoftBound/bf1/mcmctree.ctl
index 26ed77a..1fd18e2 100644
--- a/examples/DatingSoftBound/mcmctree.ctl
+++ b/examples/DatingSoftBound/bf1/mcmctree.ctl
@@ -1,6 +1,7 @@
           seed = -1
-       seqfile = mtCDNApri123.txt
-      treefile = mtCDNApri.trees
+       seqfile = ../mtCDNApri123.txt
+      treefile = ../mtCDNApri.trees
+      mcmcfile = mcmc.txt
        outfile = out.txt
 
          ndata = 3
@@ -16,17 +17,15 @@
      cleandata = 0    * remove sites with ambiguity data (1:yes, 0:no)?
 
        BDparas = 1 1 0.1   * birth, death, sampling
-   kappa_gamma = 6 2      * gamma prior for kappa
-   alpha_gamma = 1 1      * gamma prior for alpha
+   kappa_gamma = 6 2       * gamma prior for kappa
+   alpha_gamma = 1 1       * gamma prior for alpha
 
    rgene_gamma = 2 20 1    * gammaDir prior for rate for genes
-  sigma2_gamma = 1 10 1   * gammaDir prior for sigma^2     (for clock=2 or 3)
+  sigma2_gamma = 1 10 1    * gammaDir prior for sigma^2     (for clock=2 or 3)
 
       finetune = 1: .1 .1 .1 .1 .1 .1 * auto (0 or 1): times, musigma2, rates, mixing, paras, FossilErr
 
-         print = 1
+         print = 1   * 0: no mcmc sample; 1: everything except branch rates 2: everything
         burnin = 2000
       sampfreq = 2
-       nsample = 20000
-
-*** Note: Make your window wider (100 columns) before running the program.
+       nsample = 200000
diff --git a/examples/DatingSoftBound/mcmctree.ctl b/examples/DatingSoftBound/bf2/mcmctree.ctl
similarity index 76%
copy from examples/DatingSoftBound/mcmctree.ctl
copy to examples/DatingSoftBound/bf2/mcmctree.ctl
index 26ed77a..d771d45 100644
--- a/examples/DatingSoftBound/mcmctree.ctl
+++ b/examples/DatingSoftBound/bf2/mcmctree.ctl
@@ -1,12 +1,13 @@
           seed = -1
-       seqfile = mtCDNApri123.txt
-      treefile = mtCDNApri.trees
+       seqfile = ../mtCDNApri123.txt
+      treefile = ../mtCDNApri.trees
+      mcmcfile = mcmc.txt
        outfile = out.txt
 
          ndata = 3
        seqtype = 0    * 0: nucleotides; 1:codons; 2:AAs
        usedata = 1    * 0: no data; 1:seq like; 2:normal approximation; 3:out.BV (in.BV)
-         clock = 2    * 1: global clock; 2: independent rates; 3: correlated rates
+         clock = 3    * 1: global clock; 2: independent rates; 3: correlated rates
        RootAge = '<1.0'  * safe constraint on root age, used if no fossil for root.
 
          model = 0    * 0:JC69, 1:K80, 2:F81, 3:F84, 4:HKY85
@@ -24,9 +25,7 @@
 
       finetune = 1: .1 .1 .1 .1 .1 .1 * auto (0 or 1): times, musigma2, rates, mixing, paras, FossilErr
 
-         print = 1
+         print = 1   * 0: no mcmc sample; 1: everything except branch rates 2: everything
         burnin = 2000
       sampfreq = 2
-       nsample = 20000
-
-*** Note: Make your window wider (100 columns) before running the program.
+       nsample = 200000
diff --git a/examples/DatingSoftBound/mcmctree.ctl b/examples/DatingSoftBound/mcmctree.ctl
index 26ed77a..1f260e9 100644
--- a/examples/DatingSoftBound/mcmctree.ctl
+++ b/examples/DatingSoftBound/mcmctree.ctl
@@ -1,6 +1,7 @@
           seed = -1
        seqfile = mtCDNApri123.txt
       treefile = mtCDNApri.trees
+      mcmcfile = mcmc.txt
        outfile = out.txt
 
          ndata = 3
@@ -24,9 +25,9 @@
 
       finetune = 1: .1 .1 .1 .1 .1 .1 * auto (0 or 1): times, musigma2, rates, mixing, paras, FossilErr
 
-         print = 1
+         print = 1   * 0: no mcmc sample; 1: everything except branch rates 2: everything
         burnin = 2000
-      sampfreq = 2
+      sampfreq = 10
        nsample = 20000
 
 *** Note: Make your window wider (100 columns) before running the program.
diff --git a/examples/DatingSoftBound/mtCDNApri.trees b/examples/DatingSoftBound/mtCDNApri.trees
index 688c42f..e79612b 100644
--- a/examples/DatingSoftBound/mtCDNApri.trees
+++ b/examples/DatingSoftBound/mtCDNApri.trees
@@ -5,3 +5,7 @@
 
 
 //end of file
+
+
+((((human, (chimpanzee, bonobo)) 'B(.06, .08)', gorilla),
+(orangutan, sumatran)) 'B(.12, .16)', gibbon);
diff --git a/src/BFdriver.c b/src/BFdriver.c
new file mode 100644
index 0000000..3af2eb3
--- /dev/null
+++ b/src/BFdriver.c
@@ -0,0 +1,74 @@
+/* This drives the marginal likelihood (Bayes factor) calculation
+   using bpp and mcmctree.
+
+   cc -o BFdriver -O3 BFdriver.c tools.c -lm
+
+   BFdriver <controlfilename> <npoints> <scriptname.sh>
+   BFdriver mcmctree.ctl 16 tmp.sh
+*/
+#include "paml.h"
+
+int main (int argc, char*argv[])
+{
+/* For each of npoints quadrature points, copies the control file to <name>.bNN.ctl with a BayesFactorBeta
+   line added and outfile/mcmcfile renamed to *.bNN.txt; also writes betaweights.txt and the script "commands". */
+   int j, npoints=8, ixw, nline=1024;
+   char resultsf[96]="betaweights.txt", ctlf[96]="mcmctree.ctl", scriptf[96]="tmp.sh";
+   char ctlfi[128], tmpf[1024], line[1024], *pline, *s;
+   double beta, sign, weight;
+   const double *xNI = NULL, *wNI = NULL;  /* Gauss-Legendre quadrature points */
+   FILE  *fctl, *fctlb, *fcommand, *fresults;
+
+   puts("Usage:\n\tBFdriver controlfilename npoints scriptname\n");
+   puts("\tquadrature: log{M} = 0.5 * SUM w_b * E_b(log{f(X)})\n");
+   if(argc<2) exit(-1);
+   if(strlen(argv[1])>=sizeof(ctlf) || (argc>3 && strlen(argv[3])>=sizeof(scriptf))) { puts("file name too long (max 95 characters)"); exit(-1); }
+   strcpy(ctlf, argv[1]);
+   if(argc>2) npoints = atoi(argv[2]);
+   if(argc>3) strcpy(scriptf, argv[3]);
+   fctl = (FILE*)gfopen(ctlf, "r");
+   if( (s = strstr(ctlf, ".ctl")) != NULL ) *s = '\0';
+
+   fresults =  (FILE*)gfopen(resultsf, "w");
+   fprintf(fresults, "%s\t%s\t%s\n", "beta", "weight", "ElnfX");
+   GaussLegendreRule(&xNI, &wNI, npoints);
+   /* NOTE(review): the index mapping below looks like it assumes an even npoints -- confirm against GaussLegendreRule. */
+   for (j=0; j<npoints; j++) {
+      if (j<npoints / 2) { ixw = npoints / 2 - 1 - j;  sign = -1; }
+      else               { ixw = j - npoints / 2;    sign = 1; }
+      beta = 0.5 + sign / 2 * xNI[ixw];
+      weight = wNI[ixw];
+      printf("b%02d: beta = %.4f  w = %8.6f\n", j+1, beta, weight );
+      sprintf(ctlfi, "%s.b%02d.ctl", ctlf, j+1);  /* fits: ctlf holds at most 95 characters */
+      fctlb = (FILE*)gfopen(ctlfi, "w");
+      fprintf(fctlb, "BayesFactorBeta = %8.6f *  w=%8.6f.ctl\n", beta, weight);
+      rewind(fctl);
+      while (fgets(line, nline, fctl) != NULL) {
+         if (line[0] == '*') continue;
+         if (strstr(line, "BayesFactorBeta")) continue;
+         pline = strchr(line, '=');
+         /* rename only when the keyword is left of '=' and a value is present; %1023s keeps tmpf in bounds */
+         if (pline && (((s = strstr(line, "outfile")) && s < pline) || ((s = strstr(line, "mcmcfile")) && s < pline))
+             && sscanf(pline + 1, "%1023s", tmpf) == 1) {
+            if( (s = strstr(tmpf, ".txt")) != NULL ) *s = '\0';
+            fprintf(fctlb, "%.*s %s.b%02d.txt\n", (int)(pline - line + 1), line, tmpf, j+1);  /* text up to '=', then new name */
+         }
+         else fputs(line, fctlb);
+      }
+      fclose(fctlb);
+      fprintf(fresults, "%.6f\t%.6f\t\n", beta, weight);
+   }
+   fclose(fctl);
+   fclose(fresults);
+   fcommand = (FILE*)gfopen("commands", "w");
+   fprintf(fcommand, "#!/bin/bash\nfor I in {01..%02d}\n", npoints);
+   fprintf(fcommand, "  do\n");
+   fprintf(fcommand, "     echo \"#!/bin/bash\" > %s\n", scriptf);
+   fprintf(fcommand, "     echo \"mcmctree %s.b$I.ctl > log.b$I.txt\" > %s\n", ctlf, scriptf);
+   fprintf(fcommand, "     sleep 1\n");
+   fprintf(fcommand, "     qsub -S /bin/bash -l h_vmem=4G -l tmem=4G -l h_rt=360:0:0 -cwd %s\n", scriptf);
+   fprintf(fcommand, "  done\n");
+   fputs("\n#grep BFbeta log.b*.txt\n", fcommand);
+   fclose(fcommand);
+   exit(0);
+}
diff --git a/src/README.txt b/src/README.txt
index 2452855..e4eda44 100644
--- a/src/README.txt
+++ b/src/README.txt
@@ -1,7 +1,8 @@
 Notes for compiling PAML on UNIX systems, including MAC OS X
 
 Ziheng Yang (z.yang at ucl.ac.uk)
-Last updated, 10 December 2003
+Last updated, 17 March 2017
+
 
 Copyright notice and disclaimer
 
diff --git a/src/baseml.c b/src/baseml.c
index 4b63796..532ca74 100644
--- a/src/baseml.c
+++ b/src/baseml.c
@@ -247,10 +247,11 @@ int main (int argc, char *argv[])
          if(SeqDistance==NULL||ancestor==NULL) error2("oom distance&ancestor");
       }
       InitializeBaseAA(fout);
+
       if(com.Mgene==3) 
          for(i=0; i<com.ngene; i++) xtoy(com.pi, com.piG[i], com.ncode);
 
-      if (com.model==JC69 && !com.readpattern && !com.print) {
+      if (com.model==JC69 && com.ngene<=1 && !com.readpattern && !com.print) {
          PatternWeightJC69like();
          if(fout) {
             fprintf(fout, "\n\nPrinting out site pattern counts\n");
@@ -274,8 +275,7 @@ int main (int argc, char *argv[])
          if((com.fhK=(double*)realloc(com.fhK,s2))==NULL) error2("oom");
       }
 
-      printf ("\n%9ld bytes for distance ",
-         com.ns*(com.ns-1)/2*(sizeof(double)+sizeof(int)));
+      printf ("\n%9ld bytes for distance ", com.ns*(com.ns-1)/2*(sizeof(double)+sizeof(int)));
       printf("\n%9lu bytes for conP\n", com.sconP);
       printf("%9lu bytes for fhK\n%9lu bytes for space\n", s2, com.sspace);
 
diff --git a/src/chi2.c b/src/chi2.c
index f2252b2..6ebf40a 100644
--- a/src/chi2.c
+++ b/src/chi2.c
@@ -6,7 +6,7 @@
    degrees of freedom and the tail probability (type I error rate) for 
    given observed chi-square statistic and degree of freedom.
 
-      Ziheng Yang,  October 1993.
+   Ziheng Yang,  October 1993.
 */
 
 #include <stdio.h>
@@ -26,8 +26,8 @@ double IncompleteGamma (double x, double alpha, double ln_gamma_alpha);
 
 int main(int argc, char*argv[])
 {
-   int i,j, n=20, ndf=200, nprob=8, option=0;
-   double df, chi2, d=1.0/n, prob[]={.005, .025, .1, .5, .90, .95, .99, .999};
+   int i,j, n=20, ndf=200, nprob=8, option=0, df;
+   double chi2, d=1.0/n, prob[]={.005, .025, .1, .5, .90, .95, .99, .999};
 
    if (argc!=2 && argc!=3) {
       printf ("\n\nChi-square critical values\n");
@@ -51,21 +51,21 @@ int main(int argc, char*argv[])
    else if(argc==2) {
       for (; ; ) {
          printf ("\nd.f. & Chi^2 value (Ctrl-c to break)? ");
-         scanf ("%lf%lf", &df, &chi2);
+         scanf ("%d%lf", &df, &chi2);
          if(df<1 || chi2<0) break;
          prob[0] = 1-CDFChi2(chi2,df);
-         printf ("\ndf = %2.0f  prob = %.9f = %.3e\n", df, prob[0], prob[0]);
+         printf ("\ndf = %2d  prob = %.9f = %.3e\n", df, prob[0], prob[0]);
       }
    }
    else if(argc==3) {
       df = atoi(argv[1]);
       chi2 = atof(argv[2]);
       if(df<1 || chi2<0) {
-         printf("df = %d  ch2 = %.4f invalid", df, chi2);
+         printf("df = %2d  ch2 = %.4f invalid", df, chi2);
          exit(-1);
       }
       prob[0] = 1 - CDFChi2(chi2, df);
-      printf ("\ndf = %2.0f  prob = %.9f = %.3e\n", df, prob[0], prob[0]);
+      printf ("\ndf = %2d  prob = %.9f = %.3e\n", df, prob[0], prob[0]);
    }
    printf ("\n");
    return (0);
diff --git a/src/codeml.c b/src/codeml.c
index 186466f..16baf76 100644
--- a/src/codeml.c
+++ b/src/codeml.c
@@ -487,7 +487,7 @@ scanf("%d", &KGaussLegendreRule);
       }
       fflush(fout);
 
-      if(com.seqtype==AAseq && com.model==Poisson && !com.print) {
+      if(com.seqtype==AAseq && com.model==Poisson && com.ngene<=1 && !com.print) {
          PatternWeightJC69like();
          fprintf(fout, "\n\nPrinting out site pattern counts\n");
          printPatterns(fout);
diff --git a/src/evolver.c b/src/evolver.c
index 423eaa5..5f64be3 100644
--- a/src/evolver.c
+++ b/src/evolver.c
@@ -1,1315 +1,1315 @@
-/* evolver.c
-   Copyright, Ziheng Yang, April 1995.
-
-     cl -Ot -O2 evolver.c tools.c
-     cl -Ot -O2 -DCodonNSbranches    -FeevolverNSbranches.exe    evolver.c tools.c
-     cl -Ot -O2 -DCodonNSsites       -FeevolverNSsites.exe       evolver.c tools.c
-     cl -Ot -O2 -DCodonNSbranchsites -FeevolverNSbranchsites.exe evolver.c tools.c
-
-     cc -fast -o evolver evolver.c tools.c -lm
-     cc -O4 -DCodonNSbranches -o evolverNSbranches evolver.c tools.c -lm
-     cc -O4 -DCodonNSsites -o evolverNSsites evolver.c tools.c -lm
-     cc -O4 -DCodonNSbranchsites -o evolverNSbranchsites evolver.c tools.c -lm
-
-     evolver
-     evolver 5 MCbase.dat
-     evolver 6 MCcodon.dat
-     evolver 7 MCaa.dat
-     evolver 9 <TreesFile> <MasterTreeFile>
-*/
-
-/*
-#define CodonNSbranches
-#define CodonNSsites
-#define CodonNSbranchsites
-*/
-
-#include "paml.h"
-
-#define NS            5000
-#define NBRANCH       (NS*2-2)
-#define MAXNSONS      20
-#define LSPNAME       50
-#define NCODE         64
-#define NCATG         40
-
-
-struct CommonInfo {
-   unsigned char *z[2*NS-1];
-   char spname[NS][LSPNAME+1], daafile[512], cleandata, readpattern;
-   int ns, ls, npatt, np, ntime, ncode, clock, rooted, model, icode;
-   int seqtype, *pose, ncatG, NSsites;
-   int ngene, lgene[1], posG[1+1];  /* not used */
-   double piG[1][4], rgene[1];  /* not used */
-   double *fpatt, kappa, omega, alpha, pi[64], *conP, daa[20*20];
-   double freqK[NCATG], rK[NCATG];
-   char *siteID;    /* used if ncatG>1 */
-   double *siterates;   /* rates for gamma or omega for site or branch-site models */
-   double *omegaBS, *QfactorBS;     /* omega IDs for branch-site models */
-}  com;
-struct TREEB {
-   int nbranch, nnode, root, branches[NBRANCH][2];
-}  tree;
-struct TREEN {
-   int father, nson, sons[MAXNSONS], ibranch;
-   double branch, age, omega, label, *conP;
-   char *nodeStr, fossil;
-}  *nodes;
-
-extern char BASEs[];
-extern int GeneticCode[][64], noisy;
-int LASTROUND=0; /* not used */
-
-#define EVOLVER
-#define NODESTRUCTURE
-#define BIRTHDEATH
-#include "treesub.c"
-#include "treespace.c"
-
-void TreeDistances(FILE* fout);
-void Simulate(char *ctlf);
-void MakeSeq(char *z, int ls);
-int EigenQbase(double rates[], double pi[], double Root[],double U[], double V[],double Q[]);
-int EigenQcodon (int getstats, double kappa,double omega,double pi[], double Root[], double U[], double V[], double Q[]);
-int EigenQaa(double pi[], double Root[], double U[], double V[],double Q[]);
-void CladeMrBayesProbabilities (char treefile[]);
-int between_f_and_x(void);
-void LabelClades(FILE *fout);
-
-char *MCctlf0[]={"MCbase.dat","MCcodon.dat","MCaa.dat"};
-char *seqf[]={"mc.paml", "mc.paml", "mc.nex", "mc.nex"};
-
-enum {JC69, K80, F81, F84, HKY85, T92, TN93, REV} BaseModels;
-char *basemodels[]={"JC69","K80","F81","F84","HKY85","T92","TN93","REV"};
-enum {Poisson, EqualInput, Empirical, Empirical_F} AAModels;
-char *aamodels[]={"Poisson", "EqualInput", "Empirical", "Empirical_F"};
-
-
-double PMat[NCODE*NCODE], U[NCODE*NCODE], V[NCODE*NCODE], Root[NCODE];
-static double Qfactor=-1, Qrates[5];  /* Qrates[] hold kappa's for nucleotides */
-
-
-int main (int argc, char*argv[])
-{
-   char *MCctlf=NULL, outf[512]="evolver.out", treefile[512]="mcmc.txt", mastertreefile[512]="\0";
-   int i, option=-1, ntree=1,rooted, BD=0, gotoption=0, pick1tree=-1;
-   double bfactor=1, birth=-1,death=-1,sample=-1,mut=-1, *space;
-   FILE *fout=gfopen(outf,"w");
-
-   printf("EVOLVER in %s\n", pamlVerStr);
-   com.alpha=0; com.cleandata=1; com.model=0; com.NSsites=0;
-
-   if(argc>2 && !strcmp(argv[argc-1], "--stdout-no-buf"))
-      setvbuf(stdout, NULL, _IONBF, 0);
-   if(argc>1) {
-      gotoption=1;   sscanf(argv[1], "%d", &option);
-   }
-   if(argc==1)
-      printf("Results for options 1-4 & 8 go into %s\n",outf);
-   else if(option!=5 && option!=6 && option!=7 && option!=9) {
-      puts("Usage: \n\tevolver \n\tevolver option# MyDataFile"); exit(-1); 
-   }
-   if(option>=4 && option<=6)
-      MCctlf = argv[2];
-   else if(option==9) {
-      strcpy(treefile, argv[2]);
-      if(argc>3) strcpy(mastertreefile, argv[3]);
-      if(argc>4) sscanf(argv[4], "%d", &pick1tree);
-   }
-
-#if defined (CodonNSbranches)
-   option=6;  com.model=1; 
-   MCctlf = (argc==3 ? argv[2] : "MCcodonNSbranches.dat");
-   gotoption = 1;
-#elif defined (CodonNSsites)
-   option=6;  com.NSsites=3; 
-   MCctlf = (argc==3 ? argv[2] : "MCcodonNSsites.dat");
-   gotoption = 1;
-#elif defined (CodonNSbranchsites)
-   option=6;  com.model=1; com.NSsites=3; 
-   MCctlf = (argc==3 ? argv[2] : "MCcodonNSbranchsites.dat");
-   gotoption = 1;
-#endif
-
-   if(!gotoption) {
-      for(; ;) {
-         fflush(fout);
-         printf("\n\t(1) Get random UNROOTED trees?\n"); 
-         printf("\t(2) Get random ROOTED trees?\n"); 
-         printf("\t(3) List all UNROOTED trees?\n");
-         printf("\t(4) List all ROOTED trees?\n");
-         printf("\t(5) Simulate nucleotide data sets (use %s)?\n",MCctlf0[0]);
-         printf("\t(6) Simulate codon data sets      (use %s)?\n",MCctlf0[1]);
-         printf("\t(7) Simulate amino acid data sets (use %s)?\n",MCctlf0[2]);
-         printf("\t(8) Calculate identical bi-partitions between trees?\n");
-         printf("\t(9) Calculate clade support values (evolver 9 treefile mastertreefile <pick1tree>)?\n");
-         printf("\t(11) Label clades?\n");
-         printf("\t(0) Quit?\n");
-
-         option = 9;
-         scanf("%d", &option);
-
-         if(option==0) exit(0);
-         if(option>=5 && option<=7) break;
-         if(option<5)  { 
-            printf ("No. of species: ");
-            scanf ("%d", &com.ns);
-         }
-         if(com.ns>NS) error2 ("Too many species.  Raise NS.");
-         if((space=(double*)malloc(10000*sizeof(double)))==NULL) error2("oom");
-         rooted = !(option%2);
-         if(option<3) {
-            printf("\nnumber of trees & random number seed? ");
-            scanf("%d%d", &ntree, &i);
-            SetSeed(i, 1);
-            printf ("Want branch lengths from the birth-death process (0/1)? ");
-            scanf ("%d", &BD);
-         }
-         if(option<=4) {
-            if(com.ns<3) error2("no need to do this?");
-            i = (com.ns*2-1)*sizeof(struct TREEN);
-            if((nodes=(struct TREEN*)malloc(i)) == NULL) 
-               error2("oom");
-         }
-         switch (option) {
-         case(1):   /* random UNROOTED trees */
-         case(2):   /* random ROOTED trees */
-            /* default names */
-            if(com.ns<=52)
-               for(i=0; i<com.ns; i++)  sprintf(com.spname[i], "%c", (i<26 ? 'A'+i : 'a'+i-26));
-            else
-               for(i=0; i<com.ns; i++)  sprintf(com.spname[i], "S%d", i+1);
-
-            if(BD) {
-               printf ("\nbirth rate, death rate, sampling fraction, and ");
-               printf ("mutation rate (tree height)?\n");
-               scanf ("%lf%lf%lf%lf", &birth, &death, &sample, &mut);
-            }
-            for(i=0;i<ntree;i++) {
-               RandomLHistory (rooted, space);
-               if(BD)
-                  BranchLengthBD (1, birth, death, sample, mut);
-               if(com.ns<20&&ntree<10) { OutTreeN(F0, 0, BD); puts("\n"); }
-               OutTreeN(fout, 1, BD);  FPN(fout);
-            }
-            /*
-            for (i=0; i<com.ns-2-!rooted; i++)
-               Ib[i] = (int)((3.+i)*rndu());
-            MakeTreeIb (com.ns, Ib, rooted);
-            */
-            break;
-         case(3):
-         case(4): 
-            ListTrees(fout, com.ns, rooted);
-            break;
-         case(8):  TreeDistances(fout);  break;
-         case(9):  
-            printf("tree file names? ");
-            scanf("%s%s", treefile, mastertreefile);
-            break;
-         case(10): between_f_and_x();    break;
-         case(11): LabelClades(fout);    break;
-         default:  exit(0);
-         }
-      }
-   }
-
-   if(option>=5 && option<=7) {
-      com.seqtype = option-5;  /* 0, 1, 2 for bases, codons, & amino acids */
-      Simulate(MCctlf ? MCctlf : MCctlf0[option-5]);
-   }
-   else if(option==9) {
-      CladeSupport(fout, treefile, 1, mastertreefile, pick1tree);
-      /* CladeMrBayesProbabilities("/papers/BPPJC3sB/Karol.trees"); */
-   }
-   return(0);
-}
-
-
-int between_f_and_x (void)
-{
-/* this helps with the exponential transform for frequency parameters */
-   int i,n,fromf=0;
-   double x[100];
-
-   for(;;) {
-      printf("\ndirection (0:x=>f; 1:f=>x; -1:end)  &  #classes? ");
-      scanf("%d",&fromf);    
-      if(fromf==-1) return(0);
-      scanf("%d", &n);  if(n>100) error2("too many classes");
-      printf("input the first %d values for %s? ",n-1,(fromf?"f":"x"));
-      FOR(i,n-1) scanf("%lf",&x[i]);
-      x[n-1]=(fromf?1-sum(x,n-1):0);
-      f_and_x(x, x, n, fromf, 1);
-      matout(F0,x,1,n);
-   }
-}
-
-
-void LabelClades(FILE *fout)
-{
-/* This reads in a tree and scan species names to check whether they form a 
-   paraphyletic group and then label the clade.
-   It assumes that the tree is unrooted, and so goes through two rounds to check
-   whether the remaining seqs form a monophyletic clade.
-*/
-   FILE *ftree;
-   int unrooted=1,iclade, sizeclade, mrca, paraphyl, is, imrca, i,j,k, lasts, haslength;
-   char key[96]="A", treef[64]="/A/F/flu/HA.all.prankcodon.tre", *p,chosen[NS], *endstr="end";
-   int *anc[NS-1], loc, bitmask, SI=sizeof(int)*8;
-   int debug;
-
-   printf("Tree file name? ");
-   scanf ("%s", treef);
-   printf("Treat tree as unrooted (0 no, 1 yes)? ");
-   scanf ("%d", &unrooted);
-
-   ftree = gfopen (treef,"r");
-   fscanf (ftree, "%d%d", &com.ns, &j);
-   if(com.ns<=0) error2("need ns in tree file");
-   debug = (com.ns<20);
-
-   i = (com.ns*2-1)*sizeof(struct TREEN);
-   if((nodes=(struct TREEN*)malloc(i))==NULL) error2("oom");
-   for(i=0; i<com.ns*2-1; i++)  nodes[i].nodeStr = NULL;
-   for(i=0; i<com.ns-1; i++) {
-      anc[i] = (int*)malloc((com.ns/SI+1)*sizeof(int));
-      if(anc[i]==NULL)  error2("oom");
-   }
-   ReadTreeN(ftree, &haslength, &j, 1, 0);
-   fclose(ftree);
-   if(debug) { OutTreeN(F0, 1, PrNodeNum);  FPN(F0); }
-
-   for(iclade=0; iclade<com.ns-1; iclade++) {
-      printf("\nString for selecting sequences (followed by non-digit) (end to end)? ");
-      scanf("%s", key);
-      if(strcmp(endstr, key) == 0)
-         break;
-      for(i=0; i<com.ns; i++) 
-         chosen[i] = '\0';
-
-
-      k = strlen(key);
-      for(i=0; i<com.ns; i++) {
-         if( (p=strstr(com.spname[i], key)) 
-            && !isdigit(p[k]) )
-               chosen[i] = 1;
-      }
-
-      /*
-      for(i=0; i<com.ns; i++) 
-         if(strstr(com.spname[i], key)) chosen[i] = 1;
-      */
-
-      /* look for MRCA, going through two rounds, assuming unrooted tree */
-      for(imrca=0; imrca<1+unrooted; imrca++) {
-         if(imrca) 
-            for(i=0; i<com.ns; i++) chosen[i] = 1 - chosen[i]; 
-
-         for(i=0,sizeclade=0; i<com.ns; i++) 
-            if(chosen[i]) {
-               sizeclade ++;
-               lasts = i;
-            }
-
-         if(sizeclade <= 1 || sizeclade >= com.ns-1) {
-            puts("unable to form a clade.  <2 seqs.");
-            break;
-         }
-         for(i=0; i<com.ns-1; i++) for(j=0; j<com.ns/SI+1; j++) 
-            anc[i][j] = 0;
-         for(is=0; is<com.ns; is++) {
-            if(chosen[is]==0) continue;
-            loc = is/SI;  bitmask = 1 << (is%SI);
-            for(j=nodes[is].father; j!=-1; j=nodes[j].father) {
-               anc[j-com.ns][loc] |= bitmask;
-               if(is==lasts) {
-                  for(i=0,k=0; i<com.ns; i++)
-                     if(anc[j-com.ns][i/SI] & (1<<(i%SI)))
-                        k ++;
-                  if(k==sizeclade) {
-                     mrca = j;  break;
-                  }
-               }
-            }
-         }
-         if(imrca==0 && mrca!=tree.root) /* 1st round is enough */
-            break;
-      }
-
-      if(sizeclade <= 1 || sizeclade >= com.ns-1 || mrca==tree.root) {
-         printf("Unable to label.  Ignored.");
-         continue;
-      }
-
-      if(debug) 
-         for(is=0; is<com.ns-1; is++) {
-            printf("\nnode %4d: ", is+com.ns);
-            for(j=0; j<com.ns; j++) {
-               loc = j/SI;  bitmask = 1 << (j%SI);
-               printf(" %d", (anc[is][loc] & bitmask) != 0);
-            }
-         }
-
-      printf("\nClade #%d (%s): %d seqs selected, MRCA is %d\n", iclade+1, key, sizeclade, mrca+1);
-      for(is=0,paraphyl=0; is<com.ns; is++) {
-         if(chosen[is] == 0)
-            for(j=nodes[is].father; j!=-1; j=nodes[j].father)
-               if(j==mrca) { paraphyl++;  break; }
-      }
-      if(paraphyl) 
-         printf("\nThis clade is paraphyletic, & includes %d other sequences\n", paraphyl);
-
-      nodes[mrca].label = iclade+1;
-      if(debug) OutTreeN(F0, 1, haslength|PrLabel);
-   }
-
-   for(i=0; i<com.ns-1; i++)  free(anc[i]);
-   OutTreeN(fout, 1, haslength|PrLabel);  FPN(fout);
-   printf("Printed final tree with labels in evolver.out\n");
-   exit(0);
-}
-
-void TreeDistanceDistribution (FILE* fout)
-{
-/* This calculates figure 3.7 of Yang (2006).
-   This reads the file of all trees (such as 7s.all.trees), and calculates the 
-   distribution of partition distance in all pairwise comparisons.
-*/
-   int i,j,ntree, k,*nib, nsame, IBsame[NS], lpart=0;
-   char treef[64]="5s.all.trees", *partition;
-   FILE *ftree;
-   double mPD[NS], PD1[NS];  /* distribution of partition distances */
-
-   puts("Tree file name?");
-   scanf ("%s", treef);
-
-   ftree=gfopen (treef,"r");
-   fscanf (ftree, "%d%d", &com.ns, &ntree);
-   printf("%2d sequences %2d trees.\n", com.ns, ntree);
-   i=(com.ns*2-1)*sizeof(struct TREEN);
-   if((nodes=(struct TREEN*)malloc(i))==NULL) error2("oom");
-
-   lpart = (com.ns-1)*com.ns*sizeof(char);
-   i = ntree*lpart;
-   printf("\n%d bytes of space requested.\n", i);
-   partition = (char*)malloc(i);
-   nib = (int*)malloc(ntree*sizeof(int));
-   if (partition==NULL || nib==NULL) error2("out of memory");
-
-   puts("\ntree #: mean prop of tree pairs with 0 1 2 ... shared bipartitions\n");
-   fputs("\ntree #: prop of tree pairs with 0 1 2 ... shared bipartitions\n",fout);
-   for (i=0; i<ntree; i++) {
-      ReadTreeN (ftree, &j, &k, 0, 1); 
-      nib[i]=tree.nbranch-com.ns;
-      Tree2Partition(partition+i*lpart);
-   }
-   for(k=0; k<com.ns-3; k++) mPD[k]=0;
-   for (i=0; i<ntree; i++,FPN(fout)) {
-      for(k=0; k<com.ns-3; k++) PD1[k]=0;
-      for (j=0; j<ntree; j++) {
-         if(j==i) continue;
-         nsame=NSameBranch(partition+i*lpart,partition+j*lpart, nib[i],nib[j],IBsame);
-         PD1[nsame] ++;
-      }
-      for(k=0; k<com.ns-3; k++) PD1[k] /= (ntree-1.);
-      for(k=0; k<com.ns-3; k++) mPD[k] = (mPD[k]*i+PD1[k])/(i+1.);
-      printf("%8d (%5.1f%%):", i+1,(i+1.)/ntree*100);
-      for(k=0; k<com.ns-3; k++) printf(" %7.4f", mPD[k]);
-      fprintf(fout, "%8d:", i+1);  for(k=0; k<com.ns-3; k++) fprintf(fout, " %7.4f", PD1[k]);
-      printf("%s", (com.ns<8||(i+1)%100==0 ? "\n" : "\r"));
-   }
-   free(partition); free(nodes); free(nib); fclose(ftree);
-   exit(0);
-}
-
-
-void TreeDistances (FILE* fout)
-{
-/* I think this is broken after i changed the routine Tree2Partition().
-*/
-   int i,j,ntree, k,*nib, parti2B[NS], nsame, IBsame[NS],nIBsame[NS], lpart=0;
-   char treef[64]="5s.all.trees", *partition;
-   FILE *ftree;
-   double psame, mp, vp;
-
-   /*
-   TreeDistanceDistribution(fout);
-   */
-
-   puts("\nNumber of identical bi-partitions between trees.\nTree file name?");
-   scanf ("%s", treef);
-
-   ftree=gfopen (treef,"r");
-   fscanf (ftree, "%d%d", &com.ns, &ntree);
-   printf("%2d sequences %2d trees.\n", com.ns, ntree);
-   i=(com.ns*2-1)*sizeof(struct TREEN);
-   if((nodes=(struct TREEN*)malloc(i))==NULL) error2("oom");
-
-   if(ntree<2) error2("ntree");
-   printf ("\n%d species, %d trees\n", com.ns, ntree);
-   puts("\n\t1: first vs. rest?\n\t2: all pairwise comparisons?\n");
-   k=2;
-   scanf("%d", &k);
-
-   lpart=(com.ns-1)*com.ns*sizeof(char);
-   i=(k==1?2:ntree)*lpart;
-   printf("\n%d bytes of space requested.\n", i);
-   partition=(char*)malloc(i);
-   nib=(int*)malloc(ntree*sizeof(int));
-   if (partition==NULL || nib==NULL) error2("out of memory");
-
-   if(k==2) {    /* pairwise comparisons */
-      fputs("Number of identical bi-partitions in pairwise comparisons\n",fout);
-      for (i=0; i<ntree; i++) {
-         ReadTreeN (ftree, &j, &k, 0, 1); 
-         nib[i]=tree.nbranch-com.ns;
-         Tree2Partition(partition+i*lpart);
-      }
-      for (i=0; i<ntree; i++,FPN(F0),FPN(fout)) {
-         printf("%2d (%2d):", i+1,nib[i]);
-         fprintf(fout,"%2d (%2d):", i+1,nib[i]);
-         for (j=0; j<i; j++) {
-            nsame=NSameBranch(partition+i*lpart,partition+j*lpart, nib[i],nib[j],IBsame);
-            printf(" %2d", nsame);
-            fprintf(fout," %2d", nsame);
-         }
-      }
-   }
-   else {  /* first vs. others */
-      ReadTreeN (ftree, &j, &k, 0, 1);
-      nib[0]=tree.nbranch-com.ns;
-      if (nib[0]==0) error2("1st tree is a star tree..");
-      Tree2Partition (partition);
-      fputs ("Comparing the first tree with the others\nFirst tree:\n",fout);
-      OutTreeN(fout,0,0);  FPN(fout);  OutTreeB(fout);  FPN(fout); 
-      fputs ("\nInternal branches in the first tree:\n",fout);
-      FOR(i,nib[0]) { 
-         k=parti2B[i];
-         fprintf(fout,"%3d (%2d..%-2d): ( ",
-            i+1,tree.branches[k][0]+1,tree.branches[k][1]+1);
-         FOR(j,com.ns) if(partition[i*com.ns+j]) fprintf(fout,"%d ",j+1);
-         fputs(")\n",fout);
-      }
-      if(nodes[tree.root].nson<=2) 
-         fputs("\nRooted tree, results may not be correct.\n",fout);
-      fputs("\nCorrect internal branches compared with the 1st tree:\n",fout);
-      FOR(k,nib[0]) nIBsame[k]=0;
-      for (i=1,mp=vp=0; i<ntree; i++,FPN(fout)) {
-         ReadTreeN (ftree, &j, &k, 0, 1); 
-         nib[1]=tree.nbranch-com.ns;
-         Tree2Partition(partition+lpart);
-         nsame=NSameBranch (partition,partition+lpart, nib[0],nib[1],IBsame);
-
-         psame=nsame/(double)nib[0];
-         FOR(k,nib[0]) nIBsame[k]+=IBsame[k];
-         fprintf(fout,"1 vs. %3d: %4d: ", i+1,nsame);
-         FOR(k,nib[0]) if(IBsame[k]) fprintf(fout," %2d", k+1);
-         printf("1 vs. %5d: %6d/%d  %10.4f\n", i+1,nsame,nib[0],psame);
-         vp += square(psame - mp)*(i-1.)/i;
-         mp=(mp*(i-1.) + psame)/i;
-      }
-      vp=(ntree<=2 ? 0 : sqrt(vp/((ntree-1-1)*(ntree-1.))));
-      fprintf(fout,"\nmean and S.E. of proportion of identical partitions\n");
-      fprintf(fout,"between the 1st and all the other %d trees ", ntree-1);
-      fprintf(fout,"(ignore these if not revelant):\n %.4f +- %.4f\n", mp, vp);
-      fprintf(fout,"\nNumbers of times, out of %d, ", ntree-1);
-      fprintf(fout,"interior branches of tree 1 are present");
-      fputs("\n(This may be bootstrap support for nodes in tree 1)\n",fout);
-      FOR(k,nib[0]) { 
-         i=tree.branches[parti2B[k]][0]+1;  j=tree.branches[parti2B[k]][1]+1; 
-         fprintf(fout,"%3d (%2d..%-2d): %6d (%5.1f%%)\n",
-            k+1,i,j,nIBsame[k],nIBsame[k]*100./(ntree-1.));
-      }
-   }
-   free(partition);  free(nodes); free(nib);  fclose(ftree);
-}
-
-
-
-int EigenQbase(double rates[], double pi[], 
-    double Root[],double U[],double V[],double Q[])
-{
-/* Construct the rate matrix Q[] for nucleotide model REV.
-*/
-   int i,j,k;
-   double mr, space[4];
-
-   zero (Q, 16);
-   for (i=0,k=0; i<3; i++) for (j=i+1; j<4; j++)
-      if (i*4+j!=11) Q[i*4+j]=Q[j*4+i]=rates[k++];
-   for (i=0,Q[3*4+2]=Q[2*4+3]=1; i<4; i++) FOR (j,4) Q[i*4+j] *= pi[j];
-   for (i=0,mr=0; i<4; i++) 
-      { Q[i*4+i]=0; Q[i*4+i]=-sum(Q+i*4, 4); mr-=pi[i]*Q[i*4+i]; }
-   abyx (1/mr, Q, 16);
-
-   eigenQREV(Q, com.pi, 4, Root, U, V, space);
-   return (0);
-}
-
-
-static double freqK_NS=-1;
-
-int EigenQcodon (int getstats, double kappa, double omega, double pi[],
-    double Root[], double U[], double V[], double Q[])
-{
-/* Construct the rate matrix Q[].
-   64 codons are used, and stop codons have 0 freqs.
-*/
-   int n=com.ncode, i,j,k, c[2],ndiff,pos=0,from[3],to[3];
-   double mr, space[64];
-   
-   for(i=0; i<n*n; i++) Q[i] = 0;
-   for (i=0; i<n; i++) FOR (j,i) {
-      from[0]=i/16; from[1]=(i/4)%4; from[2]=i%4;
-      to[0]=j/16;   to[1]=(j/4)%4;   to[2]=j%4;
-      c[0]=GeneticCode[com.icode][i];   c[1]=GeneticCode[com.icode][j];
-      if (c[0]==-1 || c[1]==-1)  continue;
-      for (k=0,ndiff=0; k<3; k++)  if (from[k]!=to[k]) { ndiff++; pos=k; }
-      if (ndiff!=1)  continue;
-      Q[i*n+j]=1;
-      if ((from[pos]+to[pos]-1)*(from[pos]+to[pos]-5)==0)  Q[i*n+j]*=kappa;
-      if(c[0]!=c[1])  Q[i*n+j]*=omega;
-      Q[j*n+i]=Q[i*n+j];
-   }
-   for(i=0; i<n; i++) for(j=0; j<n; j++)
-      Q[i*n+j] *= com.pi[j];
-   for(i=0,mr=0;i<n;i++) { 
-      Q[i*n+i] = -sum(Q+i*n,n);
-      mr -= pi[i]*Q[i*n+i]; 
-   }
-
-   if(getstats)
-      Qfactor += freqK_NS * mr;
-   else {
-      if(com.ncatG==0) FOR(i,n*n) Q[i]*=1/mr;
-      else             FOR(i,n*n) Q[i]*=Qfactor;  /* NSsites models */
-      eigenQREV(Q, com.pi, n, Root, U, V, space);
-   }
-   return (0);
-}
-
-
-
-int EigenQaa (double pi[], double Root[], double U[], double V[], double Q[])
-{
-/* Construct the rate matrix Q[]
-*/
-   int n=20, i,j;
-   double mr, space[20];
-
-   FOR (i,n*n) Q[i]=0;
-   switch (com.model) {
-   case (Poisson)   : case (EqualInput) : 
-      fillxc (Q, 1., n*n);  break;
-   case (Empirical)   : case (Empirical_F):
-      FOR(i,n) FOR(j,i) Q[i*n+j]=Q[j*n+i]=com.daa[i*n+j]/100;
-      break;
-   }
-   FOR (i,n) FOR (j,n) Q[i*n+j]*=com.pi[j];
-   for (i=0,mr=0; i<n; i++) {
-      Q[i*n+i]=0; Q[i*n+i]=-sum(Q+i*n,n);  mr-=com.pi[i]*Q[i*n+i]; 
-   }
-
-   eigenQREV(Q, com.pi, n, Root, U, V, space);
-   FOR(i,n)  Root[i]=Root[i]/mr;
-
-   return (0);
-}
-
-
-int GetDaa (FILE* fout, double daa[])
-{
-/* Get the amino acid substitution rate matrix (grantham, dayhoff, jones, etc).
-*/
-   FILE * fdaa;
-   char aa3[4]="";
-   int i,j, n=20;
-
-   fdaa=gfopen(com.daafile, "r");
-   printf("\nReading rate matrix from %s\n", com.daafile);
-
-   for (i=0; i<n; i++)  for (j=0,daa[i*n+i]=0; j<i; j++)  {
-      fscanf(fdaa, "%lf", &daa[i*n+j]);
-      daa[j*n+i]=daa[i*n+j];
-   }
-   if (com.model==Empirical) {
-      FOR(i,n) if(fscanf(fdaa,"%lf",&com.pi[i])!=1) error2("err aaRatefile");
-      if (fabs(1-sum(com.pi,20))>1e-4) error2("\nSum of aa freq. != 1\n");
-   }
-   fclose (fdaa);
-
-   if (fout) {
-      fprintf (fout, "\n%s\n", com.daafile);
-      FOR (i,n) {
-         fprintf (fout, "\n%4s", getAAstr(aa3,i));
-         FOR (j,i)  fprintf (fout, "%5.0f", daa[i*n+j]); 
-      }
-      FPN (fout);
-   }
-
-   return (0);
-}
-
-
-
-
-void MakeSeq(char*z, int ls)
-{
-/* generate a random sequence of nucleotides, codons, or amino acids by 
-   sampling com.pi[], or read the ancestral sequence from the file RootSeq.txt
-   if the file exists.
-*/
-   int i,j,h, n=com.ncode, ch, n31=(com.seqtype==1?3:1), lst;
-   double p[64],r, small=1e-5;
-   char *pch=(com.seqtype==2?AAs:BASEs);
-   char rootseqf[]="RootSeq.txt", codon[4]="   ";
-   FILE *fseq=(FILE*)fopen(rootseqf,"r");
-   static int times=0;
-
-   if(fseq) {
-      if(times++==0) printf("Reading sequence at the root from file.\n\n");
-      if(com.siterates && com.ncatG>1) 
-         error2("sequence for root doesn't work for site-class models");
-
-      for(lst=0; ; ) {
-         for(i=0; i<n31; i++) {
-            while((ch=fgetc(fseq)) !=EOF && !isalpha(ch)) ;
-            if(ch==EOF) error2("EOF when reading root sequence.");
-            if(isalpha(ch))
-               codon[i]=(char)(ch=CodeChara((char)ch, com.seqtype));
-         }
-         if(com.seqtype==1) ch = codon[0]*16 + codon[1]*4 + codon[2];
-         if(ch<0 || ch>n-1) 
-            printf("error when reading site %d\n", lst+1);
-         if(com.seqtype==1 && com.pi[ch]==0)
-            printf("you seem to have a stop codon in the root sequence\n");
-
-         z[lst++] = (char)ch;
-         if(lst==com.ls) break;
-      }
-      fclose(fseq);
-   }
-   else {
-      for(j=0; j<n; j++)  p[j] = com.pi[j];
-      for(j=1; j<n; j++)  p[j] += p[j-1];
-      if(fabs(p[n-1]-1) > small)
-         { printf("\nsum pi = %.6f != 1!\n", p[n-1]); exit(-1); }
-      for(h=0; h<com.ls; h++) {
-         for(j=0,r=rndu();j<n-1;j++) 
-            if(r<p[j]) break;
-         z[h] = (char)j;
-      }
-   }
-}
-
-
-
-void Evolve1 (int inode)
-{
-/* evolve sequence com.z[tree.root] along the tree to generate com.z[], 
-   using nodes[].branch, nodes[].omega, & com.model
-   Needs com.z[0,1,...,nnode-1], while com.z[0] -- com.z[ns-1] constitute
-   the data.
-   For codon sequences, com.siterates[] has w's for NSsites and NSbranchsite models.
-*/
-   int is, h,i,j, ison, from, n=com.ncode, longseq=100000;
-   double t, rw;
-   
-   for (is=0; is<nodes[inode].nson; is++) {
-      ison=nodes[inode].sons[is];
-      memcpy(com.z[ison],com.z[inode],com.ls*sizeof(unsigned char));
-      t=nodes[ison].branch;
-      
-      if(com.seqtype==1 && com.model && com.NSsites) { /* branch-site models */
-         Qfactor = com.QfactorBS[ison];
-         for(h=0; h<com.ls; h++) 
-            com.siterates[h] = com.omegaBS[ison*com.ncatG+com.siteID[h]];
-      }
-
-      for(h=0; h<com.ls; h++) {
-         /* decide whether to recalcualte PMat[]. */
-         if (h==0 || (com.siterates && com.siterates[h]!=com.siterates[h-1])) {
-            rw = (com.siterates?com.siterates[h]:1);
-
-            switch(com.seqtype) {
-            case (BASEseq):
-               if(com.model<=TN93)
-                  PMatTN93(PMat, t*Qfactor*rw*Qrates[0], 
-                                 t*Qfactor*rw*Qrates[1], t*Qfactor*rw, com.pi);
-               else if(com.model==REV)
-                  PMatUVRoot(PMat, t*rw, com.ncode, U,V,Root);
-               break;
-
-            case (CODONseq): /* Watch out for NSsites model */
-               if(com.model || com.NSsites) { /* no need to update UVRoot if M0 */
-                  if(com.model && com.NSsites==0) /* branch */
-                     rw = nodes[ison].omega;  /* should be equal to com.rK[nodes[].label] */
-
-                  EigenQcodon(0, com.kappa, rw, com.pi, Root, U, V, PMat);
-               }
-               PMatUVRoot(PMat, t, com.ncode, U, V, Root); 
-               break;
-
-            case (AAseq):
-               PMatUVRoot(PMat, t*rw, com.ncode, U, V, Root);
-               break;
-            }
-            for(i=0; i<n; i++)
-               for(j=1;j<n;j++)
-                  PMat[i*n+j] += PMat[i*n+j-1];
-         }
-         for(j=0,from=com.z[ison][h],rw=rndu(); j<n-1; j++)
-            if(rw < PMat[from*n+j]) break;
-         com.z[ison][h] = j;
-      }
-
-      if(com.ls>longseq) printf("\r   nodes %2d -> %2d, evolving . .   ", inode+1, ison+1);
-
-      if(nodes[ison].nson) Evolve1(ison); 
-   }  /* for (is) */
-
-   if(inode==tree.root && com.ls>longseq)  printf("\r%s", strc(50,' '));
-}
-
-
-
-void Simulate (char *ctlf)
-{
-/* simulate nr data sets of nucleotide, codon, or AA sequences.
-   ls: number of nucleotides, codons, or AAs in each sequence.
-   All 64 codons are used for codon sequences.
-   When com.alpha or com.ncatG>1, sites are randomized after sequences are 
-   generated.
-   space[com.ls] is used to hold site marks.
-   format:  0: paml sites; 1: paml patterns; 2: paup nex; 3: paup JC69 format
- */
-   char *ancf="ancestral.txt", *siteIDf="siterates.txt";
-   FILE *fin, *fseq, *ftree=NULL, *fanc=NULL, *fsiteID=NULL;
-   char *paupstart="paupstart",*paupblock="paupblock",*paupend="paupend";
-   char line[32000];
-   int lline=32000, i,j,k, ir,n,nr, fixtree=1, sspace=10000, rooted=1;
-   int h=0,format=0, b[3]={0}, nrate=1, counts[NCATG];
-   int *siteorder=NULL;
-   char *tmpseq=NULL, *pc;
-   double birth=0, death=0, sample=1, mut=1, tlength, *space, *blengthBS;
-   double T,C,A,G,Y,R, Falias[NCATG];
-   int    Lalias[NCATG];
-
-   noisy = 1;
-   printf("\nReading options from data file %s\n", ctlf);
-   com.ncode = n = (com.seqtype==0 ? 4 : (com.seqtype==1?64:20));
-   fin = (FILE*)gfopen(ctlf,"r");
-   fscanf(fin, "%d", &format);
-   fgets(line, lline, fin);
-   printf("\nSimulated data will go into %s.\n", seqf[format]);
-   if(format==2) printf("%s, %s, & %s will be appended if existent.\n", paupstart,paupblock,paupend);
-
-   fscanf (fin, "%d", &i);
-   fgets(line, lline, fin);
-   SetSeed(i, 1);
-   fscanf (fin, "%d%d%d", &com.ns, &com.ls, &nr);
-   fgets(line, lline, fin);
-   i=(com.ns*2-1)*sizeof(struct TREEN);
-   if((nodes=(struct TREEN*)malloc(i))==NULL) error2("oom");
-
-   if(com.ns>NS) error2("too many seqs?");
-   printf ("\n%d seqs, %d sites, %d replicate(s)\n", com.ns, com.ls, nr);
-   k=(com.ns*com.ls* (com.seqtype==CODONseq?4:1) *nr)/1000+1;
-   printf ("Seq file will be about %dK bytes.\n",k);
-   for(i=0; i<com.ns; i++)          /* default spname */
-      sprintf(com.spname[i],"S%d",i+1);
-
-   if(fixtree) {
-      fscanf(fin, "%lf", &tlength);   fgets(line, lline, fin);
-      if(ReadTreeN(fin, &i, &j, 1, 1))  /* might overwrite spname */
-         error2("err tree..");
-
-      if(i==0) error2("use : to specify branch lengths in tree");
-      for(i=0,T=0; i<tree.nnode; i++) 
-         if(i!=tree.root) T += nodes[i].branch;
-      if(tlength>0) {
-         for(i=0; i<tree.nnode; i++) 
-            if(i!=tree.root) nodes[i].branch *= tlength/T;
-      }
-      printf("tree length = %.3f\n", (tlength>0?tlength:T));
-      if(com.ns<100) {
-         printf("\nModel tree & branch lengths:\n"); 
-         OutTreeN(F0,1,1); FPN(F0);
-         OutTreeN(F0,0,1); FPN(F0);
-      }
-      if(com.seqtype==CODONseq && com.model && !com.NSsites) { /* branch model */
-         FOR(i,tree.nnode) nodes[i].omega=nodes[i].label;
-         FPN(F0);  OutTreeN(F0, 1, PrBranch|PrLabel);  FPN(F0);
-      }
-   }
-   else {   /* random trees, broken or need testing? */
-      printf ("\nbirth rate, death rate, sampling fraction, mutation rate (tree height)?\n");
-      fscanf (fin, "%lf%lf%lf%lf", &birth, &death, &sample, &mut);
-      fgets(line, lline, fin);
-      printf("%9.4f %9.4f %9.4f %9.4f\n", birth, death, sample, mut);
-   }
-
-   if(com.seqtype==BASEseq) {
-      fscanf(fin,"%d", &com.model);
-      fgets(line, lline, fin);
-      if(com.model<0 || com.model>REV) error2("model err");
-      if(com.model==T92) error2("T92: please use HKY85 with T=A and C=G.");
-
-      printf("\nModel: %s\n", basemodels[com.model]);
-      if(com.model==REV)        nrate=5;
-      else if(com.model==TN93)  nrate=2;
-      FOR(i,nrate) fscanf(fin,"%lf",&Qrates[i]);
-      fgets(line, lline, fin);
-      if(nrate<=2) FOR(i,nrate) printf("kappa %9.5f\n",Qrates[i]); FPN(F0);
-      if(nrate==5) {
-         printf("a & b & c & d & e: ");
-         FOR(i,nrate) printf("%9.5f",Qrates[i]); FPN(F0);
-      }
-      if((com.model==JC69 || com.model==F81)&&Qrates[0]!=1) 
-         error2("kappa should be 1 for this model");
-   }
-   else if(com.seqtype==CODONseq) {
-      for(i=0; i<64; i++) 
-         getcodon(CODONs[i], i);
-      if(com.model==0 && com.NSsites) {  /* site model */
-         fscanf(fin,"%d", &com.ncatG);   fgets(line, lline, fin);
-         if(com.ncatG>NCATG) error2("ncatG>NCATG");
-         FOR(i,com.ncatG) fscanf(fin,"%lf",&com.freqK[i]);  fgets(line, lline, fin);
-         FOR(i,com.ncatG) fscanf(fin,"%lf",&com.rK[i]);     fgets(line, lline, fin);
-         printf("\n\ndN/dS (w) for site classes (K=%d)", com.ncatG);
-         printf("\nf: ");  FOR(i,com.ncatG) printf("%9.5f",com.freqK[i]);
-         printf("\nw: ");  FOR(i,com.ncatG) printf("%9.5f",com.rK[i]);  FPN(F0);
-      }
-      else if(com.model && com.NSsites) {  /* branchsite model */
-         fscanf(fin,"%d",&com.ncatG);   fgets(line, lline, fin);
-         if(com.ncatG>min2(NCATG,127)) error2("ncatG too large");
-         FOR(i,com.ncatG) fscanf(fin,"%lf",&com.freqK[i]);  fgets(line,lline,fin);
-         printf("\n%d site classes.\nFreqs: ", com.ncatG);
-         FOR(i,com.ncatG) printf("%9.5f",com.freqK[i]);
-
-         if((com.omegaBS=(double*)malloc((com.ncatG+2)*tree.nnode*sizeof(double)))==NULL)
-            error2("oom");
-         com.QfactorBS = com.omegaBS + com.ncatG*tree.nnode;
-         blengthBS = com.QfactorBS + tree.nnode;
-
-         for(i=0; i<tree.nnode; i++)
-            blengthBS[i] = nodes[i].branch;
-         for(k=0; k<com.ncatG; k++) {
-            ReadTreeN(fin, &i, &j, 0, 1);
-            if(i) error2("do not include branch lengths except in the first tree.");
-            if(!j) error2("Use # to specify omega's for branches");
-            for(i=0; i<tree.nnode; i++)  com.omegaBS[i*com.ncatG+k]=nodes[i].label;
-         }
-         for(i=0; i<tree.nnode; i++)
-            { nodes[i].branch=blengthBS[i];  nodes[i].label=nodes[i].omega=0; }
-         for(i=0; i<tree.nnode; i++) {  /* print out omega as node labels. */
-            nodes[i].nodeStr=pc=(char*)malloc(20*com.ncatG*sizeof(char));
-            sprintf(pc, "'[%.2f", com.omegaBS[i*com.ncatG+0]);
-            for(k=1,pc+=strlen(pc); k<com.ncatG; k++,pc+=strlen(pc)) 
-               sprintf(pc, ", %.2f", com.omegaBS[i*com.ncatG+k]);
-            sprintf(pc, "]'");
-         }
-         FPN(F0);  OutTreeN(F0,1,PrBranch|PrLabel);  FPN(F0);
-      }
-      else if(com.model==0) {  /* M0 */
-         fscanf(fin,"%lf",&com.omega);
-         fgets(line, lline, fin);
-         printf("omega = %9.5f\n",com.omega);
-         for(i=0; i<tree.nbranch; i++) 
-            nodes[tree.branches[i][1]].omega = com.omega;
-      }
-
-      fscanf(fin, "%lf", &com.kappa);   fgets(line, lline, fin);
-      printf("kappa = %9.5f\n",com.kappa);
-   }
-
-   if(com.seqtype==BASEseq || com.seqtype==AAseq) {
-      fscanf(fin,"%lf%d", &com.alpha, &com.ncatG);
-      fgets(line, lline, fin);
-      if(com.alpha) 
-        printf("Gamma rates, alpha =%.4f (K=%d)\n", com.alpha, com.ncatG);
-      else { 
-         com.ncatG=0; 
-         puts("Rates are constant over sites."); 
-      }
-   }
-   if(com.alpha || com.ncatG) { /* this is used for codon NSsites as well. */
-      k = com.ls;
-      if(com.seqtype==1 && com.model && com.NSsites) k *= tree.nnode;
-      if((com.siterates=(double*)malloc(k*sizeof(double)))==NULL) error2("oom1");
-      if((siteorder=(int*)malloc(com.ls*sizeof(int)))==NULL) error2("oom2");
-   }
-
-   if(com.seqtype==AAseq) { /* get aa substitution model and rate matrix */
-      fscanf(fin,"%d",&com.model);
-      printf("\nmodel: %s",aamodels[com.model]); 
-      if(com.model>=2)  { fscanf(fin,"%s",com.daafile); GetDaa(NULL,com.daa); }
-      fgets(line, lline, fin);
-   }
-
-   /* get freqs com.pi[] */
-   if((com.seqtype==BASEseq && com.model>K80) ||
-       com.seqtype==CODONseq ||
-      (com.seqtype==AAseq && (com.model==1 || com.model==3)))
-         for(k=0; k<com.ncode; k++) fscanf(fin,"%lf", &com.pi[k]);
-   else if(com.model==0 || (com.seqtype==BASEseq && com.model<=K80)) 
-      fillxc(com.pi, 1./com.ncode, com.ncode);
-
-   printf("sum pi = 1 = %.6f:", sum(com.pi,com.ncode));
-   matout2(F0, com.pi, com.ncode/4, 4, 9, 6);
-   if(com.seqtype==CODONseq) {
-      fscanf(fin, "%d", &com.icode);   fgets(line, lline, fin);
-      printf("genetic code = %d\n", com.icode);
-      for(k=0; k<com.ncode; k++) 
-         if(GeneticCode[com.icode][k] == -1 && com.pi[k]) 
-            error2("stop codons should have frequency 0?");
-   }
-   
-   if(com.seqtype==BASEseq) {
-      if(com.model<REV) {
-         T=com.pi[0]; C=com.pi[1]; A=com.pi[2]; G=com.pi[3]; Y=T+C; R=A+G;
-         if (com.model==F84) { 
-            Qrates[1]=1+Qrates[0]/R;   /* kappa2 */
-            Qrates[0]=1+Qrates[0]/Y;   /* kappa1 */
-         }
-         else if (com.model<=HKY85) Qrates[1]=Qrates[0];
-         Qfactor = 1/(2*T*C*Qrates[0] + 2*A*G*Qrates[1] + 2*Y*R);
-      }
-      else
-         if(com.model==REV) EigenQbase(Qrates, com.pi, Root,U,V,PMat);
-   }
-
-   /* get Qfactor for NSsites & NSbranchsite models */
-   if(com.seqtype==CODONseq && com.NSsites) {
-      if(!com.model) {  /* site models */
-         for(k=0,Qfactor=0; k<com.ncatG; k++) {
-            freqK_NS=com.freqK[k];
-            EigenQcodon(1, com.kappa,com.rK[k],com.pi, NULL,NULL,NULL, PMat);
-         }
-         Qfactor=1/Qfactor;
-         printf("Qfactor for NSsites model = %9.5f\n", Qfactor);
-      }
-      else {            /* branch-site models */
-         for(i=0; i<tree.nnode; i++) {
-            if(i==tree.root) { com.QfactorBS[i]=-1; continue; }
-            for(k=0,Qfactor=0; k<com.ncatG; k++) {
-               freqK_NS=com.freqK[k];
-               EigenQcodon(1, com.kappa,com.omegaBS[i*com.ncatG+k],com.pi, NULL,NULL,NULL, PMat);
-            }
-            com.QfactorBS[i]=1/Qfactor;  Qfactor=0;
-            printf("node %2d: Qfactor = %9.5f\n", i+1, com.QfactorBS[i]);
-         }
-      }
-   }
-   if(com.seqtype==CODONseq && com.ncatG<=1 && com.model==0)
-      EigenQcodon(0, com.kappa,com.omega, com.pi, Root, U, V, PMat);
-   else if(com.seqtype==AAseq)
-      EigenQaa(com.pi, Root, U, V,PMat);
-
-   puts("\nAll parameters are read.  Ready to simulate\n");
-   for(j=0; j<com.ns*2-1; j++)
-      com.z[j] = (unsigned char*)malloc(com.ls*sizeof(unsigned char));
-   sspace = max2(sspace, 8000000);
-   space  = (double*)malloc(sspace);
-   if(com.alpha || com.ncatG) tmpseq=(char*)space;
-   if (com.z[com.ns*2-1-1]==NULL) error2("oom for seqs");
-   if (space==NULL) {
-      printf("oom for space, %d bytes needed.", sspace);
-      exit(-1);
-   }
-
-   fseq = gfopen(seqf[format], "w");
-   if(format==2 || format==3) appendfile(fseq, paupstart);
-   
-   fanc = (FILE*)gfopen(ancf, "w");
-   if(fixtree) {
-      fputs("\nAncestral sequences generated during simulation ",fanc);
-      fprintf(fanc, "(check against %s)\n", seqf[format]);
-      OutTreeN(fanc,0,0); FPN(fanc); OutTreeB(fanc); FPN(fanc);
-   }
-   if(com.alpha || com.NSsites) {
-      fsiteID=(FILE*)gfopen(siteIDf,"w");
-      if(com.seqtype==1) fprintf(fsiteID, "\nSite class IDs\n");
-      else               fprintf(fsiteID, "\nRates for sites\n");
-      if(com.seqtype==CODONseq && com.NSsites) {
-         if(!com.model) matout(fsiteID,com.rK, 1,com.ncatG);
-         if((com.siteID=(char*)malloc(com.ls*sizeof(char)))==NULL) 
-            error2("oom siteID");
-      }
-   }
-
-   for (ir=0; ir<nr; ir++) {
-      if (!fixtree) {    /* right now tree is fixed */
-         RandomLHistory (rooted, space);
-         if (rooted && com.ns<10) j = GetIofLHistory ();
-         BranchLengthBD (1, birth, death, sample, mut);
-         if(com.ns<20) { 
-            printf ("\ntree used: "); 
-            OutTreeN(F0,1,1);
-            FPN(F0); 
-         }
-      }
-      MakeSeq(com.z[tree.root], com.ls);
-
-      if (com.alpha)
-         Rates4Sites(com.siterates, com.alpha, com.ncatG, com.ls, 0,space);
-      else if(com.seqtype==1 && com.NSsites) { /* for NSsites */
-         /* the table for the alias algorithm is the same, but ncatG is small. */
-         MultiNomialAliasSetTable(com.ncatG, com.freqK, Falias, Lalias, space);
-         MultiNomialAlias(com.ls, com.ncatG, Falias, Lalias, counts);
-
-         for (i=0,h=0; i<com.ncatG; i++)
-            for (j=0; j<counts[i]; j++) {
-               com.siteID[h]=(char)i;
-               com.siterates[h++]=com.rK[i]; /* overwritten later for branchsite */
-            }
-      }
-
-      Evolve1(tree.root);
-
-      /* randomize sites for site-class model */
-      if(com.siterates && com.ncatG>1) {
-         if(format==1 && ir==0) 
-            puts("\nrequested site pattern counts as output for site-class model.\n");
-         randorder(siteorder, com.ls, (int*)space);
-         for(j=0; j<tree.nnode; j++) {
-            memcpy(tmpseq,com.z[j],com.ls*sizeof(char));
-            for(h=0; h<com.ls; h++) com.z[j][h]=tmpseq[siteorder[h]];
-         }
-         if(com.alpha || com.ncatG>1) {
-            memcpy(space,com.siterates,com.ls*sizeof(double));
-            for(h=0; h<com.ls; h++) com.siterates[h]=space[siteorder[h]];
-         }
-         if(com.siteID) {
-            memcpy((char*)space,com.siteID,com.ls*sizeof(char));
-            for(h=0; h<com.ls; h++) com.siteID[h]=*((char*)space+siteorder[h]);
-         }
-      }
-
-      /* print sequences*/
-      if(format==1 || format==3) {
-         for(i=0; i<com.ns; i++) for(h=0; h<com.ls; h++)    com.z[i][h] ++;  /* coded as 1, 2, ... */
-         PatternWeightSimple();
-         for(i=0; i<com.ns; i++) for(h=0; h<com.npatt; h++) com.z[i][h] --;  /* coded as 0, 1, ... */
-         if(format==3) 
-            PatternWeightJC69like();
-      }
-      if(format==2 || format==3) fprintf(fseq,"\n\n[Replicate # %d]\n", ir+1);
-      printSeqs(fseq, NULL, NULL, format); /* printsma not usable as it codes into 0,1,...,60. */
-
-      if((format==2 || format==3) && !fixtree) {
-         fprintf(fseq,"\nbegin tree;\n   tree true_tree = [&U] "); 
-         OutTreeN(fseq,1,1); fputs(";\n",fseq);
-         fprintf(fseq,"end;\n\n");
-      }
-      if(format==2 || format==3) appendfile(fseq, paupblock);
-
-      /* print ancestral seqs, rates for sites. */
-      if(format!=1 && format!=3) {  /* don't print ancestors if site patterns are printed. */
-         j = (com.seqtype==CODONseq?3*com.ls:com.ls);
-         fprintf(fanc,"[replicate %d]\n",ir+1);
-
-         if(!fixtree) {
-            if(format<2)
-               { OutTreeN(fanc,1,1); FPN(fanc); FPN(fanc); }
-         }
-         else {
-            fprintf(fanc,"%6d %6d\n",tree.nnode-com.ns,j);
-            for(j=com.ns; j<tree.nnode; j++,FPN(fanc)) {
-               fprintf(fanc,"node%-26d  ", j+1);
-               print1seq(fanc, com.z[j], com.ls, NULL);
-            }
-            FPN(fanc);
-
-            if(fsiteID) {
-               if(com.seqtype==CODONseq && com.NSsites && com.model==0) { /* site model */
-                  k=0;
-                  if(com.rK[com.ncatG-1]>1)
-                     FOR(h,com.ls) if(com.rK[com.siteID[h]]>1) k++;
-                  fprintf(fsiteID, "\n[replicate %d: %2d]\n",ir+1, k);
-                  if(k)  for(h=0,k=0; h<com.ls; h++) {
-                     if(com.rK[com.siteID[h]]>1) { 
-                        fprintf(fsiteID,"%4d ",h+1); 
-                        if(++k%15==0) FPN(fsiteID);
-                     }
-                  }
-                  FPN(fsiteID);
-               }
-               else if(com.seqtype==CODONseq && com.NSsites && com.model) { /* branchsite */
-                  fprintf(fsiteID, "\n[replicate %d]\n",ir+1);
-                  for(h=0; h<com.ls; h++) {
-                     fprintf(fsiteID," %4d ", com.siteID[h]+1);
-                     if(h==com.ls-1 || (h+1)%15==0) FPN(fsiteID);
-                  }
-               }
-               else {       /* gamma rates */
-                  fprintf(fsiteID,"\n[replicate %d]\n",ir+1);
-                  for(h=0; h<com.ls; h++) {
-                     fprintf(fsiteID,"%7.4f ",com.siterates[h]);
-                     if(h==com.ls-1 || (h+1)%10==0) FPN(fsiteID);
-                  }
-               }
-            }
-         }
-      }
-
-      printf ("\rdid data set %d %s", ir+1, (com.ls>100000||nr<100? "\n" : ""));
-   }   /* for (ir) */
-   if(format==2 || format==3) appendfile(fseq, paupend);
-
-   fclose(fseq);  if(!fixtree) fclose(fanc);  
-   if(com.alpha || com.NSsites) fclose(fsiteID);
-   for(j=0; j<com.ns*2-1; j++) free(com.z[j]);
-   free(space);
-   if(com.model && com.NSsites) /* branch-site model */
-      for(i=0; i<tree.nnode; i++)  free(nodes[i].nodeStr);
-   free(nodes);
-   if(com.alpha || com.ncatG) { 
-      free(com.siterates);  com.siterates=NULL;
-      free(siteorder);
-      if(com.siteID) free(com.siteID);  com.siteID=NULL;
-   }
-   if(com.seqtype==1 && com.model && com.NSsites) free(com.omegaBS); 
-   com.omegaBS = NULL;
-
-   exit (0);
-}
-
-
-int GetSpnamesFromMB (FILE *fmb, char line[], int lline)
-{
-/* This reads species names from MrBayes output file fmb, like the following.
-
-      Taxon  1 -> 1_Arabidopsis_thaliana
-      Taxon  2 -> 2_Taxus_baccata
-*/
-   int j, ispecies;
-   char *p=NULL, *mbstr1="Taxon ", *mbstr2="->";
-
-   puts("Reading species names from mb output file.\n");
-   rewind(fmb);
-   for(ispecies=0; ; ) {
-      if(fgets(line, lline, fmb)==NULL) return(-1);
-      if(strstr(line, mbstr1) && strstr(line, mbstr2)) {
-         p=strstr(line, mbstr1)+5;
-         sscanf(p, "%d", &ispecies);
-         p=strstr(line, mbstr2)+3;
-         if(com.spname[ispecies-1][0]) 
-            error2("species name already read?");
-
-         for(j=0; isgraph(*p)&&j<lline; ) com.spname[ispecies-1][j++] = *p++;
-         com.spname[ispecies-1][j]=0;
-
-         printf("\tTaxon %2d:  %s\n", ispecies, com.spname[ispecies-1]);
-      }
-      else if (ispecies)
-         break;
-   }
-   com.ns=ispecies;
-   rewind(fmb);
-
-   return(0);
-}
-
-char *GrepLine (FILE*fin, char*query, char* line, int lline)
-{
-/* This greps infile to search for query[], and returns NULL or line[].
-*/
-   char *p=NULL;
-
-   rewind(fin);
-   for( ; ; ) {
-      if(fgets(line, lline, fin)==NULL) return(NULL);
-      if(strstr(line, query)) return(line);
-   }
-   return(NULL);
-}
-
-
-void CladeMrBayesProbabilities (char treefile[])
-{
-/* This reads a tree from treefile and then scans a set of MrBayes output files
-   (mbfiles) to retrieve posterior probabilities for every clade in that tree.
-   It first scans the first mb output file to get the species names.
-
-   Sample mb output:
-   6 -- ...........................*************   8001 1.000 0.005 (0.000)
-   7 -- ....................********************   8001 1.000 0.006 (0.000)
-
-   Note 4 Jan 2014: This uses parti2B[], and is broken after i rewrote 
-   Tree2Partition().  
-*/
-   int lline=100000, i,j,k, nib, inode, parti2B[NS];
-   char line[100000], *partition, *p;
-   char symbol[2]=".*", cladestr[NS+1]={0};
-   FILE *ftree, *fmb[20];
-   double *Pclade, t;
-/*
-   int nmbfiles=15;
-   char *mbfiles[]={"mb-1e-5.out", "mb-2e-5.out", "mb-3e-5.out", "mb-4e-5.out",
-"mb-5e-5.out", "mb-6e-5.out", "mb-7e-5.out", "mb-8e-5.out",
-"mb-9e-5.out", "mb-1e-4.out", "mb-2e-4.out", "mb-3e-4.out",
-"mb-5e-4.out", "mb-1e-3.out", "mb-1e-2.out"};
-*/
-   int nmbfiles=2;
-   char *mbfiles[]={"mb-1e-4.out", "mb-1e-1.out"};
-
-   printf("tree file is %s\nmb output files:\n", treefile);
-   ftree=gfopen(treefile,"r");
-   for(k=0; k<nmbfiles; k++)
-      fmb[k]=gfopen(mbfiles[k],"r");
-   for(k=0; k<nmbfiles; k++) printf("\t%s\n", mbfiles[k]);
-
-   GetSpnamesFromMB(fmb[0], line, lline);  /* read species names from mb output */
-
-   fscanf (ftree, "%d%d", &i, &k);
-   if(i && i!=com.ns) error2("do you mean to specify ns in the tree file?");
-   i=(com.ns*2-1)*sizeof(struct TREEN);
-   if((nodes=(struct TREEN*)malloc(i))==NULL) error2("oom");
-   ReadTreeN (ftree, &i, &j, 0, 1);
-
-   FPN(F0);  OutTreeN(F0, 0, 0);  FPN(F0);  FPN(F0);
-   nib=tree.nbranch-com.ns;
-   for(i=0;i<tree.nnode;i++) {
-      nodes[i].nodeStr = NULL;
-      if(i>com.ns) nodes[i].nodeStr=(char*)malloc(100*sizeof(char));
-   }
-
-   partition=(char*)malloc(nib*com.ns*sizeof(char));
-   if (partition==NULL) error2("oom");
-   if((Pclade=(double*)malloc(nib*nmbfiles*sizeof(double)))==NULL)
-      error2("oom");
-   for(i=0;i<nib*nmbfiles; i++) Pclade[i]=0;
-
-   Tree2Partition(partition);
-
-   for(i=0; i<nib; i++) {
-      inode=tree.branches[parti2B[i]][1];
-      if(partition[i*com.ns+0])
-         for(j=0; j<com.ns; j++) cladestr[j]=symbol[1-partition[i*com.ns+j]];
-      else
-         for(j=0; j<com.ns; j++) cladestr[j]=symbol[partition[i*com.ns+j]];
-      printf("#%2d branch %2d node %2d  %s", i+1, parti2B[i], inode, cladestr);
-
-      for(k=0; k<nmbfiles; k++) {
-         if(GrepLine(fmb[k], cladestr, line, lline)) {
-            p=strstr(line,cladestr);
-            sscanf(p+com.ns, "%lf%lf\0", &t, &Pclade[i*nmbfiles+k]);
-         }
-      }
-      for(k=0; k<nmbfiles; k++) printf("%6.2f", Pclade[i*nmbfiles+k]);
-      FPN(F0);
-      for(k=0,p=nodes[inode].nodeStr; k<nmbfiles; k++) {
-         sprintf(p, "%3.0f%s", Pclade[i*nmbfiles+k]*100,(k<nmbfiles-1?"/":""));
-         p+=4;
-      }
-   }
-   FPN(F0);  OutTreeN(F0,1,PrLabel);  FPN(F0);
-
-   for(i=0; i<tree.nnode; i++) free(nodes[i].nodeStr);
-   free(nodes); free(partition);  free(Pclade);
-   fclose(ftree);   
-   for(k=0; k<nmbfiles; k++) fclose(fmb[k]);
-   exit(0);
-}
+/* evolver.c
+   Copyright, Ziheng Yang, April 1995.
+
+     cl -Ot -O2 evolver.c tools.c
+     cl -Ot -O2 -DCodonNSbranches    -FeevolverNSbranches.exe    evolver.c tools.c
+     cl -Ot -O2 -DCodonNSsites       -FeevolverNSsites.exe       evolver.c tools.c
+     cl -Ot -O2 -DCodonNSbranchsites -FeevolverNSbranchsites.exe evolver.c tools.c
+
+     cc -fast -o evolver evolver.c tools.c -lm
+     cc -O4 -DCodonNSbranches -o evolverNSbranches evolver.c tools.c -lm
+     cc -O4 -DCodonNSsites -o evolverNSsites evolver.c tools.c -lm
+     cc -O4 -DCodonNSbranchsites -o evolverNSbranchsites evolver.c tools.c -lm
+
+     evolver
+     evolver 5 MCbase.dat
+     evolver 6 MCcodon.dat
+     evolver 7 MCaa.dat
+     evolver 9 <TreesFile> <MasterTreeFile>
+*/
+
+/*
+#define CodonNSbranches
+#define CodonNSsites
+#define CodonNSbranchsites
+*/
+
+#include "paml.h"
+
+#define NS            5000
+#define NBRANCH       (NS*2-2)
+#define MAXNSONS      20
+#define LSPNAME       50
+#define NCODE         64
+#define NCATG         40
+
+
+/* Global settings and data shared by the routines in this file. */
+struct CommonInfo {
+   unsigned char *z[2*NS-1];   /* one sequence per tree node; z[0..ns-1] are the data (see Evolve1) */
+   char spname[NS][LSPNAME+1], daafile[512], cleandata, readpattern;
+   int ns, ls, npatt, np, ntime, ncode, clock, rooted, model, icode;
+   int seqtype, *pose, ncatG, NSsites;   /* seqtype: 0, 1, 2 for bases, codons, & amino acids */
+   int ngene, lgene[1], posG[1+1];  /* not used */
+   double piG[1][4], rgene[1];  /* not used */
+   double *fpatt, kappa, omega, alpha, pi[64], *conP, daa[20*20];
+   double freqK[NCATG], rK[NCATG];
+   char *siteID;    /* used if ncatG>1 */
+   double *siterates;   /* rates for gamma or omega for site or branch-site models */
+   double *omegaBS, *QfactorBS;     /* omega IDs for branch-site models */
+}  com;
+/* Tree as a branch list: branches[k][0] and branches[k][1] are the two node
+   indices joined by branch k. */
+struct TREEB {
+   int nbranch, nnode, root, branches[NBRANCH][2];
+}  tree;
+/* Tree as a node array.  LabelClades() walks father links upwards until it
+   meets -1; Evolve1() walks sons[0..nson-1] downwards and uses branch as the
+   length of the branch leading to the node. */
+struct TREEN {
+   int father, nson, sons[MAXNSONS], ibranch;
+   double branch, age, omega, label, *conP;
+   char *nodeStr, fossil;
+}  *nodes;
+
+extern char BASEs[];
+extern int GeneticCode[][64], noisy;
+int LASTROUND=0; /* not used */
+
+#define EVOLVER
+#define NODESTRUCTURE
+#define BIRTHDEATH
+#include "treesub.c"
+#include "treespace.c"
+
+void TreeDistances(FILE* fout);
+void Simulate(char *ctlf);
+void MakeSeq(char *z, int ls);
+int EigenQbase(double rates[], double pi[], double Root[],double U[], double V[],double Q[]);
+int EigenQcodon (int getstats, double kappa,double omega,double pi[], double Root[], double U[], double V[], double Q[]);
+int EigenQaa(double pi[], double Root[], double U[], double V[],double Q[]);
+void CladeMrBayesProbabilities (char treefile[]);
+int between_f_and_x(void);
+void LabelClades(FILE *fout);
+
+char *MCctlf0[]={"MCbase.dat","MCcodon.dat","MCaa.dat"};
+char *seqf[]={"mc.paml", "mc.paml", "mc.nex", "mc.nex"};
+
+enum {JC69, K80, F81, F84, HKY85, T92, TN93, REV} BaseModels;
+char *basemodels[]={"JC69","K80","F81","F84","HKY85","T92","TN93","REV"};
+enum {Poisson, EqualInput, Empirical, Empirical_F} AAModels;
+char *aamodels[]={"Poisson", "EqualInput", "Empirical", "Empirical_F"};
+
+
+double PMat[NCODE*NCODE], U[NCODE*NCODE], V[NCODE*NCODE], Root[NCODE];
+static double Qfactor=-1, Qrates[5];  /* Qrates[] hold kappa's for nucleotides */
+
+
+/* Program entry point.
+
+   Usage (see also the header of this file):
+     evolver                                   interactive menu
+     evolver 5|6|7 [ctlfile]                   simulate bases, codons, amino acids
+     evolver 9 [treefile [mastertreefile [pick1tree]]]
+   A trailing "--stdout-no-buf" argument makes stdout unbuffered and is not
+   counted as a file argument.
+
+   Options 5-7 are handed to Simulate() and option 9 to CladeSupport(); the
+   remaining menu entries are carried out inside the menu loop, which then
+   shows the menu again.  Returns 0.
+*/
+int main (int argc, char*argv[])
+{
+   char *MCctlf=NULL, outf[512]="evolver.out", treefile[512]="mcmc.txt", mastertreefile[512]="\0";
+   int i, option=-1, ntree=1,rooted, BD=0, gotoption=0, pick1tree=-1;
+   int nargs=argc;   /* argc without a trailing --stdout-no-buf */
+   double birth=-1,death=-1,sample=-1,mut=-1, *space=NULL;
+   FILE *fout=gfopen(outf,"w");
+
+   printf("EVOLVER in %s\n", pamlVerStr);
+   com.alpha=0; com.cleandata=1; com.model=0; com.NSsites=0;
+
+   if(argc>2 && !strcmp(argv[argc-1], "--stdout-no-buf")) {
+      setvbuf(stdout, NULL, _IONBF, 0);
+      nargs = argc-1;   /* the flag must not be mistaken for a file name */
+   }
+   if(argc>1) {
+      gotoption=1;   sscanf(argv[1], "%d", &option);
+   }
+   if(argc==1)
+      printf("Results for options 1-4 & 8 go into %s\n",outf);
+   else if(option!=5 && option!=6 && option!=7 && option!=9) {
+      puts("Usage: \n\tevolver \n\tevolver option# MyDataFile"); exit(-1); 
+   }
+   if(option>=5 && option<=7) {
+      /* the simulation options are 5, 6 and 7; the control file is optional */
+      if(nargs>2) MCctlf = argv[2];
+   }
+   else if(option==9) {
+      /* keep the default names when no file is given; refuse names that
+         do not fit the fixed-size buffers */
+      if(nargs>2) {
+         if(strlen(argv[2])>=sizeof(treefile)) error2("tree file name too long");
+         strcpy(treefile, argv[2]);
+      }
+      if(nargs>3) {
+         if(strlen(argv[3])>=sizeof(mastertreefile)) error2("master tree file name too long");
+         strcpy(mastertreefile, argv[3]);
+      }
+      if(nargs>4) sscanf(argv[4], "%d", &pick1tree);
+   }
+
+#if defined (CodonNSbranches)
+   option=6;  com.model=1; 
+   MCctlf = (nargs==3 ? argv[2] : "MCcodonNSbranches.dat");
+   gotoption = 1;
+#elif defined (CodonNSsites)
+   option=6;  com.NSsites=3; 
+   MCctlf = (nargs==3 ? argv[2] : "MCcodonNSsites.dat");
+   gotoption = 1;
+#elif defined (CodonNSbranchsites)
+   option=6;  com.model=1; com.NSsites=3; 
+   MCctlf = (nargs==3 ? argv[2] : "MCcodonNSbranchsites.dat");
+   gotoption = 1;
+#endif
+
+   if(!gotoption) {
+      for(; ;) {
+         fflush(fout);
+         printf("\n\t(1) Get random UNROOTED trees?\n"); 
+         printf("\t(2) Get random ROOTED trees?\n"); 
+         printf("\t(3) List all UNROOTED trees?\n");
+         printf("\t(4) List all ROOTED trees?\n");
+         printf("\t(5) Simulate nucleotide data sets (use %s)?\n",MCctlf0[0]);
+         printf("\t(6) Simulate codon data sets      (use %s)?\n",MCctlf0[1]);
+         printf("\t(7) Simulate amino acid data sets (use %s)?\n",MCctlf0[2]);
+         printf("\t(8) Calculate identical bi-partitions between trees?\n");
+         printf("\t(9) Calculate clade support values (evolver 9 treefile mastertreefile <pick1tree>)?\n");
+         printf("\t(11) Label clades?\n");
+         printf("\t(0) Quit?\n");
+
+         /* unreadable input or end of input is treated as Quit, so that the
+            menu cannot spin forever */
+         if(scanf("%d", &option)!=1) exit(0);
+
+         if(option==0) exit(0);
+         if(option>=5 && option<=7) break;
+         if(option<5)  { 
+            printf ("No. of species: ");
+            scanf ("%d", &com.ns);
+         }
+         if(com.ns>NS) error2 ("Too many species.  Raise NS.");
+         /* scratch space is allocated once and reused on later menu passes */
+         if(space==NULL && (space=(double*)malloc(10000*sizeof(double)))==NULL) error2("oom");
+         rooted = !(option%2);
+         if(option<3) {
+            printf("\nnumber of trees & random number seed? ");
+            scanf("%d%d", &ntree, &i);
+            SetSeed(i, 1);
+            printf ("Want branch lengths from the birth-death process (0/1)? ");
+            scanf ("%d", &BD);
+         }
+         if(option<=4) {
+            if(com.ns<3) error2("no need to do this?");
+            i = (com.ns*2-1)*sizeof(struct TREEN);
+            if((nodes=(struct TREEN*)malloc(i)) == NULL) 
+               error2("oom");
+         }
+         switch (option) {
+         case(1):   /* random UNROOTED trees */
+         case(2):   /* random ROOTED trees */
+            /* default names */
+            if(com.ns<=52)
+               for(i=0; i<com.ns; i++)  sprintf(com.spname[i], "%c", (i<26 ? 'A'+i : 'a'+i-26));
+            else
+               for(i=0; i<com.ns; i++)  sprintf(com.spname[i], "S%d", i+1);
+
+            if(BD) {
+               printf ("\nbirth rate, death rate, sampling fraction, and ");
+               printf ("mutation rate (tree height)?\n");
+               scanf ("%lf%lf%lf%lf", &birth, &death, &sample, &mut);
+            }
+            for(i=0;i<ntree;i++) {
+               RandomLHistory (rooted, space);
+               if(BD)
+                  BranchLengthBD (1, birth, death, sample, mut);
+               if(com.ns<20&&ntree<10) { OutTreeN(F0, 0, BD); puts("\n"); }
+               OutTreeN(fout, 1, BD);  FPN(fout);
+            }
+            /*
+            for (i=0; i<com.ns-2-!rooted; i++)
+               Ib[i] = (int)((3.+i)*rndu());
+            MakeTreeIb (com.ns, Ib, rooted);
+            */
+            break;
+         case(3):
+         case(4): 
+            ListTrees(fout, com.ns, rooted);
+            break;
+         case(8):  TreeDistances(fout);  break;
+         case(9):  
+            printf("tree file names? ");
+            scanf("%511s%511s", treefile, mastertreefile);   /* both buffers hold 512 bytes */
+            break;
+         case(10): between_f_and_x();    break;
+         case(11): LabelClades(fout);    break;
+         default:  exit(0);
+         }
+         /* option 9 is carried out after the menu loop, so leave the loop */
+         if(option==9) break;
+      }
+   }
+
+   if(option>=5 && option<=7) {
+      com.seqtype = option-5;  /* 0, 1, 2 for bases, codons, & amino acids */
+      Simulate(MCctlf ? MCctlf : MCctlf0[option-5]);
+   }
+   else if(option==9) {
+      CladeSupport(fout, treefile, 1, mastertreefile, pick1tree);
+      /* CladeMrBayesProbabilities("/papers/BPPJC3sB/Karol.trees"); */
+   }
+   free(space);
+   return(0);
+}
+
+
+int between_f_and_x (void)
+{
+/* This helps with the exponential transform for frequency parameters.
+   It repeatedly prompts for a direction (0: x=>f; 1: f=>x; -1: end) and the
+   number of classes n (1..100), reads the first n-1 values, fills in the
+   last one, calls f_and_x() and prints the result with matout().
+   Returns 0 when the direction is -1 or when the input ends.
+*/
+   int i,n,fromf=0;
+   double x[100];
+
+   for(;;) {
+      printf("\ndirection (0:x=>f; 1:f=>x; -1:end)  &  #classes? ");
+      /* stop on end of input as well, otherwise the prompt repeats forever */
+      if(scanf("%d",&fromf)!=1 || fromf==-1) return(0);
+      if(scanf("%d", &n)!=1) return(0);
+      if(n<1) error2("need at least one class");   /* x[n-1] is written below */
+      if(n>100) error2("too many classes");
+      printf("input the first %d values for %s? ",n-1,(fromf?"f":"x"));
+      for(i=0; i<n-1; i++)
+         if(scanf("%lf",&x[i])!=1) error2("error reading values");
+      x[n-1]=(fromf?1-sum(x,n-1):0);
+      f_and_x(x, x, n, fromf, 1);
+      matout(F0,x,1,n);
+   }
+}
+
+
+void LabelClades(FILE *fout)
+{
+/* This reads in a tree and scans species names to check whether they form a 
+   paraphyletic group and then labels the clade.
+   It assumes that the tree is unrooted, and so goes through two rounds to check
+   whether the remaining seqs form a monophyletic clade.
+
+   The tree file name, the rooted/unrooted choice and the selection strings
+   are read from stdin; entering "end" (or end of input) stops the selection.
+   The labelled tree is written to fout and the program then exits.
+   anc[j-ns] is a bit set, one bit per species, recording which selected
+   species have internal node j as an ancestor.
+*/
+   FILE *ftree;
+   int unrooted=1,iclade, sizeclade=0, mrca=-1, paraphyl, is, imrca, i,j,k, lasts=-1, haslength;
+   char key[96]="A", treef[64]="/A/F/flu/HA.all.prankcodon.tre", *p,chosen[NS], *endstr="end";
+   int *anc[NS-1], loc, bitmask, SI=sizeof(int)*8;
+   int debug;
+
+   printf("Tree file name? ");
+   scanf ("%63s", treef);      /* treef holds 64 bytes */
+   printf("Treat tree as unrooted (0 no, 1 yes)? ");
+   scanf ("%d", &unrooted);
+
+   ftree = gfopen (treef,"r");
+   if(fscanf (ftree, "%d%d", &com.ns, &j)<1 || com.ns<=0)
+      error2("need ns in tree file");
+   /* chosen[], anc[] and com.spname[] are all sized by NS */
+   if(com.ns>NS) error2("Too many species.  Raise NS.");
+   debug = (com.ns<20);
+
+   i = (com.ns*2-1)*sizeof(struct TREEN);
+   if((nodes=(struct TREEN*)malloc(i))==NULL) error2("oom");
+   for(i=0; i<com.ns*2-1; i++)  nodes[i].nodeStr = NULL;
+   for(i=0; i<com.ns-1; i++) {
+      anc[i] = (int*)malloc((com.ns/SI+1)*sizeof(int));
+      if(anc[i]==NULL)  error2("oom");
+   }
+   ReadTreeN(ftree, &haslength, &j, 1, 0);
+   fclose(ftree);
+   if(debug) { OutTreeN(F0, 1, PrNodeNum);  FPN(F0); }
+
+   for(iclade=0; iclade<com.ns-1; iclade++) {
+      printf("\nString for selecting sequences (followed by non-digit) (end to end)? ");
+      /* key holds 96 bytes; end of input is treated like "end" */
+      if(scanf("%95s", key)!=1 || strcmp(endstr, key) == 0)
+         break;
+      for(i=0; i<com.ns; i++) 
+         chosen[i] = '\0';
+      sizeclade = 0;  mrca = -1;  lasts = -1;
+
+      /* a name is selected if it contains key and the character after the
+         match is not a digit */
+      k = strlen(key);
+      for(i=0; i<com.ns; i++) {
+         if( (p=strstr(com.spname[i], key)) 
+            && !isdigit((unsigned char)p[k]) )
+               chosen[i] = 1;
+      }
+
+      /* look for MRCA, going through two rounds, assuming unrooted tree */
+      for(imrca=0; imrca<1+unrooted; imrca++) {
+         if(imrca) 
+            for(i=0; i<com.ns; i++) chosen[i] = 1 - chosen[i]; 
+
+         for(i=0,sizeclade=0; i<com.ns; i++) 
+            if(chosen[i]) {
+               sizeclade ++;
+               lasts = i;
+            }
+
+         if(sizeclade <= 1 || sizeclade >= com.ns-1) {
+            puts("unable to form a clade.  <2 seqs.");
+            break;
+         }
+         for(i=0; i<com.ns-1; i++) for(j=0; j<com.ns/SI+1; j++) 
+            anc[i][j] = 0;
+         for(is=0; is<com.ns; is++) {
+            if(chosen[is]==0) continue;
+            /* shift as unsigned: 1<<(SI-1) is undefined for a signed int */
+            loc = is/SI;  bitmask = (int)(1u << (is%SI));
+            for(j=nodes[is].father; j!=-1; j=nodes[j].father) {
+               anc[j-com.ns][loc] |= bitmask;
+               if(is==lasts) {
+                  /* first ancestor of the last selected species that covers
+                     all selected species is the MRCA */
+                  for(i=0,k=0; i<com.ns; i++)
+                     if(anc[j-com.ns][i/SI] & (int)(1u<<(i%SI)))
+                        k ++;
+                  if(k==sizeclade) {
+                     mrca = j;  break;
+                  }
+               }
+            }
+         }
+         if(imrca==0 && mrca!=tree.root) /* 1st round is enough */
+            break;
+      }
+
+      if(sizeclade <= 1 || sizeclade >= com.ns-1 || mrca==tree.root) {
+         printf("Unable to label.  Ignored.");
+         continue;
+      }
+
+      if(debug) 
+         for(is=0; is<com.ns-1; is++) {
+            printf("\nnode %4d: ", is+com.ns);
+            for(j=0; j<com.ns; j++) {
+               loc = j/SI;  bitmask = (int)(1u << (j%SI));
+               printf(" %d", (anc[is][loc] & bitmask) != 0);
+            }
+         }
+
+      printf("\nClade #%d (%s): %d seqs selected, MRCA is %d\n", iclade+1, key, sizeclade, mrca+1);
+      /* count unselected species that also descend from the MRCA */
+      for(is=0,paraphyl=0; is<com.ns; is++) {
+         if(chosen[is] == 0)
+            for(j=nodes[is].father; j!=-1; j=nodes[j].father)
+               if(j==mrca) { paraphyl++;  break; }
+      }
+      if(paraphyl) 
+         printf("\nThis clade is paraphyletic, & includes %d other sequences\n", paraphyl);
+
+      nodes[mrca].label = iclade+1;
+      if(debug) OutTreeN(F0, 1, haslength|PrLabel);
+   }
+
+   for(i=0; i<com.ns-1; i++)  free(anc[i]);
+   OutTreeN(fout, 1, haslength|PrLabel);  FPN(fout);
+   printf("Printed final tree with labels in evolver.out\n");
+   exit(0);
+}
+
+void TreeDistanceDistribution (FILE* fout)
+{
+/* This calculates figure 3.7 of Yang (2006).
+   This reads the file of all trees (such as 7s.all.trees), and calculates the 
+   distribution of partition distance in all pairwise comparisons.
+
+   The tree file name is read from stdin.  For every tree the proportions of
+   other trees sharing 0, 1, 2, ... bipartitions are written to fout, and the
+   running mean over trees is printed on the screen.  The program then exits.
+*/
+   int i,j,ntree, k,*nib, nsame, IBsame[NS];
+   size_t lpart, nbytes;      /* bytes per tree, and for all trees */
+   char treef[64]="5s.all.trees", *partition;
+   FILE *ftree;
+   double mPD[NS], PD1[NS];  /* distribution of partition distances */
+
+   puts("Tree file name?");
+   scanf ("%63s", treef);     /* treef holds 64 bytes */
+
+   ftree=gfopen (treef,"r");
+   if(fscanf (ftree, "%d%d", &com.ns, &ntree)!=2)
+      error2("need ns & ntree in tree file");
+   printf("%2d sequences %2d trees.\n", com.ns, ntree);
+   /* mPD[], PD1[] and IBsame[] are sized by NS */
+   if(com.ns<3 || com.ns>NS) error2("Too many species.  Raise NS.");
+   if(ntree<2) error2("ntree");
+   i=(com.ns*2-1)*sizeof(struct TREEN);
+   if((nodes=(struct TREEN*)malloc(i))==NULL) error2("oom");
+
+   /* sizes are kept in size_t: ntree*lpart overflows an int for large files */
+   lpart = (size_t)(com.ns-1)*com.ns*sizeof(char);
+   nbytes = (size_t)ntree*lpart;
+   printf("\n%lu bytes of space requested.\n", (unsigned long)nbytes);
+   partition = (char*)malloc(nbytes);
+   nib = (int*)malloc(ntree*sizeof(int));
+   if (partition==NULL || nib==NULL) error2("out of memory");
+
+   puts("\ntree #: mean prop of tree pairs with 0 1 2 ... shared bipartitions\n");
+   fputs("\ntree #: prop of tree pairs with 0 1 2 ... shared bipartitions\n",fout);
+   for (i=0; i<ntree; i++) {
+      ReadTreeN (ftree, &j, &k, 0, 1); 
+      nib[i]=tree.nbranch-com.ns;
+      Tree2Partition(partition+i*lpart);
+   }
+   for(k=0; k<com.ns-3; k++) mPD[k]=0;
+   for (i=0; i<ntree; i++,FPN(fout)) {
+      for(k=0; k<com.ns-3; k++) PD1[k]=0;
+      for (j=0; j<ntree; j++) {
+         if(j==i) continue;
+         nsame=NSameBranch(partition+i*lpart,partition+j*lpart, nib[i],nib[j],IBsame);
+         PD1[nsame] ++;
+      }
+      for(k=0; k<com.ns-3; k++) PD1[k] /= (ntree-1.);
+      /* running mean over the trees processed so far */
+      for(k=0; k<com.ns-3; k++) mPD[k] = (mPD[k]*i+PD1[k])/(i+1.);
+      printf("%8d (%5.1f%%):", i+1,(i+1.)/ntree*100);
+      for(k=0; k<com.ns-3; k++) printf(" %7.4f", mPD[k]);
+      fprintf(fout, "%8d:", i+1);  for(k=0; k<com.ns-3; k++) fprintf(fout, " %7.4f", PD1[k]);
+      printf("%s", (com.ns<8||(i+1)%100==0 ? "\n" : "\r"));
+   }
+   free(partition); free(nodes); free(nib); fclose(ftree);
+   exit(0);
+}
+
+
+void TreeDistances (FILE* fout)
+{
+/* I think this is broken after i changed the routine Tree2Partition().
+
+   Counts identical bi-partitions between the trees in a tree file whose
+   first line gives the number of species and the number of trees.  The file
+   name and the mode are read from stdin:
+     1: compare the first tree with each of the others;
+     2: all pairwise comparisons (the default if the mode cannot be read).
+   Results go to fout, and partly to the screen.
+
+   NOTE(review): parti2B[] is read in mode 1 but is never assigned in this
+   function, so the branch numbers printed from it look unreliable -- confirm
+   against the current Tree2Partition().
+*/
+   int i,j,ntree, k,*nib, parti2B[NS], nsame, IBsame[NS],nIBsame[NS], lpart=0;
+   char treef[64]="5s.all.trees", *partition;
+   FILE *ftree;
+   double psame, mp, vp;
+
+   /*
+   TreeDistanceDistribution(fout);
+   */
+
+   puts("\nNumber of identical bi-partitions between trees.\nTree file name?");
+   scanf ("%s", treef);
+
+   ftree=gfopen (treef,"r");
+   fscanf (ftree, "%d%d", &com.ns, &ntree);
+   printf("%2d sequences %2d trees.\n", com.ns, ntree);
+   i=(com.ns*2-1)*sizeof(struct TREEN);
+   if((nodes=(struct TREEN*)malloc(i))==NULL) error2("oom");
+
+   if(ntree<2) error2("ntree");
+   printf ("\n%d species, %d trees\n", com.ns, ntree);
+   puts("\n\t1: first vs. rest?\n\t2: all pairwise comparisons?\n");
+   k=2;
+   scanf("%d", &k);
+
+   /* (ns-1)*ns bytes per tree; mode 1 keeps only two trees in memory */
+   lpart=(com.ns-1)*com.ns*sizeof(char);
+   i=(k==1?2:ntree)*lpart;
+   printf("\n%d bytes of space requested.\n", i);
+   partition=(char*)malloc(i);
+   nib=(int*)malloc(ntree*sizeof(int));
+   if (partition==NULL || nib==NULL) error2("out of memory");
+
+   if(k==2) {    /* pairwise comparisons */
+      fputs("Number of identical bi-partitions in pairwise comparisons\n",fout);
+      for (i=0; i<ntree; i++) {
+         ReadTreeN (ftree, &j, &k, 0, 1); 
+         nib[i]=tree.nbranch-com.ns;
+         Tree2Partition(partition+i*lpart);
+      }
+      /* lower triangle: tree i against every earlier tree j */
+      for (i=0; i<ntree; i++,FPN(F0),FPN(fout)) {
+         printf("%2d (%2d):", i+1,nib[i]);
+         fprintf(fout,"%2d (%2d):", i+1,nib[i]);
+         for (j=0; j<i; j++) {
+            nsame=NSameBranch(partition+i*lpart,partition+j*lpart, nib[i],nib[j],IBsame);
+            printf(" %2d", nsame);
+            fprintf(fout," %2d", nsame);
+         }
+      }
+   }
+   else {  /* first vs. others */
+      ReadTreeN (ftree, &j, &k, 0, 1);
+      nib[0]=tree.nbranch-com.ns;
+      if (nib[0]==0) error2("1st tree is a star tree..");
+      Tree2Partition (partition);
+      fputs ("Comparing the first tree with the others\nFirst tree:\n",fout);
+      OutTreeN(fout,0,0);  FPN(fout);  OutTreeB(fout);  FPN(fout); 
+      fputs ("\nInternal branches in the first tree:\n",fout);
+      FOR(i,nib[0]) { 
+         k=parti2B[i];   /* NOTE(review): parti2B[] is not initialised here */
+         fprintf(fout,"%3d (%2d..%-2d): ( ",
+            i+1,tree.branches[k][0]+1,tree.branches[k][1]+1);
+         FOR(j,com.ns) if(partition[i*com.ns+j]) fprintf(fout,"%d ",j+1);
+         fputs(")\n",fout);
+      }
+      if(nodes[tree.root].nson<=2) 
+         fputs("\nRooted tree, results may not be correct.\n",fout);
+      fputs("\nCorrect internal branches compared with the 1st tree:\n",fout);
+      FOR(k,nib[0]) nIBsame[k]=0;
+      /* each later tree is read into the second slot and compared with tree 1;
+         mp and vp are updated incrementally from the proportions psame */
+      for (i=1,mp=vp=0; i<ntree; i++,FPN(fout)) {
+         ReadTreeN (ftree, &j, &k, 0, 1); 
+         nib[1]=tree.nbranch-com.ns;
+         Tree2Partition(partition+lpart);
+         nsame=NSameBranch (partition,partition+lpart, nib[0],nib[1],IBsame);
+
+         psame=nsame/(double)nib[0];
+         FOR(k,nib[0]) nIBsame[k]+=IBsame[k];
+         fprintf(fout,"1 vs. %3d: %4d: ", i+1,nsame);
+         FOR(k,nib[0]) if(IBsame[k]) fprintf(fout," %2d", k+1);
+         printf("1 vs. %5d: %6d/%d  %10.4f\n", i+1,nsame,nib[0],psame);
+         vp += square(psame - mp)*(i-1.)/i;
+         mp=(mp*(i-1.) + psame)/i;
+      }
+      vp=(ntree<=2 ? 0 : sqrt(vp/((ntree-1-1)*(ntree-1.))));
+      fprintf(fout,"\nmean and S.E. of proportion of identical partitions\n");
+      fprintf(fout,"between the 1st and all the other %d trees ", ntree-1);
+      fprintf(fout,"(ignore these if not revelant):\n %.4f +- %.4f\n", mp, vp);
+      fprintf(fout,"\nNumbers of times, out of %d, ", ntree-1);
+      fprintf(fout,"interior branches of tree 1 are present");
+      fputs("\n(This may be bootstrap support for nodes in tree 1)\n",fout);
+      FOR(k,nib[0]) { 
+         i=tree.branches[parti2B[k]][0]+1;  j=tree.branches[parti2B[k]][1]+1; 
+         fprintf(fout,"%3d (%2d..%-2d): %6d (%5.1f%%)\n",
+            k+1,i,j,nIBsame[k],nIBsame[k]*100./(ntree-1.));
+      }
+   }
+   free(partition);  free(nodes); free(nib);  fclose(ftree);
+}
+
+
+
+int EigenQbase(double rates[], double pi[], 
+    double Root[],double U[],double V[],double Q[])
+{
+/* Construct the rate matrix Q[] for nucleotide model REV.
+   rates[0..4] are the five exchangeabilities, taken in the order of the upper
+   triangle of the 4x4 matrix; the sixth (between states 2 and 3) is fixed at 1.
+   pi[] holds the four base frequencies and is used both for building Q and
+   for its decomposition.  Q is scaled by 1/mr, where mr is the mean rate
+   -sum_i pi[i]*Q[i][i], and is then passed to eigenQREV() together with
+   Root[], U[] and V[].  Returns 0.
+*/
+   int i,j,k;
+   double mr, space[4];
+
+   zero (Q, 16);
+   for (i=0,k=0; i<3; i++) for (j=i+1; j<4; j++)
+      if (i*4+j!=11) Q[i*4+j]=Q[j*4+i]=rates[k++];   /* entry 11 is the pair (2,3) */
+   Q[3*4+2]=Q[2*4+3]=1;
+   for (i=0; i<4; i++) for (j=0; j<4; j++) Q[i*4+j] *= pi[j];
+   for (i=0,mr=0; i<4; i++) 
+      { Q[i*4+i]=0; Q[i*4+i]=-sum(Q+i*4, 4); mr-=pi[i]*Q[i*4+i]; }
+   abyx (1/mr, Q, 16);
+
+   /* use the pi[] argument here as well, not the global com.pi */
+   eigenQREV(Q, pi, 4, Root, U, V, space);
+   return (0);
+}
+
+
+static double freqK_NS=-1;
+
+int EigenQcodon (int getstats, double kappa, double omega, double pi[],
+    double Root[], double U[], double V[], double Q[])
+{
+/* Construct the rate matrix Q[].
+   64 codons are used, and stop codons have 0 freqs.
+
+   Only codon pairs that differ at one position and are both sense codons get
+   a nonzero rate: 1, times kappa for a transition, times omega if the two
+   codons code for different amino acids, times pi[] of the target codon.
+   mr is the mean rate -sum_i pi[i]*Q[i][i].
+
+   getstats != 0: only adds freqK_NS*mr to the file-level Qfactor.
+   getstats == 0: scales Q by 1/mr (com.ncatG==0) or by Qfactor (NSsites
+   models) and passes it to eigenQREV() with Root[], U[] and V[].
+   Returns 0.
+*/
+   int n=com.ncode, i,j,k, c[2],ndiff,pos=0,from[3],to[3];
+   double mr, space[64];
+   
+   for(i=0; i<n*n; i++) Q[i] = 0;
+   for (i=0; i<n; i++) for (j=0; j<i; j++) {
+      from[0]=i/16; from[1]=(i/4)%4; from[2]=i%4;
+      to[0]=j/16;   to[1]=(j/4)%4;   to[2]=j%4;
+      c[0]=GeneticCode[com.icode][i];   c[1]=GeneticCode[com.icode][j];
+      if (c[0]==-1 || c[1]==-1)  continue;
+      for (k=0,ndiff=0; k<3; k++)  if (from[k]!=to[k]) { ndiff++; pos=k; }
+      if (ndiff!=1)  continue;
+      Q[i*n+j]=1;
+      /* base codes sum to 1 or 5 exactly for the pairs (0,1) and (2,3) */
+      if ((from[pos]+to[pos]-1)*(from[pos]+to[pos]-5)==0)  Q[i*n+j]*=kappa;
+      if(c[0]!=c[1])  Q[i*n+j]*=omega;
+      Q[j*n+i]=Q[i*n+j];
+   }
+   /* use the pi[] argument throughout, not a mix of pi[] and com.pi[] */
+   for(i=0; i<n; i++) for(j=0; j<n; j++)
+      Q[i*n+j] *= pi[j];
+   for(i=0,mr=0;i<n;i++) { 
+      Q[i*n+i] = -sum(Q+i*n,n);   /* the diagonal is still 0 at this point */
+      mr -= pi[i]*Q[i*n+i]; 
+   }
+
+   if(getstats)
+      Qfactor += freqK_NS * mr;
+   else {
+      if(com.ncatG==0) for(i=0; i<n*n; i++) Q[i]*=1/mr;
+      else             for(i=0; i<n*n; i++) Q[i]*=Qfactor;  /* NSsites models */
+      eigenQREV(Q, pi, n, Root, U, V, space);
+   }
+   return (0);
+}
+
+
+
+int EigenQaa (double pi[], double Root[], double U[], double V[], double Q[])
+{
+/* Construct the rate matrix Q[] for the 20 amino acids.
+   Exchangeabilities are all 1 for Poisson and EqualInput, and com.daa[]/100
+   for Empirical and Empirical_F; each column j is then multiplied by pi[j].
+   Q is passed to eigenQREV() unscaled and the eigenvalues Root[] are divided
+   by the mean rate mr afterwards.  Returns 0.
+*/
+   int n=20, i,j;
+   double mr, space[20];
+
+   for (i=0; i<n*n; i++) Q[i]=0;
+   switch (com.model) {
+   case (Poisson)   : case (EqualInput) : 
+      fillxc (Q, 1., n*n);  break;
+   case (Empirical)   : case (Empirical_F):
+      for (i=0; i<n; i++) for (j=0; j<i; j++)
+         Q[i*n+j]=Q[j*n+i]=com.daa[i*n+j]/100;
+      break;
+   }
+   /* use the pi[] argument, which was previously ignored in favour of com.pi */
+   for (i=0; i<n; i++) for (j=0; j<n; j++) Q[i*n+j]*=pi[j];
+   for (i=0,mr=0; i<n; i++) {
+      Q[i*n+i]=0; Q[i*n+i]=-sum(Q+i*n,n);  mr-=pi[i]*Q[i*n+i]; 
+   }
+
+   eigenQREV(Q, pi, n, Root, U, V, space);
+   for (i=0; i<n; i++)  Root[i]=Root[i]/mr;
+
+   return (0);
+}
+
+
+int GetDaa (FILE* fout, double daa[])
+{
+/* Get the amino acid substitution rate matrix (grantham, dayhoff, jones, etc).
+   The lower triangle is read from com.daafile and mirrored into daa[20*20],
+   with a zero diagonal.  For the Empirical model the 20 amino acid
+   frequencies that follow are read into com.pi[] and must sum to 1.
+   If fout is not NULL the lower triangle is echoed to it.
+   Stops through error2() if the file is too short or malformed.  Returns 0.
+*/
+   FILE * fdaa;
+   char aa3[4]="";
+   int i,j, n=20;
+
+   fdaa=gfopen(com.daafile, "r");
+   printf("\nReading rate matrix from %s\n", com.daafile);
+
+   for (i=0; i<n; i++)  for (j=0,daa[i*n+i]=0; j<i; j++)  {
+      /* same check as for the frequencies below */
+      if(fscanf(fdaa, "%lf", &daa[i*n+j])!=1) error2("err aaRatefile");
+      daa[j*n+i]=daa[i*n+j];
+   }
+   if (com.model==Empirical) {
+      for (i=0; i<n; i++)
+         if(fscanf(fdaa,"%lf",&com.pi[i])!=1) error2("err aaRatefile");
+      if (fabs(1-sum(com.pi,20))>1e-4) error2("\nSum of aa freq. != 1\n");
+   }
+   fclose (fdaa);
+
+   if (fout) {
+      fprintf (fout, "\n%s\n", com.daafile);
+      for (i=0; i<n; i++) {
+         fprintf (fout, "\n%4s", getAAstr(aa3,i));
+         for (j=0; j<i; j++)  fprintf (fout, "%5.0f", daa[i*n+j]); 
+      }
+      FPN (fout);
+   }
+
+   return (0);
+}
+
+
+
+
+void MakeSeq(char*z, int ls)
+{
+/* generate a random sequence of nucleotides, codons, or amino acids by 
+   sampling com.pi[], or read the ancestral sequence from the file RootSeq.txt
+   if the file exists.
+
+   z[] receives com.ls states coded 0..com.ncode-1.
+   NOTE(review): the ls argument is not used; the length is taken from com.ls.
+   Characters in RootSeq.txt that are not letters are skipped, and a codon is
+   built from three letters.  A state outside 0..ncode-1 stops the program:
+   Evolve1() uses the stored states as row indices into PMat[].
+*/
+   int i,j,h, n=com.ncode, ch, n31=(com.seqtype==1?3:1), lst;
+   double p[64],r, small=1e-5;
+   char rootseqf[]="RootSeq.txt", codon[4]="   ";
+   FILE *fseq=(FILE*)fopen(rootseqf,"r");
+   static int times=0;
+
+   if(fseq) {
+      if(times++==0) printf("Reading sequence at the root from file.\n\n");
+      if(com.siterates && com.ncatG>1) 
+         error2("sequence for root doesn't work for site-class models");
+
+      for(lst=0; ; ) {
+         for(i=0; i<n31; i++) {
+            while((ch=fgetc(fseq)) !=EOF && !isalpha(ch)) ;
+            if(ch==EOF) error2("EOF when reading root sequence.");
+            codon[i]=(char)(ch=CodeChara((char)ch, com.seqtype));
+         }
+         if(com.seqtype==1) ch = codon[0]*16 + codon[1]*4 + codon[2];
+         if(ch<0 || ch>n-1) {
+            /* do not go on to index com.pi[] or store an invalid state */
+            printf("error when reading site %d\n", lst+1);
+            error2("invalid character in root sequence");
+         }
+         if(com.seqtype==1 && com.pi[ch]==0)
+            printf("you seem to have a stop codon in the root sequence\n");
+
+         z[lst++] = (char)ch;
+         if(lst==com.ls) break;
+      }
+      fclose(fseq);
+   }
+   else {
+      /* p[] becomes the cumulative distribution of com.pi[] */
+      for(j=0; j<n; j++)  p[j] = com.pi[j];
+      for(j=1; j<n; j++)  p[j] += p[j-1];
+      if(fabs(p[n-1]-1) > small)
+         { printf("\nsum pi = %.6f != 1!\n", p[n-1]); exit(-1); }
+      for(h=0; h<com.ls; h++) {
+         /* the first state whose cumulative probability exceeds r */
+         for(j=0,r=rndu();j<n-1;j++) 
+            if(r<p[j]) break;
+         z[h] = (char)j;
+      }
+   }
+}
+
+
+
+void Evolve1 (int inode)
+{
+/* evolve sequence com.z[tree.root] along the tree to generate com.z[], 
+   using nodes[].branch, nodes[].omega, & com.model
+   Needs com.z[0,1,...,nnode-1], while com.z[0] -- com.z[ns-1] constitute
+   the data.
+   For codon sequences, com.siterates[] has w's for NSsites and NSbranchsite models.
+
+   For every son of inode the parent sequence is copied and each site is then
+   redrawn from the row of the transition matrix that belongs to its current
+   state; the routine calls itself for sons that have sons of their own.
+   PMat[] is reused as work space and holds cumulative row probabilities
+   while sites are being sampled.
+*/
+   int is, h,i,j, ison, from, n=com.ncode, longseq=100000;
+   double t, rw;
+   
+   for (is=0; is<nodes[inode].nson; is++) {
+      ison=nodes[inode].sons[is];
+      memcpy(com.z[ison],com.z[inode],com.ls*sizeof(unsigned char));
+      t=nodes[ison].branch;
+      
+      if(com.seqtype==1 && com.model && com.NSsites) { /* branch-site models */
+         Qfactor = com.QfactorBS[ison];
+         for(h=0; h<com.ls; h++) 
+            com.siterates[h] = com.omegaBS[ison*com.ncatG+com.siteID[h]];
+      }
+
+      for(h=0; h<com.ls; h++) {
+         /* decide whether to recalculate PMat[]. */
+         if (h==0 || (com.siterates && com.siterates[h]!=com.siterates[h-1])) {
+            rw = (com.siterates?com.siterates[h]:1);
+
+            switch(com.seqtype) {
+            case (BASEseq):
+               if(com.model<=TN93)
+                  PMatTN93(PMat, t*Qfactor*rw*Qrates[0], 
+                                 t*Qfactor*rw*Qrates[1], t*Qfactor*rw, com.pi);
+               else if(com.model==REV)
+                  PMatUVRoot(PMat, t*rw, com.ncode, U,V,Root);
+               break;
+
+            case (CODONseq): /* Watch out for NSsites model */
+               if(com.model || com.NSsites) { /* no need to update UVRoot if M0 */
+                  if(com.model && com.NSsites==0) /* branch */
+                     rw = nodes[ison].omega;  /* should be equal to com.rK[nodes[].label] */
+
+                  /* PMat[] serves as scratch space for Q here */
+                  EigenQcodon(0, com.kappa, rw, com.pi, Root, U, V, PMat);
+               }
+               PMatUVRoot(PMat, t, com.ncode, U, V, Root); 
+               break;
+
+            case (AAseq):
+               PMatUVRoot(PMat, t*rw, com.ncode, U, V, Root);
+               break;
+            }
+            /* turn each row of PMat[] into cumulative probabilities */
+            for(i=0; i<n; i++)
+               for(j=1;j<n;j++)
+                  PMat[i*n+j] += PMat[i*n+j-1];
+         }
+         /* rw is reused as the uniform random number; the new state is the
+            first j whose cumulative probability exceeds it */
+         for(j=0,from=com.z[ison][h],rw=rndu(); j<n-1; j++)
+            if(rw < PMat[from*n+j]) break;
+         com.z[ison][h] = j;
+      }
+
+      if(com.ls>longseq) printf("\r   nodes %2d -> %2d, evolving . .   ", inode+1, ison+1);
+
+      if(nodes[ison].nson) Evolve1(ison); 
+   }  /* for (is) */
+
+   if(inode==tree.root && com.ls>longseq)  printf("\r%s", strc(50,' '));
+}
+
+
+
+void Simulate (char *ctlf)
+{
+/* simulate nr data sets of nucleotide, codon, or AA sequences.
+   ls: number of nucleotides, codons, or AAs in each sequence.
+   All 64 codons are used for codon sequences.
+   When com.alpha or com.ncatG>1, sites are randomized after sequences are 
+   generated.
+   space[com.ls] is used to hold site marks.
+   format:  0: paml sites; 1: paml patterns; 2: paup nex; 3: paup JC69 format
+ */
+   char *ancf="ancestral.txt", *siteIDf="siterates.txt";
+   FILE *fin, *fseq, *ftree=NULL, *fanc=NULL, *fsiteID=NULL;
+   char *paupstart="paupstart",*paupblock="paupblock",*paupend="paupend";
+   char line[32000];
+   int lline=32000, i,j,k, ir,n,nr, fixtree=1, sspace=10000, rooted=1;
+   int h=0,format=0, b[3]={0}, nrate=1, counts[NCATG];
+   int *siteorder=NULL;
+   char *tmpseq=NULL, *pc;
+   double birth=0, death=0, sample=1, mut=1, tlength, *space, *blengthBS;
+   double T,C,A,G,Y,R, Falias[NCATG];
+   int    Lalias[NCATG];
+
+   noisy = 1;
+   printf("\nReading options from data file %s\n", ctlf);
+   com.ncode = n = (com.seqtype==0 ? 4 : (com.seqtype==1?64:20));
+   fin = (FILE*)gfopen(ctlf,"r");
+   fscanf(fin, "%d", &format);
+   fgets(line, lline, fin);
+   printf("\nSimulated data will go into %s.\n", seqf[format]);
+   if(format==2) printf("%s, %s, & %s will be appended if existent.\n", paupstart,paupblock,paupend);
+
+   fscanf (fin, "%d", &i);
+   fgets(line, lline, fin);
+   SetSeed(i, 1);
+   fscanf (fin, "%d%d%d", &com.ns, &com.ls, &nr);
+   fgets(line, lline, fin);
+   i=(com.ns*2-1)*sizeof(struct TREEN);
+   if((nodes=(struct TREEN*)malloc(i))==NULL) error2("oom");
+
+   if(com.ns>NS) error2("too many seqs?");
+   printf ("\n%d seqs, %d sites, %d replicate(s)\n", com.ns, com.ls, nr);
+   k=(com.ns*com.ls* (com.seqtype==CODONseq?4:1) *nr)/1000+1;
+   printf ("Seq file will be about %dK bytes.\n",k);
+   for(i=0; i<com.ns; i++)          /* default spname */
+      sprintf(com.spname[i],"S%d",i+1);
+
+   if(fixtree) {
+      fscanf(fin, "%lf", &tlength);   fgets(line, lline, fin);
+      if(ReadTreeN(fin, &i, &j, 1, 1))  /* might overwrite spname */
+         error2("err tree..");
+
+      if(i==0) error2("use : to specify branch lengths in tree");
+      for(i=0,T=0; i<tree.nnode; i++) 
+         if(i!=tree.root) T += nodes[i].branch;
+      if(tlength>0) {
+         for(i=0; i<tree.nnode; i++) 
+            if(i!=tree.root) nodes[i].branch *= tlength/T;
+      }
+      printf("tree length = %.3f\n", (tlength>0?tlength:T));
+      if(com.ns<100) {
+         printf("\nModel tree & branch lengths:\n"); 
+         OutTreeN(F0,1,1); FPN(F0);
+         OutTreeN(F0,0,1); FPN(F0);
+      }
+      if(com.seqtype==CODONseq && com.model && !com.NSsites) { /* branch model */
+         FOR(i,tree.nnode) nodes[i].omega=nodes[i].label;
+         FPN(F0);  OutTreeN(F0, 1, PrBranch|PrLabel);  FPN(F0);
+      }
+   }
+   else {   /* random trees, broken or need testing? */
+      printf ("\nbirth rate, death rate, sampling fraction, mutation rate (tree height)?\n");
+      fscanf (fin, "%lf%lf%lf%lf", &birth, &death, &sample, &mut);
+      fgets(line, lline, fin);
+      printf("%9.4f %9.4f %9.4f %9.4f\n", birth, death, sample, mut);
+   }
+
+   if(com.seqtype==BASEseq) {
+      fscanf(fin,"%d", &com.model);
+      fgets(line, lline, fin);
+      if(com.model<0 || com.model>REV) error2("model err");
+      if(com.model==T92) error2("T92: please use HKY85 with T=A and C=G.");
+
+      printf("\nModel: %s\n", basemodels[com.model]);
+      if(com.model==REV)        nrate=5;
+      else if(com.model==TN93)  nrate=2;
+      FOR(i,nrate) fscanf(fin,"%lf",&Qrates[i]);
+      fgets(line, lline, fin);
+      if(nrate<=2) FOR(i,nrate) printf("kappa %9.5f\n",Qrates[i]); FPN(F0);
+      if(nrate==5) {
+         printf("a & b & c & d & e: ");
+         FOR(i,nrate) printf("%9.5f",Qrates[i]); FPN(F0);
+      }
+      if((com.model==JC69 || com.model==F81)&&Qrates[0]!=1) 
+         error2("kappa should be 1 for this model");
+   }
+   else if(com.seqtype==CODONseq) {
+      for(i=0; i<64; i++) 
+         getcodon(CODONs[i], i);
+      if(com.model==0 && com.NSsites) {  /* site model */
+         fscanf(fin,"%d", &com.ncatG);   fgets(line, lline, fin);
+         if(com.ncatG>NCATG) error2("ncatG>NCATG");
+         FOR(i,com.ncatG) fscanf(fin,"%lf",&com.freqK[i]);  fgets(line, lline, fin);
+         FOR(i,com.ncatG) fscanf(fin,"%lf",&com.rK[i]);     fgets(line, lline, fin);
+         printf("\n\ndN/dS (w) for site classes (K=%d)", com.ncatG);
+         printf("\nf: ");  FOR(i,com.ncatG) printf("%9.5f",com.freqK[i]);
+         printf("\nw: ");  FOR(i,com.ncatG) printf("%9.5f",com.rK[i]);  FPN(F0);
+      }
+      else if(com.model && com.NSsites) {  /* branchsite model */
+         fscanf(fin,"%d",&com.ncatG);   fgets(line, lline, fin);
+         if(com.ncatG>min2(NCATG,127)) error2("ncatG too large");
+         FOR(i,com.ncatG) fscanf(fin,"%lf",&com.freqK[i]);  fgets(line,lline,fin);
+         printf("\n%d site classes.\nFreqs: ", com.ncatG);
+         FOR(i,com.ncatG) printf("%9.5f",com.freqK[i]);
+
+         if((com.omegaBS=(double*)malloc((com.ncatG+2)*tree.nnode*sizeof(double)))==NULL)
+            error2("oom");
+         com.QfactorBS = com.omegaBS + com.ncatG*tree.nnode;
+         blengthBS = com.QfactorBS + tree.nnode;
+
+         for(i=0; i<tree.nnode; i++)
+            blengthBS[i] = nodes[i].branch;
+         for(k=0; k<com.ncatG; k++) {
+            ReadTreeN(fin, &i, &j, 0, 1);
+            if(i) error2("do not include branch lengths except in the first tree.");
+            if(!j) error2("Use # to specify omega's for branches");
+            for(i=0; i<tree.nnode; i++)  com.omegaBS[i*com.ncatG+k]=nodes[i].label;
+         }
+         for(i=0; i<tree.nnode; i++)
+            { nodes[i].branch=blengthBS[i];  nodes[i].label=nodes[i].omega=0; }
+         for(i=0; i<tree.nnode; i++) {  /* print out omega as node labels. */
+            nodes[i].nodeStr=pc=(char*)malloc(20*com.ncatG*sizeof(char));
+            sprintf(pc, "'[%.2f", com.omegaBS[i*com.ncatG+0]);
+            for(k=1,pc+=strlen(pc); k<com.ncatG; k++,pc+=strlen(pc)) 
+               sprintf(pc, ", %.2f", com.omegaBS[i*com.ncatG+k]);
+            sprintf(pc, "]'");
+         }
+         FPN(F0);  OutTreeN(F0,1,PrBranch|PrLabel);  FPN(F0);
+      }
+      else if(com.model==0) {  /* M0 */
+         fscanf(fin,"%lf",&com.omega);
+         fgets(line, lline, fin);
+         printf("omega = %9.5f\n",com.omega);
+         for(i=0; i<tree.nbranch; i++) 
+            nodes[tree.branches[i][1]].omega = com.omega;
+      }
+
+      fscanf(fin, "%lf", &com.kappa);   fgets(line, lline, fin);
+      printf("kappa = %9.5f\n",com.kappa);
+   }
+
+   if(com.seqtype==BASEseq || com.seqtype==AAseq) {
+      fscanf(fin,"%lf%d", &com.alpha, &com.ncatG);
+      fgets(line, lline, fin);
+      if(com.alpha) 
+        printf("Gamma rates, alpha =%.4f (K=%d)\n", com.alpha, com.ncatG);
+      else { 
+         com.ncatG=0; 
+         puts("Rates are constant over sites."); 
+      }
+   }
+   if(com.alpha || com.ncatG) { /* this is used for codon NSsites as well. */
+      k = com.ls;
+      if(com.seqtype==1 && com.model && com.NSsites) k *= tree.nnode;
+      if((com.siterates=(double*)malloc(k*sizeof(double)))==NULL) error2("oom1");
+      if((siteorder=(int*)malloc(com.ls*sizeof(int)))==NULL) error2("oom2");
+   }
+
+   if(com.seqtype==AAseq) { /* get aa substitution model and rate matrix */
+      fscanf(fin,"%d",&com.model);
+      printf("\nmodel: %s",aamodels[com.model]); 
+      if(com.model>=2)  { fscanf(fin,"%s",com.daafile); GetDaa(NULL,com.daa); }
+      fgets(line, lline, fin);
+   }
+
+   /* get freqs com.pi[] */
+   if((com.seqtype==BASEseq && com.model>K80) ||
+       com.seqtype==CODONseq ||
+      (com.seqtype==AAseq && (com.model==1 || com.model==3)))
+         for(k=0; k<com.ncode; k++) fscanf(fin,"%lf", &com.pi[k]);
+   else if(com.model==0 || (com.seqtype==BASEseq && com.model<=K80)) 
+      fillxc(com.pi, 1./com.ncode, com.ncode);
+
+   printf("sum pi = 1 = %.6f:", sum(com.pi,com.ncode));
+   matout2(F0, com.pi, com.ncode/4, 4, 9, 6);
+   if(com.seqtype==CODONseq) {
+      fscanf(fin, "%d", &com.icode);   fgets(line, lline, fin);
+      printf("genetic code = %d\n", com.icode);
+      for(k=0; k<com.ncode; k++) 
+         if(GeneticCode[com.icode][k] == -1 && com.pi[k]) 
+            error2("stop codons should have frequency 0?");
+   }
+   
+   if(com.seqtype==BASEseq) {
+      if(com.model<REV) {
+         T=com.pi[0]; C=com.pi[1]; A=com.pi[2]; G=com.pi[3]; Y=T+C; R=A+G;
+         if (com.model==F84) { 
+            Qrates[1]=1+Qrates[0]/R;   /* kappa2 */
+            Qrates[0]=1+Qrates[0]/Y;   /* kappa1 */
+         }
+         else if (com.model<=HKY85) Qrates[1]=Qrates[0];
+         Qfactor = 1/(2*T*C*Qrates[0] + 2*A*G*Qrates[1] + 2*Y*R);
+      }
+      else
+         if(com.model==REV) EigenQbase(Qrates, com.pi, Root,U,V,PMat);
+   }
+
+   /* get Qfactor for NSsites & NSbranchsite models */
+   if(com.seqtype==CODONseq && com.NSsites) {
+      if(!com.model) {  /* site models */
+         for(k=0,Qfactor=0; k<com.ncatG; k++) {
+            freqK_NS=com.freqK[k];
+            EigenQcodon(1, com.kappa,com.rK[k],com.pi, NULL,NULL,NULL, PMat);
+         }
+         Qfactor=1/Qfactor;
+         printf("Qfactor for NSsites model = %9.5f\n", Qfactor);
+      }
+      else {            /* branch-site models */
+         for(i=0; i<tree.nnode; i++) {
+            if(i==tree.root) { com.QfactorBS[i]=-1; continue; }
+            for(k=0,Qfactor=0; k<com.ncatG; k++) {
+               freqK_NS=com.freqK[k];
+               EigenQcodon(1, com.kappa,com.omegaBS[i*com.ncatG+k],com.pi, NULL,NULL,NULL, PMat);
+            }
+            com.QfactorBS[i]=1/Qfactor;  Qfactor=0;
+            printf("node %2d: Qfactor = %9.5f\n", i+1, com.QfactorBS[i]);
+         }
+      }
+   }
+   if(com.seqtype==CODONseq && com.ncatG<=1 && com.model==0)
+      EigenQcodon(0, com.kappa,com.omega, com.pi, Root, U, V, PMat);
+   else if(com.seqtype==AAseq)
+      EigenQaa(com.pi, Root, U, V,PMat);
+
+   puts("\nAll parameters are read.  Ready to simulate\n");
+   for(j=0; j<com.ns*2-1; j++)
+      com.z[j] = (unsigned char*)malloc(com.ls*sizeof(unsigned char));
+   sspace = max2(sspace, 8000000);
+   space  = (double*)malloc(sspace);
+   if(com.alpha || com.ncatG) tmpseq=(char*)space;
+   if (com.z[com.ns*2-1-1]==NULL) error2("oom for seqs");
+   if (space==NULL) {
+      printf("oom for space, %d bytes needed.", sspace);
+      exit(-1);
+   }
+
+   fseq = gfopen(seqf[format], "w");
+   if(format==2 || format==3) appendfile(fseq, paupstart);
+   
+   fanc = (FILE*)gfopen(ancf, "w");
+   if(fixtree) {
+      fputs("\nAncestral sequences generated during simulation ",fanc);
+      fprintf(fanc, "(check against %s)\n", seqf[format]);
+      OutTreeN(fanc,0,0); FPN(fanc); OutTreeB(fanc); FPN(fanc);
+   }
+   if(com.alpha || com.NSsites) {
+      fsiteID=(FILE*)gfopen(siteIDf,"w");
+      if(com.seqtype==1) fprintf(fsiteID, "\nSite class IDs\n");
+      else               fprintf(fsiteID, "\nRates for sites\n");
+      if(com.seqtype==CODONseq && com.NSsites) {
+         if(!com.model) matout(fsiteID,com.rK, 1,com.ncatG);
+         if((com.siteID=(char*)malloc(com.ls*sizeof(char)))==NULL) 
+            error2("oom siteID");
+      }
+   }
+
+   for (ir=0; ir<nr; ir++) {
+      if (!fixtree) {    /* right now tree is fixed */
+         RandomLHistory (rooted, space);
+         if (rooted && com.ns<10) j = GetIofLHistory ();
+         BranchLengthBD (1, birth, death, sample, mut);
+         if(com.ns<20) { 
+            printf ("\ntree used: "); 
+            OutTreeN(F0,1,1);
+            FPN(F0); 
+         }
+      }
+      MakeSeq(com.z[tree.root], com.ls);
+
+      if (com.alpha)
+         Rates4Sites(com.siterates, com.alpha, com.ncatG, com.ls, 0,space);
+      else if(com.seqtype==1 && com.NSsites) { /* for NSsites */
+         /* the table for the alias algorithm is the same, but ncatG is small. */
+         MultiNomialAliasSetTable(com.ncatG, com.freqK, Falias, Lalias, space);
+         MultiNomialAlias(com.ls, com.ncatG, Falias, Lalias, counts);
+
+         for (i=0,h=0; i<com.ncatG; i++)
+            for (j=0; j<counts[i]; j++) {
+               com.siteID[h]=(char)i;
+               com.siterates[h++]=com.rK[i]; /* overwritten later for branchsite */
+            }
+      }
+
+      Evolve1(tree.root);
+
+      /* randomize sites for site-class model */
+      if(com.siterates && com.ncatG>1) {
+         if(format==1 && ir==0) 
+            puts("\nrequested site pattern counts as output for site-class model.\n");
+         randorder(siteorder, com.ls, (int*)space);
+         for(j=0; j<tree.nnode; j++) {
+            memcpy(tmpseq,com.z[j],com.ls*sizeof(char));
+            for(h=0; h<com.ls; h++) com.z[j][h]=tmpseq[siteorder[h]];
+         }
+         if(com.alpha || com.ncatG>1) {
+            memcpy(space,com.siterates,com.ls*sizeof(double));
+            for(h=0; h<com.ls; h++) com.siterates[h]=space[siteorder[h]];
+         }
+         if(com.siteID) {
+            memcpy((char*)space,com.siteID,com.ls*sizeof(char));
+            for(h=0; h<com.ls; h++) com.siteID[h]=*((char*)space+siteorder[h]);
+         }
+      }
+
+      /* print sequences*/
+      if(format==1 || format==3) {
+         for(i=0; i<com.ns; i++) for(h=0; h<com.ls; h++)    com.z[i][h] ++;  /* coded as 1, 2, ... */
+         PatternWeightSimple();
+         for(i=0; i<com.ns; i++) for(h=0; h<com.npatt; h++) com.z[i][h] --;  /* coded as 0, 1, ... */
+         if(format==3) 
+            PatternWeightJC69like();
+      }
+      if(format==2 || format==3) fprintf(fseq,"\n\n[Replicate # %d]\n", ir+1);
+      printSeqs(fseq, NULL, NULL, format); /* printsma not usable as it codes into 0,1,...,60. */
+
+      if((format==2 || format==3) && !fixtree) {
+         fprintf(fseq,"\nbegin tree;\n   tree true_tree = [&U] "); 
+         OutTreeN(fseq,1,1); fputs(";\n",fseq);
+         fprintf(fseq,"end;\n\n");
+      }
+      if(format==2 || format==3) appendfile(fseq, paupblock);
+
+      /* print ancestral seqs, rates for sites. */
+      if(format!=1 && format!=3) {  /* don't print ancestors if site patterns are printed. */
+         j = (com.seqtype==CODONseq?3*com.ls:com.ls);
+         fprintf(fanc,"[replicate %d]\n",ir+1);
+
+         if(!fixtree) {
+            if(format<2)
+               { OutTreeN(fanc,1,1); FPN(fanc); FPN(fanc); }
+         }
+         else {
+            fprintf(fanc,"%6d %6d\n",tree.nnode-com.ns,j);
+            for(j=com.ns; j<tree.nnode; j++,FPN(fanc)) {
+               fprintf(fanc,"node%-26d  ", j+1);
+               print1seq(fanc, com.z[j], com.ls, NULL);
+            }
+            FPN(fanc);
+
+            if(fsiteID) {
+               if(com.seqtype==CODONseq && com.NSsites && com.model==0) { /* site model */
+                  k=0;
+                  if(com.rK[com.ncatG-1]>1)
+                     FOR(h,com.ls) if(com.rK[com.siteID[h]]>1) k++;
+                  fprintf(fsiteID, "\n[replicate %d: %2d]\n",ir+1, k);
+                  if(k)  for(h=0,k=0; h<com.ls; h++) {
+                     if(com.rK[com.siteID[h]]>1) { 
+                        fprintf(fsiteID,"%4d ",h+1); 
+                        if(++k%15==0) FPN(fsiteID);
+                     }
+                  }
+                  FPN(fsiteID);
+               }
+               else if(com.seqtype==CODONseq && com.NSsites && com.model) { /* branchsite */
+                  fprintf(fsiteID, "\n[replicate %d]\n",ir+1);
+                  for(h=0; h<com.ls; h++) {
+                     fprintf(fsiteID," %4d ", com.siteID[h]+1);
+                     if(h==com.ls-1 || (h+1)%15==0) FPN(fsiteID);
+                  }
+               }
+               else {       /* gamma rates */
+                  fprintf(fsiteID,"\n[replicate %d]\n",ir+1);
+                  for(h=0; h<com.ls; h++) {
+                     fprintf(fsiteID,"%7.4f ",com.siterates[h]);
+                     if(h==com.ls-1 || (h+1)%10==0) FPN(fsiteID);
+                  }
+               }
+            }
+         }
+      }
+
+      printf ("\rdid data set %d %s", ir+1, (com.ls>100000||nr<100? "\n" : ""));
+   }   /* for (ir) */
+   if(format==2 || format==3) appendfile(fseq, paupend);
+
+   fclose(fseq);  if(!fixtree) fclose(fanc);  
+   if(com.alpha || com.NSsites) fclose(fsiteID);
+   for(j=0; j<com.ns*2-1; j++) free(com.z[j]);
+   free(space);
+   if(com.model && com.NSsites) /* branch-site model */
+      for(i=0; i<tree.nnode; i++)  free(nodes[i].nodeStr);
+   free(nodes);
+   if(com.alpha || com.ncatG) { 
+      free(com.siterates);  com.siterates=NULL;
+      free(siteorder);
+      if(com.siteID) free(com.siteID);  com.siteID=NULL;
+   }
+   if(com.seqtype==1 && com.model && com.NSsites) free(com.omegaBS); 
+   com.omegaBS = NULL;
+
+   exit (0);
+}
+
+
+int GetSpnamesFromMB (FILE *fmb, char line[], int lline)
+{
+/* This reads species names from MrBayes output file fmb, like the following.
+   Sets com.spname[] and com.ns; returns 0, or -1 if fgets() fails before the list has ended.
+      Taxon  1 -> 1_Arabidopsis_thaliana
+      Taxon  2 -> 2_Taxus_baccata
+*/
+   int j, ispecies;
+   char *p=NULL, *mbstr1="Taxon ", *mbstr2="->";
+
+   puts("Reading species names from mb output file.\n");
+   rewind(fmb);
+   for(ispecies=0; ; ) {
+      if(fgets(line, lline, fmb)==NULL) return(-1);
+      if(strstr(line, mbstr1) && strstr(line, mbstr2)) {
+         p=strstr(line, mbstr1)+5;
+         if(sscanf(p, "%d", &ispecies)!=1 || ispecies<1 || ispecies>NS)  /* used as index below */
+            error2("bad taxon number in mb output file?");
+         for(p=strstr(line, mbstr2)+2; *p==' ' || *p=='\t'; p++) ;  /* skip blanks after "->" */
+         if(com.spname[ispecies-1][0]) 
+            error2("species name already read?");
+         /* NOTE(review): assumes com.spname[] has NS entries of LSPNAME+1 chars each -- confirm. */
+         for(j=0; isgraph((unsigned char)*p)&&j<LSPNAME; ) com.spname[ispecies-1][j++] = *p++;
+         com.spname[ispecies-1][j]=0;
+         printf("\tTaxon %2d:  %s\n", ispecies, com.spname[ispecies-1]);
+      }
+      else if (ispecies)   /* first non-taxon line after the list: done */
+         break;
+   }
+   com.ns=ispecies;        /* number of the last taxon line read */
+   rewind(fmb);
+
+   return(0);
+}
+
+char *GrepLine (FILE*fin, char*query, char* line, int lline)
+{
+/* This scans fin from its beginning for the first line that contains query[].
+   Returns line[] (holding that line as read by fgets) or NULL if no line matches.
+*/
+   rewind(fin);
+   /* fgets() returns NULL at end of file or on a read error: no match. */
+   while(fgets(line, lline, fin) != NULL) {
+      if(strstr(line, query))
+         return(line);
+   }
+   return(NULL);
+}
+
+
+void CladeMrBayesProbabilities (char treefile[])
+{
+/* This reads a tree from treefile and then scans a set of MrBayes output files
+   (mbfiles) to retrieve posterior probabilities for every clade in that tree.
+   It first scans the first mb output file to get the species names.
+   The values are printed, written into nodes[].nodeStr, and the tree is printed; then exit(0).
+   Sample mb output:
+   6 -- ...........................*************   8001 1.000 0.005 (0.000)
+   7 -- ....................********************   8001 1.000 0.006 (0.000)
+
+   Note 4 Jan 2014: This uses parti2B[], and is broken after i rewrote 
+   Tree2Partition().  parti2B[] is never assigned in this function, so it is read uninitialized.
+*/
+   int lline=100000, i,j,k, nib, inode, parti2B[NS];
+   char line[100000], *partition, *p;
+   char symbol[2]=".*", cladestr[NS+1]={0};
+   FILE *ftree, *fmb[20];
+   double *Pclade, t;
+/*
+   int nmbfiles=15;
+   char *mbfiles[]={"mb-1e-5.out", "mb-2e-5.out", "mb-3e-5.out", "mb-4e-5.out",
+"mb-5e-5.out", "mb-6e-5.out", "mb-7e-5.out", "mb-8e-5.out",
+"mb-9e-5.out", "mb-1e-4.out", "mb-2e-4.out", "mb-3e-4.out",
+"mb-5e-4.out", "mb-1e-3.out", "mb-1e-2.out"};
+*/
+   int nmbfiles=2;
+   char *mbfiles[]={"mb-1e-4.out", "mb-1e-1.out"};
+
+   printf("tree file is %s\nmb output files:\n", treefile);
+   ftree=gfopen(treefile,"r");
+   for(k=0; k<nmbfiles; k++)
+      fmb[k]=gfopen(mbfiles[k],"r");
+   for(k=0; k<nmbfiles; k++) printf("\t%s\n", mbfiles[k]);
+
+   if(GetSpnamesFromMB(fmb[0], line, lline)) error2("species names not found in mb output file?");  /* read species names from mb output */
+   /* i and k keep 0 if the tree file has no leading numbers, so the check below is well defined. */
+   i=k=0;  fscanf (ftree, "%d%d", &i, &k);
+   if(i && i!=com.ns) error2("do you mean to specify ns in the tree file?");
+   i=(com.ns*2-1)*sizeof(struct TREEN);
+   if((nodes=(struct TREEN*)malloc(i))==NULL) error2("oom");
+   ReadTreeN (ftree, &i, &j, 0, 1);
+
+   FPN(F0);  OutTreeN(F0, 0, 0);  FPN(F0);  FPN(F0);
+   nib=tree.nbranch-com.ns;
+   for(i=0;i<tree.nnode;i++) {
+      nodes[i].nodeStr = NULL;
+      if(i>com.ns && (nodes[i].nodeStr=(char*)malloc(100*sizeof(char)))==NULL) error2("oom");
+   }
+   /* partition[] holds nib rows of com.ns entries; Pclade[] holds nib rows of nmbfiles values. */
+   partition=(char*)malloc(nib*com.ns*sizeof(char));
+   if (partition==NULL) error2("oom");
+   if((Pclade=(double*)malloc(nib*nmbfiles*sizeof(double)))==NULL)
+      error2("oom");
+   for(i=0;i<nib*nmbfiles; i++) Pclade[i]=0;
+
+   Tree2Partition(partition);
+
+   for(i=0; i<nib; i++) {
+      inode=tree.branches[parti2B[i]][1];   /* NOTE(review): parti2B[i] is uninitialized here. */
+      if(partition[i*com.ns+0])   /* flip the row so that the first species is always '.' */
+         for(j=0; j<com.ns; j++) cladestr[j]=symbol[1-partition[i*com.ns+j]];
+      else
+         for(j=0; j<com.ns; j++) cladestr[j]=symbol[partition[i*com.ns+j]];
+      printf("#%2d branch %2d node %2d  %s", i+1, parti2B[i], inode, cladestr);
+      /* In each mb file, find the line with this pattern; Pclade stays 0 if there is none. */
+      for(k=0; k<nmbfiles; k++) {
+         if(GrepLine(fmb[k], cladestr, line, lline)) {
+            p=strstr(line,cladestr);
+            sscanf(p+com.ns, "%lf%lf", &t, &Pclade[i*nmbfiles+k]);  /* 1st number into t, 2nd kept */
+         }
+      }
+      for(k=0; k<nmbfiles; k++) printf("%6.2f", Pclade[i*nmbfiles+k]);
+      FPN(F0);
+      for(k=0,p=nodes[inode].nodeStr; k<nmbfiles; k++) {
+         sprintf(p, "%3.0f%s", Pclade[i*nmbfiles+k]*100,(k<nmbfiles-1?"/":""));
+         p+=4;   /* 3 characters for the value plus 1 for the separator */
+      }
+   }
+   FPN(F0);  OutTreeN(F0,1,PrLabel);  FPN(F0);
+
+   for(i=0; i<tree.nnode; i++) free(nodes[i].nodeStr);
+   free(nodes); free(partition);  free(Pclade);
+   fclose(ftree);   
+   for(k=0; k<nmbfiles; k++) fclose(fmb[k]);
+   exit(0);
+}
diff --git a/src/mcmctree.c b/src/mcmctree.c
index 26cc017..4e47437 100644
--- a/src/mcmctree.c
+++ b/src/mcmctree.c
@@ -21,7 +21,7 @@
 
 #include "paml.h"
 
-#define NS            800
+#define NS            400
 #define NBRANCH      (NS*2-2)
 #define NNODE        (NS*2-1)
 #define MAXNSONS      3
@@ -176,6 +176,8 @@ double PMat[16], Cijk[64], Root[4];
 double _rateSite=1, OldAge=999;
 int debug=0, LASTROUND=0, BayesEB, testlnL=0, NPMat=0; /* no use for this */
 
+double BFbeta=0;
+
 /* for sptree.nodes[].fossil: lower, upper, bounds, gamma, inverse-gamma */
 enum {LOWER_F=1, UPPER_F, BOUND_F, GAMMA_F, SKEWN_F, SKEWT_F, S2N_F} FOSSIL_FLAGS;
 char *fossils[]={" ", "L", "U", "B", "G", "SN", "ST", "S2N"};
@@ -231,6 +233,7 @@ int main (int argc, char *argv[])
    fprintf(fout, "MCMCTREE (%s) %s\n", pamlVerStr, com.seqf);
 
    ReadTreeSeqs(fout);
+
    if(data.pfossilerror && (data.pfossilerror[2]<0 || data.pfossilerror[2]>sptree.nfossil))
       error2("nMinCorrect for fossil errors is out of range.");
 
@@ -305,14 +308,14 @@ int GetMem (void)
    Memory arrangement if(com.conPSiteClass=1):
    ncode*npatt for each node, by node, by iclass, by locus
 */
-   int g = data.ngene, g1=g+(data.rgeneprior==0);
+   int g = data.ngene, g1 = g + (data.rgeneprior == 0);
    int locus, j, k, s = sptree.nspecies, s1, sG = 1, sfhK = 0;
    double *conP, *rates;
 
    /* get mem for conP (internal nodes) */
    if(mcmc.usedata==1) {
       if(!com.fix_alpha && mcmc.saveconP) {
-         com.conPSiteClass=1;  sG=com.ncatG;
+         com.conPSiteClass = 1;  sG = com.ncatG;
       }
       data.conP_offset[0] = 0;
       for(locus=0,com.sconP=0; locus<g; locus++) {
@@ -703,7 +706,7 @@ double lnpD_locus (int locus)
    else if(mcmc.usedata==2)
       lnL = lnpD_locus_Approx(locus);
 
-   return (lnL);
+   return (lnL*BFbeta);
 }
 
 double lnpData (double lnpDi[])
@@ -1026,9 +1029,9 @@ int GenerateBlengthGH (char infile[])
 int GetOptions (char *ctlf)
 {
    int  transform0=ARCSIN_B; /* default transform: SQRT_B, LOG_B, ARCSIN_B */
-   int  iopt, i, j, nopt=29, lline=4096;
-   char line[4096], *pline, *peq, opt[32], *comment="*#";
-   char *optstr[] = {"seed", "seqfile","treefile", "outfile", "mcmcfile", 
+   int  iopt, i, j, nopt=30, lline=4096;
+   char line[4096], *pline, *peq, opt[33], *comment="*#";
+   char *optstr[] = {"seed", "seqfile","treefile", "outfile", "mcmcfile", "BayesFactorBeta",
         "seqtype", "aaRatefile", "icode", "noisy", "usedata", "ndata", "model", "clock", 
         "TipDate", "RootAge", "fossilerror", "alpha", "ncatG", "cleandata", 
         "BDparas", "kappa_gamma", "alpha_gamma", 
@@ -1037,6 +1040,7 @@ int GetOptions (char *ctlf)
    double t=1, *eps=mcmc.finetune;
    FILE  *fctl=gfopen (ctlf, "r");
 
+   data.rgeneprior = 1;  /* default rate prior is gamma-Dirichlet. */
    data.transform = transform0;
    if (fctl) {
       if (noisy) printf ("\nReading options from %s..\n", ctlf);
@@ -1061,25 +1065,26 @@ int GetOptions (char *ctlf)
                   case ( 2): sscanf(pline+1, "%s", com.treef);   break;
                   case ( 3): sscanf(pline+1, "%s", com.outf);    break;
                   case ( 4): sscanf(pline+1, "%s", com.mcmcf);   break;
-                  case ( 5): com.seqtype=(int)t;    break;
-                  case ( 6): sscanf(pline+2,"%s", com.daafile);  break;
-                  case ( 7): com.icode=(int)t;      break;
-                  case ( 8): noisy=(int)t;          break;
-                  case ( 9): 
+                  case ( 5): sscanf(pline + 1, "%lf", &BFbeta);  /* beta for marginal likelihood */
+                  case ( 6): com.seqtype=(int)t;    break;
+                  case ( 7): sscanf(pline+2,"%s", com.daafile);  break;
+                  case ( 8): com.icode=(int)t;      break;
+                  case ( 9): noisy=(int)t;          break;
+                  case (10): 
                      j=sscanf(pline+1, "%d %s%d", &mcmc.usedata, com.inBVf, &data.transform);
                      if(mcmc.usedata==2)
                         if(strchr(com.inBVf, '*')) { strcpy(com.inBVf, "in.BV"); data.transform=transform0; }
                         else if(j==2)              data.transform=transform0;
                      break;
-                  case (10): com.ndata=(int)t;      break;
-                  case (11): com.model=(int)t;      break;
-                  case (12): com.clock=(int)t;      break;
-                  case (13): 
+                  case (11): com.ndata=(int)t;      break;
+                  case (12): com.model=(int)t;      break;
+                  case (13): com.clock=(int)t;      break;
+                  case (14): 
                      sscanf(pline+2, "%lf%lf", &com.TipDate, &com.TipDate_TimeUnit);
                      if(com.TipDate && com.TipDate_TimeUnit==0) error2("should set com.TipDate_TimeUnit");
                      data.transform = SQRT_B;  /* SQRT_B, LOG_B, ARCSIN_B */
                      break;
-                  case (14):
+                  case (15):
                      sptree.RootAge[2] = sptree.RootAge[3] = 0.025;  /* default tail probs */
                      if((strchr(line, '>') || strchr(line, '<')) && (strstr(line, "U(") || strstr(line, "B(")))
                         error2("don't mix < U B on the RootAge line");
@@ -1094,35 +1099,35 @@ int GetOptions (char *ctlf)
                      else if((pline=strstr(line, "B(")))
                         sscanf(pline+2, "%lf,%lf,%lf,%lf", &sptree.RootAge[0], &sptree.RootAge[1], &sptree.RootAge[2], &sptree.RootAge[3]);
                      break;
-                  case (15):
+                  case (16):
                      data.pfossilerror[0] = 0.0;
                      data.pfossilerror[2] = 1;  /* default: minimum 2 good fossils */
                      sscanf(pline+1, "%lf%lf%lf", data.pfossilerror, data.pfossilerror+1, data.pfossilerror+2);
                      break;
-                  case (16): com.alpha=t;           break;
-                  case (17): com.ncatG=(int)t;      break;
-                  case (18): com.cleandata=(int)t;  break;
-                  case (19): 
+                  case (17): com.alpha=t;           break;
+                  case (18): com.ncatG=(int)t;      break;
+                  case (19): com.cleandata=(int)t;  break;
+                  case (20): 
                      sscanf(pline+1,"%lf%lf%lf%lf", &data.BDS[0],&data.BDS[1],&data.BDS[2],&data.BDS[3]);
                      break;
-                  case (20): 
-                     sscanf(pline+1,"%lf%lf", data.kappagamma, data.kappagamma+1); break;
                   case (21): 
-                     sscanf(pline+1,"%lf%lf", data.alphagamma, data.alphagamma+1); break;
+                     sscanf(pline+1,"%lf%lf", data.kappagamma, data.kappagamma+1); break;
                   case (22): 
+                     sscanf(pline+1,"%lf%lf", data.alphagamma, data.alphagamma+1); break;
+                  case (23): 
                      sscanf(pline+1,"%lf%lf%lf%d", &data.rgenepara[0], &data.rgenepara[1], &data.rgenepara[2], &data.rgeneprior); 
-                     if(data.rgenepara[2]<=0) data.rgenepara[2]=1;
-                     if(data.rgeneprior<0)  data.rgeneprior=0;
+                     if(data.rgenepara[2]<=0)  data.rgenepara[2] = 1;
+                     if(data.rgeneprior<0)     data.rgeneprior=0;
                      break;
-                  case (23): 
+                  case (24): 
                      sscanf(pline+1,"%lf%lf%lf", data.sigma2para, data.sigma2para+1, data.sigma2para+2); 
                      if(data.sigma2para[2]<=0) data.sigma2para[2]=1;
                      break;
-                  case (24): mcmc.print=(int)t;     break;
-                  case (25): mcmc.burnin=(int)t;    break;
-                  case (26): mcmc.sampfreq=(int)t;  break;
-                  case (27): mcmc.nsample=(int)t;   break;
-                  case (28):
+                  case (25): mcmc.print=(int)t;     break;
+                  case (26): mcmc.burnin=(int)t;    break;
+                  case (27): mcmc.sampfreq=(int)t;  break;
+                  case (28): mcmc.nsample=(int)t;   break;
+                  case (29):
                      puts("finetune is deprecated now.");
                      break;
                      sscanf(pline + 1, "%d:%lf%lf%lf%lf%lf%lf", &j, eps, eps + 1, eps + 2, eps + 3, eps + 4, eps + 5);
@@ -1151,6 +1156,12 @@ int GetOptions (char *ctlf)
    if(com.alpha==0)  { com.fix_alpha=1; com.nalpha=0; }
    if(com.clock<1 || com.clock>3) error2("clock should be 1, 2, 3?");
    if (mcmc.burnin <= 0) puts("burnin=0: no automatic step adjustment?");
+
+   if (BFbeta && mcmc.usedata ==0)
+      error2("marginal like for prior with usedata =0?");
+   else if (BFbeta==0)
+      BFbeta = 1;
+
    return(0);
 }
 
@@ -1346,6 +1357,7 @@ double Infinitesites(FILE *fout)
    char *FidedDf[2]={"FixedDsClock1.txt", "FixedDsClock23.txt"};
    FILE *fin=gfopen(FidedDf[com.clock>1],"r"), *fmcmc=gfopen(com.mcmcf,"w");
 
+   if(BFbeta != 1) error2("BFbeta should not be used for Infinitesites?");
    com.model=0;  com.alpha=0;
    mcmc.usedata = 0;
    if(data.rgeneprior==0) puts("\aInfiniteSites, not working for cond i.i.d. locus rate prior?");
@@ -2880,7 +2892,7 @@ int UpdateTimes (double *lnL, double finetune[], char accept[])
 }
 
 
-#if (1)  /*  this is not used now. */
+#if (0)  /*  this is not used now. */
 
 int UpdateTimesClock23(double *lnL, double finetune[], char accept[])
 {
@@ -3770,7 +3782,7 @@ int MCMC (FILE* fout)
 {
    FILE *fmcmc = NULL;
    int nxpr[2]={6, 2}, i, j, k, ir, g=data.ngene;
-   double lnL=0, nround=0, *x, *mx, postEFossil[MaxNFossils]={0};
+   double lnL=0, mlnL=0, nround=0, *x, *mx, postEFossil[MaxNFossils]={0};
    double au=data.rgenepara[0], bu=data.rgenepara[1], a=data.rgenepara[2];
    char timestr[36];
 
@@ -3854,6 +3866,7 @@ int MCMC (FILE* fout)
          nround = 0;
          zero(mcmc.Pjump, mcmc.nfinetune);
          zero(mx, com.np); 
+         mlnL = 0;
          testlnL = 1;
          if(fabs(lnL-lnpData(data.lnpDi)) > 0.001) {
             printf("\n%12.6f = %12.6f?  Resetting lnL\n", lnL, lnpData(data.lnpDi));
@@ -3885,6 +3898,7 @@ int MCMC (FILE* fout)
          mcmc.Pjump[j] = (mcmc.Pjump[j]*(nround-1) + mcmc.accept[j])/nround;
       if(mcmc.print) collectx(fmcmc, x);
       for(j=0; j<com.np; j++) mx[j] = (mx[j]*(nround-1) + x[j])/nround;
+      mlnL = (mlnL*(nround-1)+lnL/BFbeta)/nround;
 
       if(data.pfossilerror[0])
          getPfossilerr(postEFossil, nround);
@@ -3892,7 +3906,7 @@ int MCMC (FILE* fout)
       if(mcmc.print && ir>=0 && (ir==0 || (ir+1)%mcmc.sampfreq==0)) {
          fprintf(fmcmc,"%d", ir+1);   
          for(j=0;j<com.np; j++) fprintf(fmcmc,"\t%.7f",x[j]);
-         if(mcmc.usedata) fprintf(fmcmc,"\t%.3f",lnL);
+         if(mcmc.usedata) fprintf(fmcmc,"\t%.3f", lnL/BFbeta);
          FPN(fmcmc);
       }
       if((ir+1)%max2(mcmc.sampfreq, mcmc.sampfreq*mcmc.nsample/100)==0) {
@@ -3904,7 +3918,7 @@ int MCMC (FILE* fout)
          FOR(j,nxpr[0]) printf(" %5.3f", mx[j]);
          if(com.np>nxpr[0]+nxpr[1] && nxpr[1]) printf(" -");
          FOR(j,nxpr[1]) printf(" %5.3f", mx[com.np-nxpr[1]+j]);
-         if(mcmc.usedata) printf(" %4.1f", lnL);
+         if(mcmc.usedata) printf(" %4.1f", mlnL);
       }
 
       if(mcmc.sampfreq*mcmc.nsample>20 && (ir+1)%(mcmc.sampfreq*mcmc.nsample/20)==0) {
@@ -3923,6 +3937,7 @@ int MCMC (FILE* fout)
 
    if(mcmc.print) fclose(fmcmc);
 
+   if(BFbeta!=1) printf("\nBFbeta = %8.6f  E_b(lnf(X)) = %9.4f\n", BFbeta, mlnL);
    printf("\nTime used: %s", printtime(timestr));
    fprintf(fout,"\nTime used: %s", printtime(timestr));
 
diff --git a/src/paml.h b/src/paml.h
index ce94274..d80a329 100644
--- a/src/paml.h
+++ b/src/paml.h
@@ -51,6 +51,7 @@ double reflect(double x, double a, double b);
 #define rndexp(mean) (-(mean)*log(rndu()))
 double rnduM0V1 (void);
 double rndNormal(void);
+int rndBinomial(int n, double p);
 double rndBox(void);
 double rndAirplane(void);
 double rndStrawhat(void);
@@ -82,8 +83,8 @@ double QuantileChi2 (double prob, double v);
 double  PDFGamma(double x, double alpha, double beta);
 #define CDFGamma(x,alpha,beta) IncompleteGamma((beta)*(x),alpha,LnGamma(alpha))
 double logPriorRatioGamma(double xnew, double xold, double a, double b);
-double  PDF_InverseGamma(double x, double alpha, double beta);
-#define CDF_InverseGamma(x,alpha,beta) (1-CDFGamma(1/(x),alpha,beta))
+double  PDFinvGamma(double x, double alpha, double beta);
+#define CDFinvGamma(x,alpha,beta) (1-CDFGamma(1/(x),alpha,beta))
 #define CDFChi2(x,v) CDFGamma(x,(v)/2.0,0.5)
 double PDFBeta(double x, double p, double q);
 double CDFBeta(double x, double p, double q, double lnbeta);
@@ -118,7 +119,7 @@ double probBinomial (int n, int k, double p);
 double probBetaBinomial (int n, int k, double p, double q);
 double factorial (int n);
 double Binomial(double n, int k, double *scale);
-
+int BinomialK(double alpha, int n, double C[], double S[]);
 int GaussLegendreRule(const double **x, const double **w, int order);
 int GaussLaguerreRule(const double **x, const double **w, int order);
 double NIntegrateGaussLegendre(double(*fun)(double x), double a, double b, int order);
@@ -396,6 +397,6 @@ enum {PrBranch=1, PrNodeNum=2, PrLabel=4, PrNodeStr=8, PrAge=16, PrOmega=32} Out
 
 #define FullSeqNames      0   /* 1: numbers at the beginning of sequence name are part of name */
 
-#define pamlVerStr "paml version 4.9, March 2015"
+#define pamlVerStr "paml version 4.9d, February 2017"
 
 #endif
diff --git a/src/pamp.c b/src/pamp.c
index 53830fa..de1445f 100644
--- a/src/pamp.c
+++ b/src/pamp.c
@@ -1,645 +1,645 @@
-/* PAMP.c, Copyright, Ziheng Yang, April 1995.
-   Specify the sequence type in the file pamp.ctl.  Results go into mp.
-
-                    gcc -o pamp pamp.c tools.o
-                    pamp <ControlFileName>
-*/
-
-#include "paml.h"
-
-#define NS            2000
-#define NBRANCH       (NS*2-2)
-#define NNODE         (NS*2-1)
-#define MAXNSONS      10
-#define NGENE         2000
-#define LSPNAME       30
-#define NCODE         20
-#define NCATG         16
-
-double DistanceREV (double Ft[], int n, double alpha, double Root[], double U[],
-   double V[], double pi[], double space[], int *cond);
-int PMatBranch (double Ptb[], int n, double branch[], 
-    double Root[], double U[], double V[], double space[]);
-int PatternLS (FILE *fout, double Ft[],double alpha, double space[], int *cond);
-int testx (double x[], int np);
-int GetOptions (char *ctlf);
-int AlphaMP (FILE* fout);
-int PatternMP (FILE *fout, double Ft[]);
-int PathwayMP1 (FILE *fout, int *maxchange, int NSiteChange[], 
-    double Ft[], double space[], int job);
-double lfunAlpha_Sullivan (double x);
-double lfunAlpha_YK96 (double x);
-
-struct CommonInfo {
-   unsigned char *z[NS];
-   char *spname[NS], seqf[256],outf[256],treef[256];
-   int seqtype, ns, ls, ngene, posG[NGENE+1],lgene[NGENE],*pose,npatt, readpattern;
-   int np, ntime, ncode,fix_kappa,fix_rgene,fix_alpha, clock, model, ncatG, cleandata;
-   int print, nhomo;
-   double *fpatt, *conP;
-   /* not used */
-   double lmax,pi[NCODE], kappa,alpha,rou, rgene[NGENE],piG[NGENE][NCODE];
-}  com;
-struct TREEB {
-   int nbranch, nnode, root, branches[NBRANCH][2];
-   double lnL;
-}  tree;
-struct TREEN {
-   int father, nson, sons[MAXNSONS], ibranch;
-   double branch, age, label, *conP;
-   char *nodeStr, fossil;
-}  *nodes;
-
-
-#define NCATCHANGE 100
-extern int noisy, *ancestor;
-extern double *SeqDistance;
-int maxchange, NSiteChange[NCATCHANGE];
-double MuChange;
-int LASTROUND=0; /* no use for this */
-
-#define LSDISTANCE
-#define REALSEQUENCE
-#define NODESTRUCTURE
-#define RECONSTRUCTION
-#define PAMP
-#include "treesub.c"
-
-int main (int argc, char *argv[])
-{
-   FILE *ftree, *fout, *fseq;
-   char ctlf[32]="pamp.ctl";
-   char *Seqstr[]={"nucleotide", "", "amino-acid", "Binary"};
-   int itree, ntree, i, j, s3;
-   double *space, *Ft;
-
-   com.nhomo=1;  com.print=1;
-   noisy=2;  com.ncatG=8;   com.clock=0; com.cleandata=1;
-   starttimer();
-   GetOptions(ctlf);
-   if(argc>1) { strcpy(ctlf, argv[1]); printf("\nctlfile set to %s.\n",ctlf);}
-
-   printf("PAMP in %s\n", pamlVerStr);
-   if ((fseq=fopen(com.seqf, "r"))==NULL) error2 ("seqfile err.");
-   if ((fout=fopen (com.outf, "w"))==NULL) error2("outfile creation err.");
-   if((fseq=fopen (com.seqf,"r"))==NULL)  error2("No sequence file!");
-   ReadSeq (NULL, fseq, com.cleandata, 0);
-   SetMapAmbiguity();
-   i=(com.ns*2-1)*sizeof(struct TREEN);
-   if((nodes=(struct TREEN*)malloc(i))==NULL) error2("oom");
-
-   fprintf (fout,"PAMP %15s, %s sequences\n", com.seqf, Seqstr[com.seqtype]);
-   if (com.nhomo) fprintf (fout, "nonhomogeneous model\n");
-
-   space = (double*)malloc(1000000*sizeof(double));  /* not safe */
-   SeqDistance=(double*)malloc(com.ns*(com.ns-1)/2*sizeof(double));
-   ancestor=(int*)malloc(com.ns*(com.ns-1)/2*sizeof(int));
-   if (SeqDistance==NULL||ancestor==NULL) error2("oom");
-
-   i = com.ns*(com.ns-1)/2;
-   s3 = sizeof(double)*((com.ns*2-2)*(com.ns*2-2 + 4 + i) + i);
-   s3 = max2(s3, com.ncode*com.ncode*(2*com.ns-2+1)*(int)sizeof(double));
-
-   Ft = (double*) malloc(s3);
-   if (space==NULL || Ft==NULL)  error2 ("oom space");
-
-   InitializeBaseAA (fout);
-   if (com.ngene>1) error2 ("option G not allowed yet");
-
-/*
-   PatternLS (fout, Ft, 0., space, &i);
-   printf ("\nPairwise estimation of rate matrix done..\n");
-   fflush(fout);
-*/
-   ftree=gfopen (com.treef,"r");
-   fscanf (ftree, "%d%d", &i, &ntree);
-   if (i!=com.ns) error2 ("ns in the tree file");
-
-   for(itree=0; itree<ntree; itree++) {
-
-      printf ("\nTREE # %2d\n", itree+1);
-      fprintf (fout,"\nTREE # %2d\n", itree+1);
-
-      if (ReadTreeN (ftree, &i,&j, 0, 1)) error2 ("err tree..");
-      OutTreeN (F0, 0, 0);    FPN (F0); 
-      OutTreeN (fout, 0, 0);  FPN (fout);
-
-      for (i=0,maxchange=0; i<NCATCHANGE; i++) NSiteChange[i]=0;
-
-      PathwayMP1 (fout, &maxchange, NSiteChange, Ft, space, 0);
-      printf ("\nHartigan reconstruction done..\n");
-
-      fprintf (fout, "\n\n(1) Branch lengths and substitution pattern\n");
-      PatternMP (fout, Ft);
-      printf ("pattern done..\n");    fflush(fout);
-
-      fprintf (fout, "\n\n(2) Gamma parameter\n");
-      AlphaMP (fout);
-      printf ("gamma done..\n");      fflush(fout);
-
-      fprintf (fout, "\n\n(3) Parsimony reconstructions\n");
-      PathwayMP1 (fout, &maxchange, NSiteChange, Ft, space, 1);
-      printf ("Yang reconstruction done..\n");    fflush(fout);
-   }
-   free(nodes);
-   return (0);
-}
-
-int GetOptions (char *ctlf)
-{
-   int iopt, nopt=6, i, lline=4096, t;
-   char line[4096], *pline, opt[32], *comment="*#";
-   char *optstr[] = {"seqfile","outfile","treefile", "seqtype", "ncatG", "nhomo"};
-   FILE  *fctl=gfopen (ctlf, "r");
-
-   if (fctl) {
-      for (;;) {
-         if (fgets (line, lline, fctl) == NULL) break;
-         for (i=0,t=0; i<lline&&line[i]; i++)
-            if (isalnum(line[i]))  { t=1; break; }
-            else if (strchr(comment,line[i])) break;
-         if (t==0) continue;
-         sscanf (line, "%s%*s%d", opt, &t);
-         if ((pline=strstr(line, "="))==NULL) error2 ("option file.");
-
-         for (iopt=0; iopt<nopt; iopt++) {
-            if (strncmp(opt, optstr[iopt], 8)==0)  {
-               if (noisy>2)
-                  printf ("\n%3d %15s | %-20s %6d", iopt+1,optstr[iopt],opt,t);
-               switch (iopt) {
-                  case ( 0): sscanf(pline+2, "%s", com.seqf);    break;
-                  case ( 1): sscanf(pline+2, "%s", com.outf);    break;
-                  case ( 2): sscanf(pline+2, "%s", com.treef);    break;
-                  case  (3): com.seqtype=t;   break;
-                  case  (4): com.ncatG=t;     break;
-                  case  (5): com.nhomo=t;     break;
-               }
-               break;
-            }
-         }
-         if (iopt==nopt)
-            { printf ("\nopt %s in %s\n", opt, ctlf);  exit (-1); }
-      }
-      fclose (fctl);
-   }
-   else
-      if (noisy) printf ("\nno ctl file..");
-
-   if (com.seqtype==0)       com.ncode=4;
-   else if (com.seqtype==2)  com.ncode=20;
-   else if (com.seqtype==3)  com.ncode=2;
-   else                      error2("seqtype");
-   if (com.ncatG>NCATG) error2 ("raise NCATG?");
-   return (0);
-}
-
-
-int AlphaMP (FILE* fout)
-{
-   int k, ntotal;
-   double x, xb[2], lnL, var;
-
-   xb[0]=1e-3; xb[1]=99; /* alpha */
- 
-   fprintf (fout, "\n# changes .. # sites");
-   for (k=0,ntotal=0,MuChange=var=0; k<maxchange+1; k++) {
-      fprintf (fout, "\n%6d%10d", k, NSiteChange[k]);
-      ntotal+=NSiteChange[k];  MuChange+=k*NSiteChange[k];   
-      var+=k*k*NSiteChange[k];
-   }
-   MuChange/=ntotal;   
-   var=(var-MuChange*MuChange*ntotal)/(ntotal-1.);
-   x=MuChange*MuChange/(var-MuChange);
-   fprintf (fout, "\n\n# sites%6d,  total changes%6d\nmean-var%9.4f%9.4f",
-            ntotal, (int)(ntotal*MuChange+.5), MuChange, var);
-   fprintf (fout, "\nalpha (method of moments)%9.4f", x);
-   if (x<=0) x=9;
-
-   LineSearch(lfunAlpha_Sullivan, &lnL, &x, xb, 0.02, 1e-8);
-   fprintf (fout, "\nalpha (Sullivan et al. 1995)%9.4f\n", x);
-
-   MuChange/=tree.nbranch; 
-   LineSearch(lfunAlpha_YK96, &lnL, &x, xb, 0.02, 1e-8);
-   fprintf (fout, "alpha (Yang & Kumar 1995, ncatG= %d)%9.4f\n", com.ncatG,x);
-   return (0);
-}
-
-double lfunAlpha_Sullivan (double x)
-{
-   int k;
-   double lnL=0, a=x, t;
-
-   FOR (k, maxchange+1) { 
-      if (NSiteChange[k]==0) continue;
-      t=-a*log(1+MuChange/a);
-      if (k)  
-         t+=LnGamma(k+a)-LnGamma(k+1.) - LnGamma(a) 
-          + k*log(MuChange/a/(1+MuChange/a));
-      lnL += NSiteChange[k]*t;
-   }
-   return(-lnL);
-}
-
-double lfunAlpha_YK96 (double x)
-{
-   int k, ir, b=tree.nbranch, n=com.ncode;
-   double lnL=0, prob, a=x, t=MuChange, p;
-   double freqK[NCATG], rK[NCATG];
-
-   DiscreteGamma (freqK, rK, a, a, com.ncatG, 0);
-   FOR (k, maxchange+1) {
-      if (NSiteChange[k]==0) continue;
-      for (ir=0,prob=0; ir<com.ncatG; ir++) {
-         p=1./n+(n-1.)/n*exp(-n/(n-1.)*rK[ir]*t);
-         prob+=freqK[ir]*pow(p,(double)(b-k))*pow((1-p)/(n-1.),(double)k);
-      }
-      lnL += NSiteChange[k]*log(prob);
-   }
-   return (-lnL);
-}
-
-
-int OutQ (FILE *fout, int n, double Q[], double pi[], double Root[],
-    double U[], double V[], double space[])
-{
-   char aa3[4]="";
-   int i,j;
-   double *T1=space, t;
-
-   fprintf(fout,"\nrate matrix Q: Qij*dt = prob(i->j; dt)\n");
-   if (n<=4) {
-/*      matout (fout, pi, 1, n); */
-      matout (fout, Q, n, n);
-      if (n==4) {
-         fprintf (fout, "Order: T, C, A, G");
-         t=pi[0]*Q[0*4+1]+pi[1]*Q[1*4+0]+pi[2]*Q[2*4+3]+pi[3]*Q[3*4+2];
-         fprintf (fout, "\nAverage Ts/Tv =%9.4f\n", t/(1-t));
-      }
-   }
-   else if (n==20) {
-      for (i=0; i<n; i++,FPN(fout))
-         FOR (j,n) fprintf (fout, "%6.0f", Q[i*n+j]*100);
-/*
-      FOR (i,n) {
-         fprintf (fout,"\n%-4s", getAAstr(aa3,i));
-         FOR (j,i) fprintf (fout, "%4.0f", Q[i*n+j]/pi[j]*100);
-         fprintf (fout, "%4.0f", -Q[i*n+i]*100);
-      }
-      fputs("\n     ",fout);  FOR(i,naa) fprintf(fout,"%5s",getAAstr(aa3,i));
-*/
-      fprintf (fout, "\n\nPAM matrix, P(0.01)\n"); 
-      FOR (i,n) FOR (j,n) T1[i*n+j]=U[i*n+j]*exp(0.01*Root[j]);
-      matby (T1, V, Q, n, n, n);
-      FOR (i,n*n) if (Q[i]<0) Q[i]=0;
-      FOR (i,n) {
-         fprintf (fout,"\n%-4s", getAAstr(aa3,i));
-         FOR(j,n) fprintf(fout, "%6.0f", Q[i*n+j]*10000);
-      }
-      fputs("\n     ",fout);  FOR(i,n) fprintf(fout,"%5s",getAAstr(aa3,i));
-   }
-   return (0);
-}
-
-int PMatBranch (double Ptb[], int n, double branch[], 
-    double Root[], double U[], double V[], double space[])
-{
-/* homogeneised transition prob matrix, with one Q assumed for the whole tree
-*/
-   int i, j, k;
-   double *T1=space, *P;
-
-   FOR (k, tree.nbranch) {
-      P=Ptb+k*n*n;
-      FOR (i,n) FOR (j,n) T1[i*n+j]=U[i*n+j]*exp(Root[j]*branch[k]);
-      matby (T1, V, P, n, n, n);
-      FOR (i,n*n) if (P[i]<0) P[i]=0;
-/*
-      printf ("\nbranch %d, P(%.5f)", k+1, branch[k]);
-      matout (F0, P, n, n);
-      testTransP (P, n);
-*/
-   }
-   return (0);
-}
-
-
-int PatternMP (FILE *fout, double Ft[])
-{
-/* Ft[]: input counts for the F(t) matrix for each branch, output P(t) 
-*/
-   int n=com.ncode, i,j,k;
-   double *Q, *pi, *Root, *U, *V, *branch, *space, *T1, t;
-
-   if((Q=(double*)malloc((n*n*6+tree.nbranch)*sizeof(double)))==NULL)
-      error2("PathwayMP: oom");
-   pi=Q+n*n; Root=pi+n; U=Root+n; V=U+n*n; branch=V+n*n; 
-   space=T1=branch+tree.nbranch;
-
-   for (k=0; k<tree.nbranch; k++) {  /* branch lengths */
-      xtoy(Ft+k*n*n, Q, n*n);
-      branch[k]=nodes[tree.branches[k][1]].branch=
-         DistanceREV(Q, n, 0, Root, U, V, pi, space, &j);
-   }
-   OutTreeB (fout);  FPN (fout);
-   FOR (i, tree.nbranch) fprintf(fout,"%9.5f", branch[i]);
-   fprintf (fout,"\ntree length: %9.5f\n", sum(branch,tree.nbranch));
-
-   /* pattern Q from average F(t) */
-   fprintf(fout,"\nF(t)");
-   xtoy (Ft+tree.nbranch*n*n, Q, n*n);
-   matout2 (fout, Q, n, n, 12, 2);
-   DistanceREV (Q, n, 0, Root, U, V, pi, space, &j);
-   if (noisy>=3&&j==-1) { puts("F(t) modified in DistanceREV"); }
-
-   OutQ (fout, n, Q, pi, Root, U, V, T1);
-   if (com.nhomo==0) 
-      PMatBranch (Ft, n, branch, Root, U, V, space);
-   else {
-      for (k=0; k<tree.nbranch; k++) {
-         for (i=0; i<n; i++) {
-            t=sum(Ft+k*n*n+i*n, n);
-            if (t>1e-5) abyx (1/t, Ft+k*n*n+i*n, n);
-            else        Ft[k*n*n+i*n+i]=1;
-         }
-      }
-   }
-   free(Q);
-   return (0); 
-}
-
-
-int PathwayMP1 (FILE *fout, int *maxchange, int NSiteChange[], 
-    double Ft[], double space[], int job)
-{
-/* Hartigan, JA.  1973.  Minimum mutation fits to a given tree. 
-   Biometrics, 29:53-65.
-   Yang, Z.  1996.  
-   job=0: 1st pass: calculates maxchange, NSiteChange[], and Ft[]
-   job=1: 2nd pass: reconstructs ancestral character states (->fout)
-*/
-   char *pch=(com.seqtype==0?BASEs:(com.seqtype==2?AAs:BINs));
-   char *zz[NNODE],nodeb[NNODE],bestPath[NNODE-NS],Equivoc[NS-1];
-   int n=com.ncode, nid=tree.nbranch-com.ns+1, it,i1,i2, i,j,k, h, hp,npath;
-   int *Ftt=NULL, nchange, nchange0, visit[NS-1]={0};
-   double sumpr, bestpr, pr, *pnode=NULL, *pnsite;
-
-
-   fputs("\nList of most parsimonious reconstructions (MPRs) at each site: #MPRs (#changes)\n",fout);
-   fputs("and then the most likely reconstruction out of the MPRs and its probability\n",fout);
-   if((pnsite=(double*)malloc((com.ns-1)*n*sizeof(double)))==NULL)
-      error2("PathwayMP1: oom");
-
-   PATHWay=(char*)malloc(nid*(n+3)*sizeof(char));
-   NCharaCur=PATHWay+nid;  ICharaCur=NCharaCur+nid;  CharaCur=ICharaCur+nid;
-   if (job==0) {
-      zero(Ft,n*n*(tree.nbranch+1));
-      if((Ftt=(int*)malloc(n*n*tree.nbranch*sizeof(int)))==NULL) error2("oom");
-   }
-   else {
-      pnode=(double*)malloc((nid*com.npatt+1)*(sizeof(double)+sizeof(char)));
-      FOR (j,nid) zz[com.ns+j]=(char*)(pnode+nid*com.npatt)+j*com.npatt;
-      FOR (j,com.ns) zz[j]=com.z[j];
-      if (pnode==NULL) error2 ("oom");
-   }
-   for (j=0,visit[i=0]=tree.root-com.ns; j<tree.nbranch; j++) 
-      if (tree.branches[j][1]>=com.ns) 
-         visit[++i]=tree.branches[j][1]-com.ns;
-
-   for (h=0; h<com.ls; h++) {
-      hp=com.pose[h];
-      if (job==1) {
-         fprintf (fout, "\n%4d  ", h+1);
-         FOR (j, com.ns) fprintf (fout, "%c", pch[com.z[j][hp]]);
-         fprintf (fout, ":  ");
-         FOR (j,nid*n) pnsite[j]=0;
-      }
-      FOR (j,com.ns) nodeb[j]=com.z[j][hp];
-      if (job==0) FOR (j,n*n*tree.nbranch) Ftt[j]=0;
-
-      InteriorStatesMP (1, hp, &nchange, NCharaCur, CharaCur, space); 
-      ICharaCur[j=tree.root-com.ns]=0;  PATHWay[j]=CharaCur[j*n+0];
-      FOR (j,nid) Equivoc[j]=(NCharaCur[j]>1);
-
-      if (nchange>*maxchange) *maxchange=nchange;
-      if (nchange>NCATCHANGE-1) error2 ("raise NCATCHANGE");
-
-      NSiteChange[nchange]++;
-      /* NSiteChange[nchange]+=(int)com.fpatt[hp]; */
-
-      DownStates (tree.root);
-      for (npath=0,sumpr=bestpr=0; ;) {
-         for (j=0,k=visit[nid-1]; j<NCharaCur[k]; j++) {
-            PATHWay[k]=CharaCur[k*n+j]; npath++;
-            FOR (i,nid) nodeb[i+com.ns]=PATHWay[i];
-            if (job==1) {
-               FOR (i,nid) fprintf(fout,"%c",pch[PATHWay[i]]); fputc(' ',fout);
-               pr=com.pi[(int)nodeb[tree.root]];
-               for (i=0; i<tree.nbranch; i++) {
-                  i1=nodeb[tree.branches[i][0]];
-                  i2=nodeb[tree.branches[i][1]];
-                  pr*=Ft[i*n*n+i1*n+i2];
-               }
-               sumpr+=pr;
-               FOR (i,nid) pnsite[i*n+nodeb[i+com.ns]]+=pr;
-               if (pr>bestpr) 
-                  { bestpr=pr; FOR(i,nid) bestPath[i]=PATHWay[i];}
-            }
-            else {
-               for (i=0,nchange0=0; i<tree.nbranch; i++) {
-                  i1=nodeb[tree.branches[i][0]]; 
-                  i2=nodeb[tree.branches[i][1]];
-                  if(i1!=i2) nchange0++;
-                  Ftt[i*n*n+i1*n+i2]++;
-               }
-               if (nchange0!=nchange) {
-                  printf("\a\nerr:PathwayMP %d != %d", nchange, nchange0); 
-                  fprintf(fout,".%d. ", nchange0); /* ??? */
-               }
-            }
-         }
-         for (j=nid-2; j>=0; j--) {
-            if(Equivoc[k=visit[j]] == 0) continue;
-            if (ICharaCur[k]+1<NCharaCur[k]) {
-               PATHWay[k] = CharaCur[k*n + (++ICharaCur[k])];
-               DownStates (k+com.ns);
-               break;
-            }
-            else { /* if (next equivocal node is not ancestor) update node k */
-               for (i=j-1; i>=0; i--) if (Equivoc[(int)visit[i]]) break;
-               if (i>=0) { 
-                  for (it=k+com.ns,i=visit[i]+com.ns; ; it=nodes[it].father)
-                     if (it==tree.root || nodes[it].father==i) break;
-                  if (it==tree.root)
-                     DownStatesOneNode (k+com.ns, nodes[k+com.ns].father);
-               }
-            }
-         }
-         if (j<0) break;
-      }      /* for (npath) */
-/*
-      printf ("\rsite pattern %4d/%4d: %6d%6d", hp+1,com.npatt,npath,nchange);
-*/      
-      if (job==0) 
-         FOR (j,n*n*tree.nbranch) Ft[j]+=(double)Ftt[j]/npath*com.fpatt[hp];
-      else {
-         FOR (i,nid) zz[com.ns+i][hp]=bestPath[i];
-         FOR (i,nid) pnode[i*com.npatt+hp]=pnsite[i*n+bestPath[i]]/sumpr;
-         fprintf (fout, " |%4d (%d) | ", npath, nchange);
-         if (npath>1) {
-            FOR (i,nid) fprintf (fout, "%c", pch[bestPath[i]]);
-            fprintf (fout, " (%.3f)", bestpr/sumpr);
-
-         }
-      }
-   }   /* for (h) */
-   free(PATHWay); 
-   if (job==0) {
-      free(Ftt);
-      FOR (i,tree.nbranch) FOR (j,n*n) Ft[tree.nbranch*n*n+j]+=Ft[i*n*n+j];
-   }
-   else {
-      fprintf (fout,"\n\nApprox. relative accuracy at each node, by site\n");
-      FOR (h, com.ls) {
-         hp=com.pose[h];
-         fprintf (fout,"\n%4d  ", h+1);
-         FOR (j, com.ns) fprintf (fout, "%c", pch[com.z[j][hp]]);
-         fprintf (fout, ":  ");
-         FOR (i,nid) if (pnode[i*com.npatt+hp]<.99999) break;
-         if (i<nid)  FOR (j, nid) 
-            fprintf(fout,"%c (%5.3f) ", pch[zz[j][hp]],pnode[j*com.npatt+hp]);
-      }
-      /* Site2Pattern (fout); */
-      fprintf (fout,"\n\nlist of extant and reconstructed sequences\n\n");
-      for(j=0;j<tree.nnode;j++,FPN(fout)) {
-         if(j<com.ns) fprintf(fout,"%-20s", com.spname[j]);
-         else         fprintf(fout,"node #%-14d", j+1);
-         print1seq (fout, zz[j], (com.readpattern?com.npatt:com.ls), com.pose);
-      }
-      free(pnode);
-   }
-   free(pnsite);
-   return (0);
-}
-
-double DistanceREV (double Ft[], int n,double alpha,double Root[],double U[],
-   double V[], double pi[], double space[], int *cond)
-{
-/* input:  Ft, n, alpha
-   output: Q(in Ft), t, Root, U, V, and cond
-   space[n*n*2]
-*/
-   int i,j, InApplicable;
-   double *Q=Ft, *T1=space, *T2=space+n*n, t, pi_sqrt[20], small=0.1/com.ls;
-
-   for (i=0,t=0; i<n; i++) FOR (j,n) if (i-j) t+=Q[i*n+j];
-   if (t<small)  { *cond=1; zero(Q,n*n); return (0); }
-
-   for(i=0;i<n;i++) for (j=0;j<i;j++) 
-      Q[i*n+j]=Q[j*n+i]=(Q[i*n+j]+Q[j*n+i])/2;
-
-   abyx(1./sum(Q,n*n), Q, n*n);
-   for(i=0;i<n;i++) {
-      pi[i]=sum(Q+i*n, n);
-      if(pi[i]>small) 
-         abyx(1/pi[i], Q+i*n, n); 
-   }
-
-   eigenQREV(Q, pi, n, Root, U, V, pi_sqrt);
-   for(i=0,InApplicable=0; i<n; i++) {
-      if (Root[i]<=0)  {
-         InApplicable=1;
-         Root[i]=-300;  /* adhockery */
-      }
-      else 
-         Root[i]=(alpha<=0?log(Root[i]):gammap(Root[i],alpha));
-   }
-   FOR (i,n) FOR (j,n) T1[i*n+j]=U[i*n+j]*Root[j];
-   matby (T1, V, Q, n, n, n);
-   for (i=0,t=0; i<n; i++) t-=pi[i]*Q[i*n+i];
-
-   if(noisy>=9 && InApplicable) printf("Root(P)<0.  adhockery invoked\n"); 
-   if(t<=0) error2("err: DistanceREV");
-
-   FOR (i,n) Root[i]/=t;
-   FOR (i, n) FOR (j,n)  { Q[i*n+j]/=t; if (i-j) Q[i*n+j]=max2(0,Q[i*n+j]); }
-
-   return (t);
-}
-
-
-int PatternLS (FILE *fout, double Ft[], double alpha,double space[],int *cond)
-{
-/* space[n*n*2]
-*/
-   int n=com.ncode, i,j,k,h, it;
-   double *Q=Ft,*Qt=Q+n*n,*Qm=Qt+n*n;
-   double *pi,*Root,*U, *V, *T1=space, *branch, t;
-   FILE *fdist=gfopen("Distance", "w");
-   
-   if((pi=(double*)malloc((n*n*3+tree.nbranch)*sizeof(double)))==NULL)
-      error2("PatternLS: oom");
-   Root=pi+n;  U=Root+n; V=U+n*n; branch=V+n*n;
-
-   *cond=0;
-   for (i=0,zero(Qt,n*n),zero(Qm,n*n); i<com.ns; i++) {
-      for (j=0; j<i; j++) {
-         for (h=0,zero(Q,n*n); h<com.npatt; h++) {
-	    Q[(com.z[i][h])*n+com.z[j][h]] += com.fpatt[h]/2;
-            Q[(com.z[j][h])*n+com.z[i][h]] += com.fpatt[h]/2;
-	 }
-         FOR (k,n*n) Qt[k]+=Q[k]/(com.ns*(com.ns-1)/2);
-         it=i*(i-1)/2+j;
-	 SeqDistance[it]=DistanceREV (Q, n, alpha, Root,U,V, pi, space, &k);
-
-         if (k==-1) { 
-            *cond=-1; printf("\n%d&%d: F(t) modified in DistanceREV",i+1,j+1);
-         }
-
-	 fprintf(fdist,"%9.5f",SeqDistance[it]);
-/*
-FOR (k,n) 
-if (Q[k*n+k]>0) { printf ("%d %d %.5f\n", i+1, j+1, Q[k*n+k]); }
-*/
-         FOR (k,n*n) Qm[k]+=Q[k]/(com.ns*(com.ns-1)/2); 
-      }
-      FPN(fdist);
-   }
-   fclose (fdist);
-   DistanceREV (Qt, n, alpha, Root, U, V, pi, space, &k);
-   if (k==-1) { puts ("F(t) modified in DistanceREV"); }
-
-   fprintf (fout, "\n\nQ: from average F over pairwise comparisons");
-   OutQ(fout, n, Qt, pi, Root, U, V, T1);
-   fprintf (fout, "\n\nQ: average of Qs over pairwise comparisons\n");
-   fprintf (fout, "(disregard this if very different from the previous Q)");
-   OutQ (fout, n, Qm, pi, Root, U, V, T1);
-
-   if (tree.nbranch) {
-      fillxc (branch, 0.1, tree.nbranch);
-      LSDistance (&t, branch, testx);
-      OutTreeB (fout);  FPN (fout);
-      FOR (i,tree.nbranch) fprintf(fout,"%9.5f", branch[i]);
-      PMatBranch (Ft, com.ncode, branch, Root, U, V, space);
-   }
-   free(pi);
-   return (0);
-}
-
-int testx (double x[], int np)
-{
-   int i;
-   double tb[]={1e-5, 99};
-   FOR(i,np) if(x[i]<tb[0] ||x[i]>tb[1]) return(-1);
-   return(0);
-}
-
-int SetBranch (double x[])
-{
-   int i, status=0;
-   double small=1e-5;
-
-   FOR (i,tree.nnode)
-      if (i!=tree.root && (nodes[i].branch=x[nodes[i].ibranch])<-small)
-         status=-1;
-   return (status);
-}
+/* PAMP.c, Copyright, Ziheng Yang, April 1995.
+   Specify the sequence type in the file pamp.ctl.  Results go into mp.
+
+                    gcc -o pamp pamp.c tools.o
+                    pamp <ControlFileName>
+*/
+
+#include "paml.h"
+
+#define NS            2000
+#define NBRANCH       (NS*2-2)
+#define NNODE         (NS*2-1)
+#define MAXNSONS      10
+#define NGENE         2000
+#define LSPNAME       30
+#define NCODE         20
+#define NCATG         16
+
+double DistanceREV (double Ft[], int n, double alpha, double Root[], double U[],
+   double V[], double pi[], double space[], int *cond);
+int PMatBranch (double Ptb[], int n, double branch[], 
+    double Root[], double U[], double V[], double space[]);
+int PatternLS (FILE *fout, double Ft[],double alpha, double space[], int *cond);
+int testx (double x[], int np);
+int GetOptions (char *ctlf);
+int AlphaMP (FILE* fout);
+int PatternMP (FILE *fout, double Ft[]);
+int PathwayMP1 (FILE *fout, int *maxchange, int NSiteChange[], 
+    double Ft[], double space[], int job);
+double lfunAlpha_Sullivan (double x);
+double lfunAlpha_YK96 (double x);
+
+struct CommonInfo {
+   unsigned char *z[NS];
+   char *spname[NS], seqf[256],outf[256],treef[256];
+   int seqtype, ns, ls, ngene, posG[NGENE+1],lgene[NGENE],*pose,npatt, readpattern;
+   int np, ntime, ncode,fix_kappa,fix_rgene,fix_alpha, clock, model, ncatG, cleandata;
+   int print, nhomo;
+   double *fpatt, *conP;
+   /* not used */
+   double lmax,pi[NCODE], kappa,alpha,rou, rgene[NGENE],piG[NGENE][NCODE];
+}  com;
+struct TREEB {
+   int nbranch, nnode, root, branches[NBRANCH][2];
+   double lnL;
+}  tree;
+struct TREEN {
+   int father, nson, sons[MAXNSONS], ibranch;
+   double branch, age, label, *conP;
+   char *nodeStr, fossil;
+}  *nodes;
+
+
+#define NCATCHANGE 100
+extern int noisy, *ancestor;
+extern double *SeqDistance;
+int maxchange, NSiteChange[NCATCHANGE];
+double MuChange;
+int LASTROUND=0; /* no use for this */
+
+#define LSDISTANCE
+#define REALSEQUENCE
+#define NODESTRUCTURE
+#define RECONSTRUCTION
+#define PAMP
+#include "treesub.c"
+
+int main (int argc, char *argv[])
+{
+   FILE *ftree, *fout, *fseq;
+   char ctlf[32]="pamp.ctl";
+   char *Seqstr[]={"nucleotide", "", "amino-acid", "Binary"};
+   int itree, ntree, i, j, s3;
+   double *space, *Ft;
+
+   com.nhomo=1;  com.print=1;
+   noisy=2;  com.ncatG=8;   com.clock=0; com.cleandata=1;
+   starttimer();
+   GetOptions(ctlf);
+   if(argc>1) { strcpy(ctlf, argv[1]); printf("\nctlfile set to %s.\n",ctlf);}
+
+   printf("PAMP in %s\n", pamlVerStr);
+   if ((fseq=fopen(com.seqf, "r"))==NULL) error2 ("seqfile err.");
+   if ((fout=fopen (com.outf, "w"))==NULL) error2("outfile creation err.");
+   if((fseq=fopen (com.seqf,"r"))==NULL)  error2("No sequence file!");
+   ReadSeq (NULL, fseq, com.cleandata, 0);
+   SetMapAmbiguity();
+   i=(com.ns*2-1)*sizeof(struct TREEN);
+   if((nodes=(struct TREEN*)malloc(i))==NULL) error2("oom");
+
+   fprintf (fout,"PAMP %15s, %s sequences\n", com.seqf, Seqstr[com.seqtype]);
+   if (com.nhomo) fprintf (fout, "nonhomogeneous model\n");
+
+   space = (double*)malloc(1000000*sizeof(double));  /* not safe */
+   SeqDistance=(double*)malloc(com.ns*(com.ns-1)/2*sizeof(double));
+   ancestor=(int*)malloc(com.ns*(com.ns-1)/2*sizeof(int));
+   if (SeqDistance==NULL||ancestor==NULL) error2("oom");
+
+   i = com.ns*(com.ns-1)/2;
+   s3 = sizeof(double)*((com.ns*2-2)*(com.ns*2-2 + 4 + i) + i);
+   s3 = max2(s3, com.ncode*com.ncode*(2*com.ns-2+1)*(int)sizeof(double));
+
+   Ft = (double*) malloc(s3);
+   if (space==NULL || Ft==NULL)  error2 ("oom space");
+
+   InitializeBaseAA (fout);
+   if (com.ngene>1) error2 ("option G not allowed yet");
+
+/*
+   PatternLS (fout, Ft, 0., space, &i);
+   printf ("\nPairwise estimation of rate matrix done..\n");
+   fflush(fout);
+*/
+   ftree=gfopen (com.treef,"r");
+   fscanf (ftree, "%d%d", &i, &ntree);
+   if (i!=com.ns) error2 ("ns in the tree file");
+
+   for(itree=0; itree<ntree; itree++) {
+
+      printf ("\nTREE # %2d\n", itree+1);
+      fprintf (fout,"\nTREE # %2d\n", itree+1);
+
+      if (ReadTreeN (ftree, &i,&j, 0, 1)) error2 ("err tree..");
+      OutTreeN (F0, 0, 0);    FPN (F0); 
+      OutTreeN (fout, 0, 0);  FPN (fout);
+
+      for (i=0,maxchange=0; i<NCATCHANGE; i++) NSiteChange[i]=0;
+
+      PathwayMP1 (fout, &maxchange, NSiteChange, Ft, space, 0);
+      printf ("\nHartigan reconstruction done..\n");
+
+      fprintf (fout, "\n\n(1) Branch lengths and substitution pattern\n");
+      PatternMP (fout, Ft);
+      printf ("pattern done..\n");    fflush(fout);
+
+      fprintf (fout, "\n\n(2) Gamma parameter\n");
+      AlphaMP (fout);
+      printf ("gamma done..\n");      fflush(fout);
+
+      fprintf (fout, "\n\n(3) Parsimony reconstructions\n");
+      PathwayMP1 (fout, &maxchange, NSiteChange, Ft, space, 1);
+      printf ("Yang reconstruction done..\n");    fflush(fout);
+   }
+   free(nodes);
+   return (0);
+}
+
+int GetOptions (char *ctlf)
+{
+   int iopt, nopt=6, i, lline=4096, t;
+   char line[4096], *pline, opt[32], *comment="*#";
+   char *optstr[] = {"seqfile","outfile","treefile", "seqtype", "ncatG", "nhomo"};
+   FILE  *fctl=gfopen (ctlf, "r");
+
+   if (fctl) {
+      for (;;) {
+         if (fgets (line, lline, fctl) == NULL) break;
+         for (i=0,t=0; i<lline&&line[i]; i++)
+            if (isalnum(line[i]))  { t=1; break; }
+            else if (strchr(comment,line[i])) break;
+         if (t==0) continue;
+         sscanf (line, "%s%*s%d", opt, &t);
+         if ((pline=strstr(line, "="))==NULL) error2 ("option file.");
+
+         for (iopt=0; iopt<nopt; iopt++) {
+            if (strncmp(opt, optstr[iopt], 8)==0)  {
+               if (noisy>2)
+                  printf ("\n%3d %15s | %-20s %6d", iopt+1,optstr[iopt],opt,t);
+               switch (iopt) {
+                  case ( 0): sscanf(pline+2, "%s", com.seqf);    break;
+                  case ( 1): sscanf(pline+2, "%s", com.outf);    break;
+                  case ( 2): sscanf(pline+2, "%s", com.treef);    break;
+                  case  (3): com.seqtype=t;   break;
+                  case  (4): com.ncatG=t;     break;
+                  case  (5): com.nhomo=t;     break;
+               }
+               break;
+            }
+         }
+         if (iopt==nopt)
+            { printf ("\nopt %s in %s\n", opt, ctlf);  exit (-1); }
+      }
+      fclose (fctl);
+   }
+   else
+      if (noisy) printf ("\nno ctl file..");
+
+   if (com.seqtype==0)       com.ncode=4;
+   else if (com.seqtype==2)  com.ncode=20;
+   else if (com.seqtype==3)  com.ncode=2;
+   else                      error2("seqtype");
+   if (com.ncatG>NCATG) error2 ("raise NCATG?");
+   return (0);
+}
+
+
+int AlphaMP (FILE* fout)
+{
+   int k, ntotal;
+   double x, xb[2], lnL, var;
+
+   xb[0]=1e-3; xb[1]=99; /* alpha */
+ 
+   fprintf (fout, "\n# changes .. # sites");
+   for (k=0,ntotal=0,MuChange=var=0; k<maxchange+1; k++) {
+      fprintf (fout, "\n%6d%10d", k, NSiteChange[k]);
+      ntotal+=NSiteChange[k];  MuChange+=k*NSiteChange[k];   
+      var+=k*k*NSiteChange[k];
+   }
+   MuChange/=ntotal;   
+   var=(var-MuChange*MuChange*ntotal)/(ntotal-1.);
+   x=MuChange*MuChange/(var-MuChange);
+   fprintf (fout, "\n\n# sites%6d,  total changes%6d\nmean-var%9.4f%9.4f",
+            ntotal, (int)(ntotal*MuChange+.5), MuChange, var);
+   fprintf (fout, "\nalpha (method of moments)%9.4f", x);
+   if (x<=0) x=9;
+
+   LineSearch(lfunAlpha_Sullivan, &lnL, &x, xb, 0.02, 1e-8);
+   fprintf (fout, "\nalpha (Sullivan et al. 1995)%9.4f\n", x);
+
+   MuChange/=tree.nbranch; 
+   LineSearch(lfunAlpha_YK96, &lnL, &x, xb, 0.02, 1e-8);
+   fprintf (fout, "alpha (Yang & Kumar 1995, ncatG= %d)%9.4f\n", com.ncatG,x);
+   return (0);
+}
+
+double lfunAlpha_Sullivan (double x)
+{
+   int k;
+   double lnL=0, a=x, t;
+
+   FOR (k, maxchange+1) { 
+      if (NSiteChange[k]==0) continue;
+      t=-a*log(1+MuChange/a);
+      if (k)  
+         t+=LnGamma(k+a)-LnGamma(k+1.) - LnGamma(a) 
+          + k*log(MuChange/a/(1+MuChange/a));
+      lnL += NSiteChange[k]*t;
+   }
+   return(-lnL);
+}
+
+double lfunAlpha_YK96 (double x)
+{
+   int k, ir, b=tree.nbranch, n=com.ncode;
+   double lnL=0, prob, a=x, t=MuChange, p;
+   double freqK[NCATG], rK[NCATG];
+
+   DiscreteGamma (freqK, rK, a, a, com.ncatG, 0);
+   FOR (k, maxchange+1) {
+      if (NSiteChange[k]==0) continue;
+      for (ir=0,prob=0; ir<com.ncatG; ir++) {
+         p=1./n+(n-1.)/n*exp(-n/(n-1.)*rK[ir]*t);
+         prob+=freqK[ir]*pow(p,(double)(b-k))*pow((1-p)/(n-1.),(double)k);
+      }
+      lnL += NSiteChange[k]*log(prob);
+   }
+   return (-lnL);
+}
+
+
+int OutQ (FILE *fout, int n, double Q[], double pi[], double Root[],
+    double U[], double V[], double space[])
+{
+/* Prints the rate matrix Q[n*n] to fout; always returns 0.
+   n<=4:  Q is printed with matout(); for n==4 the line "Average Ts/Tv"
+          is added, with the state order printed as T, C, A, G.
+   n==20: Q*100 is printed; then space[n*n] receives U[i][j]*exp(0.01*Root[j])
+          and matby(space, V, Q, ...) writes its result into Q, so Q is
+          OVERWRITTEN.  Negative entries are set to 0 and the matrix is
+          printed x10000 with the labels returned by getAAstr().
+*/
+   char aaname[4]="";
+   int row, col, idx;
+   double *UexpD=space, ts;
+   fprintf(fout,"\nrate matrix Q: Qij*dt = prob(i->j; dt)\n");
+   if (n<=4) {
+      matout (fout, Q, n, n);
+      if (n==4) {
+         fprintf (fout, "Order: T, C, A, G");
+         ts=pi[0]*Q[0*4+1]+pi[1]*Q[1*4+0]+pi[2]*Q[2*4+3]+pi[3]*Q[3*4+2];
+         fprintf (fout, "\nAverage Ts/Tv =%9.4f\n", ts/(1-ts));
+      }
+      return (0);
+   }
+   if (n!=20) return (0);   /* nothing more is printed for other sizes */
+   for (row=0; row<n; row++,FPN(fout))
+      for (col=0; col<n; col++) fprintf (fout, "%6.0f", Q[row*n+col]*100);
+   fprintf (fout, "\n\nPAM matrix, P(0.01)\n");
+   for (row=0; row<n; row++)
+      for (col=0; col<n; col++) UexpD[row*n+col]=U[row*n+col]*exp(0.01*Root[col]);
+   matby (UexpD, V, Q, n, n, n);
+   for (idx=0; idx<n*n; idx++) if (Q[idx]<0) Q[idx]=0;
+   for (row=0; row<n; row++) {
+      fprintf (fout,"\n%-4s", getAAstr(aaname,row));
+      for (col=0; col<n; col++) fprintf(fout, "%6.0f", Q[row*n+col]*10000);
+   }
+   fputs("\n     ",fout);
+   for (row=0; row<n; row++) fprintf(fout,"%5s",getAAstr(aaname,row));
+   return (0);
+}
+
+int PMatBranch (double Ptb[], int n, double branch[], 
+    double Root[], double U[], double V[], double space[])
+{
+/* Homogenised transition probability matrices: one Q (given through Root,
+   U, V) is assumed for the whole tree.  For each branch b, space[n*n] gets
+   U[i][j]*exp(Root[j]*branch[b]) and matby(space, V, Ptb+b*n*n, ...) fills
+   that branch's n*n block; negative entries are then reset to 0.
+   Always returns 0.
+*/
+   int row, col, ib, cell;
+   double *scaledU=space, *Pb;
+   for (ib=0; ib<tree.nbranch; ib++) {
+      Pb=Ptb+ib*n*n;
+      for (row=0; row<n; row++)
+         for (col=0; col<n; col++)
+            scaledU[row*n+col]=U[row*n+col]*exp(Root[col]*branch[ib]);
+      matby (scaledU, V, Pb, n, n, n);
+      for (cell=0; cell<n*n; cell++) if (Pb[cell]<0) Pb[cell]=0;
+   }
+   return (0);
+}
+
+
+int PatternMP (FILE *fout, double Ft[])
+{
+/* Ft[]: input counts for the F(t) matrix for each branch, output P(t);
+   the block after the last branch, Ft+tree.nbranch*n*n, is read as F(t) for Q. */
+   int n=com.ncode, i,j,k;
+   double *Q, *pi, *Root, *U, *V, *branch, *space, *T1, t;
+
+   if((Q=(double*)malloc((n*n*6+tree.nbranch)*sizeof(double)))==NULL)
+      error2("PatternMP: oom");
+   pi=Q+n*n; Root=pi+n; U=Root+n; V=U+n*n; branch=V+n*n; 
+   space=T1=branch+tree.nbranch;   /* DistanceREV asks for space[n*n*2] */
+
+   for (k=0; k<tree.nbranch; k++) {  /* branch lengths */
+      xtoy(Ft+k*n*n, Q, n*n);
+      branch[k]=nodes[tree.branches[k][1]].branch=
+         DistanceREV(Q, n, 0, Root, U, V, pi, space, &j);
+   }
+   OutTreeB (fout);  FPN (fout);
+   FOR (i, tree.nbranch) fprintf(fout,"%9.5f", branch[i]);
+   fprintf (fout,"\ntree length: %9.5f\n", sum(branch,tree.nbranch));
+   j=0;   /* DistanceREV may return without writing *cond; keep the j test below defined */
+   /* pattern Q from average F(t) */
+   fprintf(fout,"\nF(t)");
+   xtoy (Ft+tree.nbranch*n*n, Q, n*n);
+   matout2 (fout, Q, n, n, 12, 2);
+   DistanceREV (Q, n, 0, Root, U, V, pi, space, &j);
+   if (noisy>=3&&j==-1) { puts("F(t) modified in DistanceREV"); }
+
+   OutQ (fout, n, Q, pi, Root, U, V, T1);
+   if (com.nhomo==0) 
+      PMatBranch (Ft, n, branch, Root, U, V, space);
+   else {   /* otherwise scale each row of each branch's counts to sum to 1 */
+      for (k=0; k<tree.nbranch; k++) {
+         for (i=0; i<n; i++) {
+            t=sum(Ft+k*n*n+i*n, n);
+            if (t>1e-5) abyx (1/t, Ft+k*n*n+i*n, n);
+            else        Ft[k*n*n+i*n+i]=1;   /* (near-)empty row: set diagonal to 1 */
+         }
+      }
+   }
+   free(Q);
+   return (0); 
+}
+
+
+int PathwayMP1 (FILE *fout, int *maxchange, int NSiteChange[], 
+    double Ft[], double space[], int job)
+{
+/* Enumerates the most parsimonious reconstructions (MPRs) at every site.
+   Hartigan, JA. 1973. Minimum mutation fits to a given tree. Biometrics, 29:53-65.
+   Yang, Z.  1996.
+   job=0: 1st pass: calculates maxchange, NSiteChange[], and Ft[]
+   job=1: 2nd pass: reconstructs ancestral character states (->fout)
+*/
+   char *pch=(com.seqtype==0?BASEs:(com.seqtype==2?AAs:BINs));
+   char *zz[NNODE],nodeb[NNODE],bestPath[NNODE-NS],Equivoc[NS-1];
+   int n=com.ncode, nid=tree.nbranch-com.ns+1, it,i1,i2, i,j,k, h, hp,npath;
+   int *Ftt=NULL, nchange, nchange0, visit[NS-1]={0};
+   double sumpr, bestpr, pr, *pnode=NULL, *pnsite;
+
+
+   fputs("\nList of most parsimonious reconstructions (MPRs) at each site: #MPRs (#changes)\n",fout);
+   fputs("and then the most likely reconstruction out of the MPRs and its probability\n",fout);
+   if((pnsite=(double*)malloc((com.ns-1)*n*sizeof(double)))==NULL)
+      error2("PathwayMP1: oom");
+   /* PATHWay, NCharaCur, ICharaCur and CharaCur share one malloc'd block */
+   if((PATHWay=(char*)malloc(nid*(n+3)*sizeof(char)))==NULL) error2("PathwayMP1: oom");
+   NCharaCur=PATHWay+nid;  ICharaCur=NCharaCur+nid;  CharaCur=ICharaCur+nid;
+   if (job==0) {
+      zero(Ft,n*n*(tree.nbranch+1));
+      if((Ftt=(int*)malloc(n*n*tree.nbranch*sizeof(int)))==NULL) error2("oom");
+   }
+   else {
+      pnode=(double*)malloc((nid*com.npatt+1)*(sizeof(double)+sizeof(char)));
+      if (pnode==NULL) error2 ("oom");   /* check before deriving pointers from it */
+      FOR (j,nid) zz[com.ns+j]=(char*)(pnode+nid*com.npatt)+j*com.npatt;
+      FOR (j,com.ns) zz[j]=com.z[j];
+   }
+   for (j=0,visit[i=0]=tree.root-com.ns; j<tree.nbranch; j++) 
+      if (tree.branches[j][1]>=com.ns) 
+         visit[++i]=tree.branches[j][1]-com.ns;
+
+   for (h=0; h<com.ls; h++) {
+      hp=com.pose[h];
+      if (job==1) {
+         fprintf (fout, "\n%4d  ", h+1);
+         FOR (j, com.ns) fprintf (fout, "%c", pch[com.z[j][hp]]);
+         fprintf (fout, ":  ");
+         FOR (j,nid*n) pnsite[j]=0;
+      }
+      FOR (j,com.ns) nodeb[j]=com.z[j][hp];
+      if (job==0) FOR (j,n*n*tree.nbranch) Ftt[j]=0;
+
+      InteriorStatesMP (1, hp, &nchange, NCharaCur, CharaCur, space); 
+      ICharaCur[j=tree.root-com.ns]=0;  PATHWay[j]=CharaCur[j*n+0];
+      FOR (j,nid) Equivoc[j]=(NCharaCur[j]>1);
+
+      if (nchange>*maxchange) *maxchange=nchange;
+      if (nchange>NCATCHANGE-1) error2 ("raise NCATCHANGE");
+
+      NSiteChange[nchange]++;
+      /* NSiteChange[nchange]+=(int)com.fpatt[hp]; */
+
+      DownStates (tree.root);
+      for (npath=0,sumpr=bestpr=0; ;) {
+         for (j=0,k=visit[nid-1]; j<NCharaCur[k]; j++) {
+            PATHWay[k]=CharaCur[k*n+j]; npath++;
+            FOR (i,nid) nodeb[i+com.ns]=PATHWay[i];
+            if (job==1) {
+               FOR (i,nid) fprintf(fout,"%c",pch[PATHWay[i]]); fputc(' ',fout);
+               pr=com.pi[(int)nodeb[tree.root]];
+               for (i=0; i<tree.nbranch; i++) {
+                  i1=nodeb[tree.branches[i][0]];
+                  i2=nodeb[tree.branches[i][1]];
+                  pr*=Ft[i*n*n+i1*n+i2];
+               }
+               sumpr+=pr;
+               FOR (i,nid) pnsite[i*n+nodeb[i+com.ns]]+=pr;
+               if (pr>bestpr) 
+                  { bestpr=pr; FOR(i,nid) bestPath[i]=PATHWay[i];}
+            }
+            else {
+               for (i=0,nchange0=0; i<tree.nbranch; i++) {
+                  i1=nodeb[tree.branches[i][0]]; 
+                  i2=nodeb[tree.branches[i][1]];
+                  if(i1!=i2) nchange0++;
+                  Ftt[i*n*n+i1*n+i2]++;
+               }
+               if (nchange0!=nchange) {
+                  printf("\a\nerr:PathwayMP %d != %d", nchange, nchange0); 
+                  fprintf(fout,".%d. ", nchange0); /* ??? */
+               }
+            }
+         }
+         for (j=nid-2; j>=0; j--) {
+            if(Equivoc[k=visit[j]] == 0) continue;
+            if (ICharaCur[k]+1<NCharaCur[k]) {
+               PATHWay[k] = CharaCur[k*n + (++ICharaCur[k])];
+               DownStates (k+com.ns);
+               break;
+            }
+            else { /* if (next equivocal node is not ancestor) update node k */
+               for (i=j-1; i>=0; i--) if (Equivoc[(int)visit[i]]) break;
+               if (i>=0) { 
+                  for (it=k+com.ns,i=visit[i]+com.ns; ; it=nodes[it].father)
+                     if (it==tree.root || nodes[it].father==i) break;
+                  if (it==tree.root)
+                     DownStatesOneNode (k+com.ns, nodes[k+com.ns].father);
+               }
+            }
+         }
+         if (j<0) break;
+      }      /* for (npath) */
+/*
+      printf ("\rsite pattern %4d/%4d: %6d%6d", hp+1,com.npatt,npath,nchange);
+*/      
+      if (job==0) 
+         FOR (j,n*n*tree.nbranch) Ft[j]+=(double)Ftt[j]/npath*com.fpatt[hp];
+      else {
+         FOR (i,nid) zz[com.ns+i][hp]=bestPath[i];
+         FOR (i,nid) pnode[i*com.npatt+hp]=pnsite[i*n+bestPath[i]]/sumpr;
+         fprintf (fout, " |%4d (%d) | ", npath, nchange);
+         if (npath>1) {
+            FOR (i,nid) fprintf (fout, "%c", pch[bestPath[i]]);
+            fprintf (fout, " (%.3f)", bestpr/sumpr);
+
+         }
+      }
+   }   /* for (h) */
+   free(PATHWay); 
+   if (job==0) {
+      free(Ftt);
+      FOR (i,tree.nbranch) FOR (j,n*n) Ft[tree.nbranch*n*n+j]+=Ft[i*n*n+j];
+   }
+   else {
+      fprintf (fout,"\n\nApprox. relative accuracy at each node, by site\n");
+      FOR (h, com.ls) {
+         hp=com.pose[h];
+         fprintf (fout,"\n%4d  ", h+1);
+         FOR (j, com.ns) fprintf (fout, "%c", pch[com.z[j][hp]]);
+         fprintf (fout, ":  ");
+         FOR (i,nid) if (pnode[i*com.npatt+hp]<.99999) break;
+         if (i<nid)  FOR (j, nid)   /* interior node j is stored at zz[com.ns+j], not zz[j] */
+            fprintf(fout,"%c (%5.3f) ", pch[zz[com.ns+j][hp]],pnode[j*com.npatt+hp]);
+      }
+      /* Site2Pattern (fout); */
+      fprintf (fout,"\n\nlist of extant and reconstructed sequences\n\n");
+      for(j=0;j<tree.nnode;j++,FPN(fout)) {
+         if(j<com.ns) fprintf(fout,"%-20s", com.spname[j]);
+         else         fprintf(fout,"node #%-14d", j+1);
+         print1seq (fout, zz[j], (com.readpattern?com.npatt:com.ls), com.pose);
+      }
+      free(pnode);
+   }
+   free(pnsite);
+   return (0);
+}
+
+double DistanceREV (double Ft[], int n,double alpha,double Root[],double U[],
+   double V[], double pi[], double space[], int *cond)
+{
+/* input:  Ft (n*n counts, overwritten), n, alpha (<=0: log; >0: gammap)
+   output: Q(in Ft), t (returned), Root, U, V, pi, and cond (0: ok; 1: no
+   off-diagonal counts, Q zeroed and 0 returned).  space[n*n*2]
+*/
+   int i,j, InApplicable;
+   double *Q=Ft, *T1=space, t, pi_sqrt[20], small=0.1/com.ls;  /* pi_sqrt[20]: assumes n<=20 -- TODO confirm */
+   *cond=0;   /* always define the output flag; callers read it after every call */
+   for (i=0,t=0; i<n; i++) FOR (j,n) if (i-j) t+=Q[i*n+j];
+   if (t<small)  { *cond=1; zero(Q,n*n); return (0); }
+   /* NOTE(review): callers test *cond==-1, a value never assigned in this function */
+   for(i=0;i<n;i++) for (j=0;j<i;j++)    /* symmetrize the counts */
+      Q[i*n+j]=Q[j*n+i]=(Q[i*n+j]+Q[j*n+i])/2;
+
+   abyx(1./sum(Q,n*n), Q, n*n);
+   for(i=0;i<n;i++) {
+      pi[i]=sum(Q+i*n, n);
+      if(pi[i]>small) 
+         abyx(1/pi[i], Q+i*n, n); 
+   }
+
+   eigenQREV(Q, pi, n, Root, U, V, pi_sqrt);
+   for(i=0,InApplicable=0; i<n; i++) {
+      if (Root[i]<=0)  {
+         InApplicable=1;
+         Root[i]=-300;  /* adhockery */
+      }
+      else 
+         Root[i]=(alpha<=0?log(Root[i]):gammap(Root[i],alpha));
+   }
+   FOR (i,n) FOR (j,n) T1[i*n+j]=U[i*n+j]*Root[j];
+   matby (T1, V, Q, n, n, n);
+   for (i=0,t=0; i<n; i++) t-=pi[i]*Q[i*n+i];
+
+   if(noisy>=9 && InApplicable) printf("Root(P)<0.  adhockery invoked\n"); 
+   if(t<=0) error2("err: DistanceREV");
+
+   FOR (i,n) Root[i]/=t;
+   FOR (i, n) FOR (j,n)  { Q[i*n+j]/=t; if (i-j) Q[i*n+j]=max2(0,Q[i*n+j]); }
+
+   return (t);
+}
+
+
+int PatternLS (FILE *fout, double Ft[], double alpha,double space[],int *cond)
+{
+/* space[n*n*2].  Ft must hold at least 3*n*n doubles (Q, Qt and Qm below).
+*/
+   int n=com.ncode, i,j,k,h, it;
+   double *Q=Ft,*Qt=Q+n*n,*Qm=Qt+n*n;
+   double *pi,*Root,*U, *V, *T1=space, *branch, t;
+   FILE *fdist=gfopen("Distance", "w");
+   /* one malloc below holds pi[n], Root[n], U[n*n], V[n*n] and branch[] */
+   if((pi=(double*)malloc((n*n*3+tree.nbranch)*sizeof(double)))==NULL)
+      error2("PatternLS: oom");
+   Root=pi+n;  U=Root+n; V=U+n*n; branch=V+n*n;
+   /* *cond: 0, or -1 if any pairwise DistanceREV call leaves its flag at -1 */
+   *cond=0;
+   for (i=0,zero(Qt,n*n),zero(Qm,n*n); i<com.ns; i++) {
+      for (j=0; j<i; j++) {
+         for (h=0,zero(Q,n*n); h<com.npatt; h++) {
+	    Q[(com.z[i][h])*n+com.z[j][h]] += com.fpatt[h]/2;
+            Q[(com.z[j][h])*n+com.z[i][h]] += com.fpatt[h]/2;
+	 }
+         FOR (k,n*n) Qt[k]+=Q[k]/(com.ns*(com.ns-1)/2);  /* Q: pair counts here */
+         it=i*(i-1)/2+j;
+	 SeqDistance[it]=DistanceREV (Q, n, alpha, Root,U,V, pi, space, &k);
+
+         if (k==-1) { 
+            *cond=-1; printf("\n%d&%d: F(t) modified in DistanceREV",i+1,j+1);
+         }
+
+	 fprintf(fdist,"%9.5f",SeqDistance[it]);
+/*
+FOR (k,n) 
+if (Q[k*n+k]>0) { printf ("%d %d %.5f\n", i+1, j+1, Q[k*n+k]); }
+*/
+         FOR (k,n*n) Qm[k]+=Q[k]/(com.ns*(com.ns-1)/2); /* Q: DistanceREV output here */
+      }
+      FPN(fdist);
+   }
+   fclose (fdist);
+   DistanceREV (Qt, n, alpha, Root, U, V, pi, space, &k);
+   if (k==-1) { puts ("F(t) modified in DistanceREV"); }
+
+   fprintf (fout, "\n\nQ: from average F over pairwise comparisons");
+   OutQ(fout, n, Qt, pi, Root, U, V, T1);
+   fprintf (fout, "\n\nQ: average of Qs over pairwise comparisons\n");
+   fprintf (fout, "(disregard this if very different from the previous Q)");
+   OutQ (fout, n, Qm, pi, Root, U, V, T1);
+   /* with a tree: LSDistance() fits branch[] (presumably least squares), then PMatBranch() writes into Ft */
+   if (tree.nbranch) {
+      fillxc (branch, 0.1, tree.nbranch);
+      LSDistance (&t, branch, testx);
+      OutTreeB (fout);  FPN (fout);
+      FOR (i,tree.nbranch) fprintf(fout,"%9.5f", branch[i]);
+      PMatBranch (Ft, com.ncode, branch, Root, U, V, space);
+   }
+   free(pi);
+   return (0);
+}
+
+int testx (double x[], int np)
+{  /* returns 0 if no x[i], i<np, is below 1e-5 or above 99, and -1 otherwise */
+   const double lower=1e-5, upper=99;
+   int idx=0;
+   while (idx<np && !(x[idx]<lower || x[idx]>upper)) idx++;
+   return (idx<np ? -1 : 0);
+}
+
+int SetBranch (double x[])
+{  /* sets nodes[i].branch=x[nodes[i].ibranch] for non-root nodes; -1 if any is < -1e-5 */
+   int inode, flag=0;
+   for (inode=0; inode<tree.nnode; inode++) {
+      if (inode==tree.root) continue;
+      nodes[inode].branch=x[nodes[inode].ibranch];
+      if (nodes[inode].branch < -1e-5) flag=-1;
+   }
+   return (flag);
+}
diff --git a/src/tools.c b/src/tools.c
index 8d30165..36f3bff 100644
--- a/src/tools.c
+++ b/src/tools.c
@@ -14,9 +14,7 @@
 char BASEs[]="TCAGUYRMKSWHBVD-N?";
 char *EquateBASE[]={"T","C","A","G", "T", "TC","AG","CA","TG","CG","TA",
      "TCA","TCG","CAG","TAG", "TCAG","TCAG","TCAG"};
-char BASEs5[]="TCAGEUYRMKSWHBVD-N?";
-char *EquateBASE5[]={"T","C","A","G", "E", "T", "TC","AG","CA","TG","CG","TA",
-     "TCA","TCG","CAG","TAG", "TCAG","TCAG","TCAG"};
+
 char CODONs[256][4], AAs[] = "ARNDCQEGHILKMFPSTWYV-*?X";
 char nChara[256], CharaMap[256][64];
 char AA3Str[]= {"AlaArgAsnAspCysGlnGluGlyHisIleLeuLysMetPheProSerThrTrpTyrVal***"};
@@ -133,7 +131,7 @@ int CodeChara (char b, int seqtype)
 /* This codes nucleotides or amino acids into 0, 1, 2, ...
 */
    int i, n=(seqtype<=1?4:(seqtype==2?20:2));
-   char *pch=(seqtype<=1 ? BASEs : (seqtype==2 ? AAs: (seqtype==5 ? BASEs5 : BINs)));
+   char *pch=(seqtype<=1 ? BASEs : (seqtype==2 ? AAs: BINs));
 
    if (seqtype<=1)
       switch (b) {
@@ -174,7 +172,7 @@ int transform (char *z, int ls, int direction, int seqtype)
 */
    int il, status=0;
    char *p;
-   char *pch=(seqtype<=1 ? BASEs : (seqtype==2 ? AAs: (seqtype==5 ? BASEs5 : BINs)));
+   char *pch=(seqtype<=1 ? BASEs : (seqtype==2 ? AAs: BINs));
 
    if (direction)
       for (il=0,p=z; il<ls; il++,p++) {
@@ -757,16 +755,16 @@ int NucListall(char b, int *nb, int ib[4])
 */
    int j, k;
 
-   k = strchr(BASEs,(int)b) - BASEs;
+   k = (int)(strchr(BASEs,(int)b) - BASEs);
    if(k<0)
       { printf("NucListall: strange character %c\n",b); return(-1);}
    if(k<4) {
       *nb = 1; ib[0] = k;
    }
    else {
-      *nb = strlen(EquateBASE[k]);
+      *nb = (int)strlen(EquateBASE[k]);
       for(j=0; j< *nb; j++)
-         ib[j] = strchr(BASEs,EquateBASE[k][j]) - BASEs;
+         ib[j] = (int)(strchr(BASEs,EquateBASE[k][j]) - BASEs);
    }
    return(0);
 }
@@ -842,7 +840,7 @@ int printcu (FILE *fout, double fcodon[], int icode)
    if (fcodon) { zero(faa,21);  zero(fb3x4,12); }
    else     wc=0;
    for(i=0; i<4; i++) strcpy(ss3[i],"\0\0\0");
-   noodle = strc(4*(10+2+wc)-2,word[1]);
+   noodle = strc(4*(10+2+wc)-2, word[1]);
    fprintf(fout, "\n%s\n", noodle);
    for(i=0; i<4; i++,FPN(fout)) {
       for(j=0; j<4; j++)  {
@@ -1028,7 +1026,7 @@ int printsma (FILE*fout, char*spname[], unsigned char*z[], int ns, int l, int ll
 */
    int igroup, ngroup, lt, h,hp, i, b,b0=-1,igap, lspname=30, lseqlen=7;
    char indel='-', ambi='?', equal='.';
-   char *pch=(seqtype<=1 ? BASEs : (seqtype==2 ? AAs: (seqtype==5 ? BASEs5 : BINs)));
+   char *pch=(seqtype<=1 ? BASEs : (seqtype==2 ? AAs: BINs));
    char codon[4]="   ";
 
    if(l==0) return(1);
@@ -1082,7 +1080,7 @@ void starttimer (void)
    time_start=time(NULL);
 }
 
-char* printtime (char timestr[])
+char *printtime (char timestr[])
 {
 /* print time elapsed since last call to starttimer()
 */
@@ -1093,8 +1091,8 @@ char* printtime (char timestr[])
    h = (int)t/3600;
    m = (int)(t%3600)/60;
    s = (int)(t-(t/60)*60);
-   if(h)  sprintf(timestr,"%d:%02d:%02d", h,m,s);
-   else   sprintf(timestr,"%2d:%02d", m,s);
+   if(h) sprintf(timestr,"%d:%02d:%02d", h,m,s);
+   else  sprintf(timestr, "%2d:%02d", m, s);
    return(timestr);
 }
 
@@ -1238,7 +1236,7 @@ int binarysearch (const void *key, const void *base, size_t n, size_t size, int(
    Each element has size size.  If a match is found, the function returns the index for the 
    element found.  Otherwise it returns the loc where key should be inserted.  This does not deal with ties.
 */
-   int l=0, u=n-1, m=u, z;
+   int l=0, u=(int)n-1, m=u, z;
    
    *found = 0;
    while (l <= u) {
@@ -1316,8 +1314,7 @@ void bigexp(double lnx, double *a, double *b)
    *a = pow(10, z-(*b));
 }
 
-static unsigned int z_rndu=1237;
-static int          w_rndu=1237;
+unsigned int z_rndu=1237, w_rndu=1237;
 
 void SetSeed (int seed, int PrintSeed)
 {
@@ -1533,6 +1530,18 @@ double rndNormal (void)
 }
 
 
+int rndBinomial(int n, double p)
+{
+/* Returns how many of n successive rndu() draws fall below p.
+   The cost is linear in n, so this may be too slow when n is large. */
+   int remaining, successes=0;
+
+   for (remaining=n; remaining>0; remaining--)
+      successes += (rndu() < p);
+   return (successes);
+}
+
+
 double rndBactrian (void)
 {
 /* This returns a variate from the 1:1 mixture of two normals N(-m, 1-m^2) and N(m, 1-m^2),
@@ -1789,7 +1798,6 @@ int rndpoisson (double m)
    return ((int) em);
 }
 
-
 double rndgamma (double a)
 {
 /* This returns a random variable from gamma(a, 1).
@@ -1797,38 +1805,36 @@ double rndgamma (double a)
    ACM Transactions on Mathematical Software, 26 (3): 363-372.
    This is not entirely safe and is noted to produce zero when a is small (0.001).
  */
-   double a0=a, c, d, u, v, x;
+   double a0 = a, c, d, u, v, x, small=1E-300;
 
-   if(a<1) a ++;
+   if (a < 1) a++;
 
-   d = a - 1.0/3.0;
-   c = (1.0/3.0) / sqrt(d);
+   d = a - 1.0 / 3.0;
+   c = (1.0 / 3.0) / sqrt(d);
 
-   for ( ; ; ) {
+   for (; ; ) {
       do {
-         x = rndNormal();
+         x = rndNormal( );
          v = 1.0 + c * x;
-      }
-      while (v <= 0);
- 
+      } while (v <= 0);
+
       v *= v * v;
-      u = rndu();
+      u = rndu( );
 
-      if (u < 1 - 0.0331 * x * x * x * x) 
+      if (u < 1 - 0.0331 * x * x * x * x)
          break;
       if (log(u) < 0.5 * x * x + d * (1 - v + log(v)))
          break;
    }
    v *= d;
 
-   if(a0 < 1) 
-      v *= pow(rndu(), 1/a0);
-   if(v==0) 
-      printf("\a\nrndgamma returning 0.\n");
+   if (a0 < 1)    /* this may cause underflow if a is small, like 0.01 */
+      v *= pow(rndu( ), 1 / a0);
+   if (v == 0)   /* underflow */
+      v = small;
    return v;
 }
 
-
 double rndbeta (double p, double q)
 {
 /* this generates a random beta(p,q) variate
@@ -2289,7 +2295,7 @@ double logPriorRatioGamma(double xnew, double x, double a, double b)
 }
 
 
-double PDF_InverseGamma (double x, double alpha, double beta)
+double PDFinvGamma (double x, double alpha, double beta)
 {
 /* inverse-gamma density: 
    mean=beta/(alpha-1); var=beta^2/[(alpha-1)^2*(alpha-2)]
@@ -2838,23 +2844,23 @@ double probBinomial (int n, int k, double p)
 }
 
 
-double probBetaBinomial (int n, int k, double p, double q)
+double probBetaBinomial(int n, int k, double p, double q)
 {
 /* This calculates beta-binomial probability of k succeses out of n trials,
-   The binomial probability parameter has distribution beta(p, q)
+   The binomial probability parameter has distribution beta(a, b)
 
-   prob(x) = C1(-a,k) * C2(-b,n-k)/C3(-a-b,n)
+   prob(x) = C1(-a, k) * C2(-b, n-k) / C3(-a-b, n)
 */
-   double a=p,b=q, C1,C2,C3,scale1,scale2,scale3;
+   double a = p, b = q, C1, C2, C3, scale1, scale2, scale3;
 
-   if(a<=0 || b<=0) return(0);
+   if (a <= 0 || b <= 0) return(0);
    C1 = Binomial(-a, k, &scale1);
-   C2 = Binomial(-b, n-k, &scale2);
-   C3 = Binomial(-a-b, n, &scale3);
-   C1 *= C2/C3;
-   if(C1<0) 
+   C2 = Binomial(-b, n - k, &scale2);
+   C3 = Binomial(-a - b, n, &scale3);
+   C1 *= C2 / C3;
+   if (C1<0)
       error2("error in probBetaBinomial");
-   return C1*exp(scale1+scale2-scale3);
+   return C1*exp(scale1 + scale2 - scale3);
 }
 
 
@@ -4355,11 +4361,11 @@ int ScatterPlot (int n, int nseries, int yLorR[], double x[], double y[],
    printf ("\ny[1]: (%10.2e, %10.2e)\n", ymin[0], ymax[0]);
    if (ny==2) printf ("y[2]: (%10.2e, %10.2e)  \n", ymin[1], ymax[1]);
 
-   chart=(char*)malloc((nrow+1)*ncolr*sizeof(char));
+   chart = (char*)malloc((nrow+1)*ncolr*sizeof(char));
    for (i=0; i<nrow+1; i++) {
       for (j=1; j<ncol; j++) chart[i*ncolr+j]=' ';
-      if (i%5==0) chart[i*ncolr+0]=chart[i*ncolr+j++]='+'; 
-      else        chart[i*ncolr+0]=chart[i*ncolr+j++]='|'; 
+      if (i%5==0) chart[i*ncolr+0] = chart[i*ncolr+j++] = '+'; 
+      else        chart[i*ncolr+0] = chart[i*ncolr+j++] = '|'; 
       chart[i*ncolr+j]='\0'; 
       if (i==0||i==nrow) 
          FOR(j,ncol+1) chart[i*ncolr+j]=(char)(j%10==0?'+':'-');
@@ -4467,26 +4473,59 @@ double Binomial (double n, int k, double *scale)
 /* calculates (n choose k), where n is any real number, and k is integer.
    If(*scale!=0) the result should be c+exp(*scale).
 */
-   double c=1,i,large=1e99;
+   double c = 1, i, large = 1e99;
 
    *scale=0;
    if((int)k!=k) 
       error2("k is not a whole number in Binomial.");
-   if(n<0 && k%2==1) 
-      c = -1;
-   if(k==0) return(1);
-   if(n>0 && (k<0 || k>n)) return (0);
-
-   if(n>0 && (int)n==n) k=min2(k,(int)n-k);
-   for (i=1; i<=k; i++) {
-      c *= (n-k+i)/i;
-      if(c>large) { 
-         *scale += log(c); c=1; 
+   if (k == 0) return(1);
+   if (n>0 && (k<0 || k>n)) return (0);
+
+   if(n>0 && (int)n==n) k = min2(k, (int)n - k);
+   for (i = 1; i <= k; i++) {
+      c *= (n - k + i) / i;
+      if (c > large) {
+         *scale += log(c); c = 1;
       } 
    }
    return(c);
 }
 
+int BinomialK (double alpha, int n, double C[], double S[])
+{
+/* Binomial coefficients (alpha choose i) for i = 0, ..., n, returned in scaled
+   form as C[i] * exp(S[i]); always returns 0.  C[] and S[] need n+1 elements.
+   When alpha is a positive whole number, only i <= alpha/2 is computed from
+   the recurrence; the rest follows from symmetry, C[i] = C[alpha - i], and
+   the coefficients with i > alpha are set to 0.
+*/
+   const double rescaleAt = 1E200;
+   int k, alphaWhole = (int)alpha, lastDirect = n, lastMirror;
+   double coef = 1;
+
+   if (alpha > 0 && fabs(alpha - alphaWhole) < 1e-100)
+      lastDirect = min2(n, alphaWhole / 2);
+   lastMirror = min2(n, alphaWhole);
+
+   C[0] = 1;  S[0] = 0;
+   for (k = 1; k <= lastDirect; k++) {
+      coef *= (alpha - k + 1) / k;
+      S[k] = S[k - 1];
+      if (coef > rescaleAt) {   /* move the magnitude into the log scale */
+         S[k] += log(coef);  coef = 1;
+      }
+      C[k] = coef;
+   }
+   for (; k <= lastMirror; k++) {
+      C[k] = C[alphaWhole - k];
+      S[k] = S[alphaWhole - k];
+   }
+   for (; k <= n; k++)
+      C[k] = S[k] = 0;
+   return(0);
+}
+
+
 /****************************
           Vectors and matrices 
 *****************************/
@@ -4555,7 +4594,8 @@ int matout (FILE *fout, double x[], int n, int m)
 {
    int i,j;
    for (i=0,FPN(fout); i<n; i++,FPN(fout)) 
-      FOR(j,m) fprintf(fout," %11.6f", x[i*m+j]);
+      for(j=0; j<m; j++)
+         fprintf(fout," %11.6f", x[i*m+j]);
    return (0);
 }
 
@@ -5704,25 +5744,25 @@ int gradient (int n, double x[], double f0, double g[],
 {
 /*  f0 = fun(x) is always given.
 */
-   int i,j;
-   double *x0=space, *x1=space+n, eh0=Small_Diff, eh;  /* 1e-7 */
+   int i, j;
+   double *x0 = space, *x1 = space + n, eh0 = Small_Diff, eh;  /* 1e-7 */
 
    if (Central) {
-      for(i=0; i<n; i++)  {
-         for(j=0; j<n; j++) 
+      for (i = 0; i<n; i++) {
+         for (j = 0; j<n; j++)
             x0[j] = x1[j] = x[j];
-         eh = eh0*(fabs(x[i])+1);
+         eh = pow(eh0*(fabs(x[i]) + 1), 0.67);
          x0[i] -= eh; x1[i] += eh;
-         g[i] = ((*fun)(x1,n) - (*fun)(x0,n))/(eh*2.0);
+         g[i] = ((*fun)(x1, n) - (*fun)(x0, n)) / (eh*2.0);
       }
    }
    else {
-      for(i=0; i<n; i++)  {
-         for(j=0; j<n; j++)
-            x1[j]=x[j];
-         eh=eh0*(fabs(x[i])+1);
-         x1[i]+=eh;
-         g[i] = ((*fun)(x1,n)-f0)/eh;
+      for (i = 0; i<n; i++) {
+         for (j = 0; j<n; j++)
+            x1[j] = x[j];
+         eh = eh0*(fabs(x[i]) + 1);
+         x1[i] += eh;
+         g[i] = ((*fun)(x1, n) - f0) / eh;
       }
    }
    return(0);
@@ -5815,7 +5855,7 @@ int nls2 (FILE *fout, double *sx, double * x0, int nx,
    (*fun) (x0, y, n, ny);
    for (i=0, s0=0; i<ny; i++)   s0 += y[i]*y[i];
 
-   FOR (ii, maxround)  {
+   for(ii=0; ii<maxround; ii++)  {
       increase=0;
       if (jacobi)  (*jacobi) (x0, J, n, ny);
       else         jacobi_gradient (x0, J, fun, space_J, n, ny);
@@ -5826,7 +5866,7 @@ int nls2 (FILE *fout, double *sx, double * x0, int nx,
          v = sqrt (t) / (double) (ny*n);     /*  v = 0.0;  */
       }
 
-      FOR (i,n)  {
+      for (i = 0; i<n; i++) {
          for (j=0,t=0; j<ny; j++)  t += J[j*n+i] * y[j];
          g[i] = 2*t;
          C[i*(n+1)+n] = -t;
@@ -5841,11 +5881,11 @@ int nls2 (FILE *fout, double *sx, double * x0, int nx,
          v *= bigger;
          continue;
       }
-      FOR (i,n)   p[i] = C[i*(n+1)+n];
+      for (i = 0; i<n; i++)  p[i] = C[i*(n+1)+n];
 
       t = bound (n, x0, p, x, testx);
       if (t>1) t=1;
-      FOR (i,n) x[i] = x0[i] + t * p[i];
+      for (i=0; i<n; i++) x[i] = x0[i] + t * p[i];
 
       (*fun) (x, y, n, ny);
       for (i=0,s=0; i<ny; i++)  s += y[i]*y[i];
@@ -5868,8 +5908,7 @@ int nls2 (FILE *fout, double *sx, double * x0, int nx,
 
 
 
-double bound (int nx, double x0[], double p[], double x[],
-       int(*testx)(double x[], int nx))
+double bound (int nx, double x0[], double p[], double x[], int(*testx)(double x[], int nx))
 {
 /* find largest t so that x[]=x0[]+t*p[] is still acceptable.
    for bounded minimization, p is possibly changed in this function
@@ -5879,7 +5918,7 @@ double bound (int nx, double x0[], double p[], double x[],
    double factor=20, by=1, small=1e-8;  /* small=(SIZEp>1?1e-7:1e-8) */ 
 
    xtoy (x0, x, nx);
-   FOR (i,nx)  {
+   for (i = 0; i<nx; i++)  {
       x[i]=x0[i]+small*p[i];
       if ((*testx) (x, nx))  {  p[i]=0.0;  nd++; }
       x[i]=x0[i];
@@ -5887,7 +5926,7 @@ double bound (int nx, double x0[], double p[], double x[],
    if (nd==nx) { if (noisy) puts ("bound:no move.."); return (0); }
 
    for (by=0.75; ; ) {
-      FOR (i,nx)  x[i]=x0[i]+factor*p[i];
+      for (i = 0; i<nx; i++) x[i]=x0[i]+factor*p[i];
       if ((*testx)(x,nx)==0)  break;
       factor *= by;
    }
@@ -5897,7 +5936,7 @@ double bound (int nx, double x0[], double p[], double x[],
 
 
 
-double LineSearch (double(*fun)(double x),double *f,double *x0,double xb[2],double step, double e)
+double LineSearch(double(*fun)(double x), double *f, double *x0, double xb[2], double step, double e)
 {
 /* linear search using quadratic interpolation 
 
@@ -6286,21 +6325,21 @@ int gradientB (int n, double x[], double f0, double g[],
 /* f0=fun(x) is always provided.
    xmark=0: central; 1: upper; -1: down
 */
-   int i,j;
-   double *x0=space, *x1=space+n, eh0=Small_Diff, eh;  /* eh0=1e-6 || 1e-7 */
-
-   for(i=0; i<n; i++) {
-      eh = eh0*(fabs(x[i])+1);
-      if (xmark[i]==0 && (AlwaysCenter || SIZEp<1)) {   /* central */
-         for(j=0; j<n; j++)  x0[j] = x1[j] = x[j];
-         x0[i] -= eh;  x1[i] += eh;
-         g[i] = ((*fun)(x1,n) - (*fun)(x0,n))/(eh*2.0);
+   int i, j;
+   double *x0 = space, *x1 = space + n, eh0 = Small_Diff, eh;  /* eh0=1e-6 || 1e-7 */
+
+   for (i = 0; i<n; i++) {
+      eh = eh0*(fabs(x[i]) + 1);
+      if (xmark[i] == 0 && (AlwaysCenter || SIZEp<1)) {   /* central */
+         for (j = 0; j<n; j++)  x0[j] = x1[j] = x[j];
+         eh = pow(eh, .67);  x0[i] -= eh;  x1[i] += eh;
+         g[i] = ((*fun)(x1, n) - (*fun)(x0, n)) / (eh*2.0);
       }
-      else  {                         /* forward or backward */
-         for(j=0; j<n; j++)  x1[j] = x[j];
+      else {                         /* forward or backward */
+         for (j = 0; j<n; j++)  x1[j] = x[j];
          if (xmark[i]) eh *= -xmark[i];
          x1[i] += eh;
-         g[i] = ((*fun)(x1,n) - f0)/eh;
+         g[i] = ((*fun)(x1, n) - f0) / eh;
       }
    }
    return(0);
diff --git a/src/treesub.c b/src/treesub.c
index 901aaa8..294f2c1 100644
--- a/src/treesub.c
+++ b/src/treesub.c
@@ -3,7 +3,7 @@
  such as baseml, basemlg, codeml, and pamp.
  */
 
-extern char BASEs[], *EquateBASE[], BASEs5[], *EquateBASE5[], AAs[], BINs[], CODONs[][4], nChara[], CharaMap[][64];
+extern char BASEs[], *EquateBASE[], AAs[], BINs[], CODONs[][4], nChara[], CharaMap[][64];
 
 extern int noisy;
 
@@ -54,29 +54,29 @@ double SS, NN, Sd, Nd;  /* kostas, # of syn. sites,# of non syn. sites,# of syn.
 
 int PatternWeightSimple(void)
 {
-/* This is modified from PatternWeight() and collaps sites into patterns,
-   for nucleotide, amino acid, or codon sequences.
-   This relies on \0 being the end of the string so that sequences should not be
-   encoded before this routine is called.
-   com.pose[i] has labels for genes as input and maps sites to patterns in return.
-   com.fpatt, a vector of doubles, wastes space as site pattern counts are integers.
-   Sequences z[ns*ls] are copied into patterns zt[ls*lpatt], and bsearch is used
-   twice to avoid excessive copying, to count npatt first & to generate fpatt etc.
+/* This is modified from PatternWeight(), and does not deal with multiple genes in 
+   the same alignment (com.ngene, com.lgene[], com.posG[], etc.)
+   Binary search is used to sort site patterns, with patterns represented using 0-ended strings.
+   The routine works with nucleotide, amino acid, or codon sequences.
+   This should work with both encoded and un-encoded sequences.
+   If com.pose is not NULL, this generates the site-to-pattern map in com.pose[].
+   com.fpatt holds site-pattern counts.
+   Sequences z[ns][ls] are copied into patterns zt[ls*lpatt], and bsearch is used
+   twice to avoid excessive copying, first to count npatt and identify the site patterns and 
+   second to generate fpatt[], pose[] etc.
 */
     int maxnpatt = com.ls, h, l, u, ip, j, k, same;
-    /* int n31 = (com.seqtype==CODONseq ? 3 : 1); */
-    int n31 = 1;
-    int lpatt = com.ns*n31 + 1;   /* extra 0 used for easy debugging, can be avoided */
+    int n31 = (com.seqtype==CODONseq ? 3 : 1);
+    int lpatt = com.ns*n31 + 1;
     int *p2s;  /* point patterns to sites in zt */
-    char *zt;
-    unsigned char *p;
+    char timestr[36];
+    unsigned char *p, *zt;
     double nc = (com.seqtype == 1 ? 64 : com.ncode) + !com.cleandata + 1;
     int debug = 0;
-    char DS[] = "DS";
     
     /* (A) Collect and sort patterns.  Get com.npatt.
-     Move sequences com.z[ns][ls] into sites zt[ls*lpatt].
-     Use p2s to map patterns to sites in zt to avoid copying.
+           Move sequences com.z[ns][ls] into sites zt[ls*lpatt].
+           Use p2s to map patterns to sites in zt to avoid copying.
      */
     
     if ((com.seqtype == 1 && com.ns<5) || (com.seqtype != 1 && com.ns<7))
@@ -89,13 +89,13 @@ int PatternWeightSimple(void)
     for (j = 0; j<com.ns; j++)
         for (h = 0; h<com.ls; h++)
             for (k = 0; k<n31; k++)
-                zt[h*lpatt + j*n31 + k] = com.z[j][h*n31 + k];
+                zt[h*lpatt + j*n31 + k] = (unsigned char)(com.z[j][h*n31 + k] + 1);
     
     com.npatt = l = u = ip = 0;
     for (h = 0; h<com.ls; h++) {
         if (debug) printf("\nh %3d %s", h, zt + h*lpatt);
         /* bsearch in existing patterns.  Knuth 1998 Vol3 Ed2 p.410
-         ip is the loc for match or insertion.  [l,u] is the search interval.
+           ip is the loc for match or insertion.  [l,u] is the search interval.
          */
         same = 0;
         if (h != 0) {  /* not 1st pattern? */
@@ -121,16 +121,21 @@ int PatternWeightSimple(void)
         }
         
         if (debug) {
-            printf(": %3d (%c ilu %3d%3d%3d) ", com.npatt, DS[same], ip, l, u);
+            printf(": %3d (%c ilu %3d%3d%3d) ", com.npatt, (same?'S':'D'), ip, l, u);
             for (j = 0; j<com.npatt; j++)
                 printf(" %s", zt + p2s[j] * lpatt);
         }
+        if(noisy>2 && ((h+1)%10000==0 || h+1==com.ls))
+           printf("\r%12d patterns at %8d / %8d sites (%.1f%%), %s",
+              com.npatt, h+1, com.ls, (h+1.)*100/com.ls, printtime(timestr));
     }     /* for (h)  */
-    
+    if(noisy>2) printf("\n%d patterns\n", com.npatt);
+
     /* (B) count pattern frequencies */
     com.fpatt = (double*)realloc(com.fpatt, com.npatt*sizeof(double));
     if (com.fpatt == NULL) error2("oom fpatt");
-    for (ip = 0; ip<com.npatt; ip++) com.fpatt[ip] = 0;
+    memset(com.fpatt, 0, com.npatt*sizeof(double));
+
     for (h = 0; h<com.ls; h++) {
         for (same = 0, l = 0, u = com.npatt - 1;;) {
             if (u<l) break;
@@ -143,114 +148,188 @@ int PatternWeightSimple(void)
         if (!same)
             error2("ghost pattern?");
         com.fpatt[ip]++;
+        if(com.pose) com.pose[h] = ip;
     }     /* for (h)  */
-    
     for (j = 0; j<com.ns; j++) {
-        for (ip = 0, p = com.z[j]; ip<com.npatt; ip++)
-            for (k = 0; k<n31; k++)
-                *p++ = zt[p2s[ip] * lpatt + j*n31 + k];
+       com.z[j] = p = (unsigned char*)realloc(com.z[j], com.npatt * sizeof(unsigned char));
+       for (ip = 0; ip<com.npatt; ip++)
+          for (k = 0; k<n31; k++)
+             *p++ = (unsigned char)(zt[p2s[ip] * lpatt + j*n31 + k] - 1);
     }
     free(p2s);  free(zt);
     
     return (0);
 }
 
+int ConvertSiteJC69like(unsigned char *z[], int ns, int h, unsigned char zh[])
+{
+/* This converts a site (or pattern) of nucleotides or amino acids for JC69-like models.
+   Sequence alignments in com.z[] are already encoded into 0, 1, ...
+   If the data have no ambiguities (com.cleandata=1), the routine converts,
+   for example, a site 1120 (CCAT) into 0012 (TTCA) before checking against old
+   patterns already found.  If a site contains non-gap ambiguities, it is not
+   converted.  For every site, the routine changes ? or N into - first, and then
+   converts the site iff there are no non-gap ambiguities.  Thus CC?T will be
+   converted into CC-T first and then into TT-C.  A site with CCRT will not be
+   converted.  In theory such sites may be compressed as well, but the effort is
+   perhaps not worthwhile.
+*/
+   char b, gap;
+   char *pch = (com.seqtype == 0 ? BASEs : (com.seqtype == 2 ? AAs : BINs));
+   int npatt0 = com.npatt, j, k, same = 0, convert;
+
+   gap = (char)(strchr(pch, (int)'-') - pch);
+
+   if (com.cleandata) { /* clean data, always convert */
+      zh[0] = b = 0;
+      b++;
+      for (j = 1; j<com.ns; j++) {
+         for (k = 0; k<j; k++)
+            if (z[j][h] == z[k][h]) break;
+         zh[j] = (k<j ? zh[k] : b++);
+      }
+   }
+   else { /* convert only if there are no non-gap ambiguity characters */
+      for (j = 0; j<ns; j++)
+         zh[j] = z[j][h];
+
+      /* After this loop, convert = 0 or 1 decides whether to convert. */
+      for (j = 0, convert = 1; j<ns; j++) {
+         if (zh[j] < com.ncode)
+            continue;
+         if (nChara[(int)zh[j]] == com.ncode) {
+            zh[j] = gap;
+            continue;
+         }
+         convert = 0;
+         break;
+      }
+      if (convert) {
+         b = 0;
+         if (zh[0] != gap)
+            zh[0] = b++;
+         for (j = 1; j<ns; j++) {
+            if (zh[j] != gap) {
+               for (k = 0; k<j; k++)
+                  if (zh[j] == z[k][h]) break;
+               if (k<j) zh[j] = zh[k];
+               else    zh[j] = b++;
+            }
+         }
+      }
+   }
+   for (j = 0; j<ns; j++)  zh[j] ++;  /* change 0 to 1. */
+   return(0);
+}
+
+
 int PatternWeightJC69like (void)
 {
-    /* This collaps site patterns further for JC69-like models, called after
-     PatternWeight().  This is used for JC and poisson amino acid models.
-     The routine could be merged into PatternWeight(), which should lead to
-     faster computation, but this is not done because right now
-     InitializeBaseAA() prints out base or amino acid frequencies after
-     PatternWeight() and before this routine.
-     
-     If the data have no ambiguities (com.cleandata=1), the routine recodes
-     the data, for example, changing data at a site 1120 (CCAT) into 0012
-     (TTCA) before checking against old patterns already found.  If the data
-     contain ambiguities, they are not encoded.  In that case, for every
-     site, the routine changes ? or N into - first.  It then checks whether there
-     are any other ambibiguities and will recode if and only if there are not
-     any other ambiguities.  For example, a site with data CC?T will be
-     changed into CC-T first and then recoded into TT-C and checked against
-     old patterns found.  A site with data CCRT will not be recoded.  In theory
-     such sites may be packed as well, but perhaps the effort is not worthwhile.
-     The routine checks data like CCRT against old patterns already found,
-     
-     If com.pose is not NULL, the routine also updates com.pose.  This allows
-     the program to work if com.readpattern==1.
-     */
-    char zh[NS], b, gap;
-    char *pch=(com.seqtype==0 ? BASEs : (com.seqtype==2 ? AAs: (com.seqtype==5?BASEs5:BINs)));
-    int npatt0=com.npatt, h, ht, j,k, same=0, ig, recode;
-    
-    if(com.seqtype==1)
-        error2("PatternWeightJC69like does not work for codon seqs");
-    if(noisy>3) printf("Counting site patterns again, for JC69.\n");
-    gap = (char) (strchr(pch, (int)'-') - pch);
-    for (h=0,com.npatt=0,ig=-1; h<npatt0; h++) {
-        if (ig<com.ngene-1 && h==com.posG[ig+1])
-            com.posG[++ig] = com.npatt;
-        
-        if(com.cleandata) { /* clean data, always recode */
-            zh[0] = b = 0;
-            b++;
-            for (j=1; j<com.ns; j++) {
-                for(k=0; k<j; k++)
-                    if (com.z[j][h]==com.z[k][h]) break;
-                zh[j] = (k<j ? zh[k] : b++);
-            }
-        }
-        else { /* recode only if there are no non-gap ambiguity characters */
-            for(j=0; j<com.ns; j++)
-                zh[j] = com.z[j][h];
-            
-            /* After this loop, recode = 0 or 1 decides whether to recode. */
-            for (j=0,recode=1; j<com.ns; j++) {
-                if (zh[j] < com.ncode)
-                    continue;
-                if (nChara[(int)zh[j]] == com.ncode) {
-                    zh[j] = gap;
-                    continue;
-                }
-                recode = 0;
-                break;
-            }
-            if(recode) {
-                b = 0;
-                if(zh[0] != gap)
-                    zh[0] = b++;
-                for (j=1; j<com.ns; j++) {
-                    if(zh[j] != gap) {
-                        for(k=0; k<j; k++)
-                            if (zh[j] == com.z[k][h]) break;
-                        if(k<j) zh[j] = zh[k];
-                        else    zh[j] = b++;
-                    }
-                }
-            }
-        }
-        
-        for (ht=com.posG[ig],same=0; ht<com.npatt; ht++) {
-            for (j=0,same=1; j<com.ns; j++)
-                if (zh[j]!=com.z[j][ht]) {
-                    same = 0;  break;
-                }
-            if (same) break;
-        }
-        if (same)
-            com.fpatt[ht] += com.fpatt[h];
-        else {
-            for(j=0; j<com.ns; j++) com.z[j][com.npatt] = zh[j];
-            com.fpatt[com.npatt++] = com.fpatt[h];
-        }
-        if(com.pose)
-            for(k=0; k<com.ls; k++)
-                if(com.pose[k]==h) com.pose[k] = ht;
-    }     /* for (h)   */
-    com.posG[com.ngene] = com.npatt;
-    if(noisy>3) printf ("new no. site patterns:%7d\n", com.npatt);
-    
-    return (0);
+/*  This collapses site patterns further for JC69-like models, called after
+    PatternWeight().  This is used for JC and poisson amino acid models.
+    The routine could be merged into PatternWeight(), which should lead to
+    faster computation, but this is not done because right now
+    InitializeBaseAA() prints out base or amino acid frequencies after
+    PatternWeight() and before this routine.
+    
+    If com.pose is not NULL, the routine also updates com.pose.  This allows the program 
+    to work if com.readpattern==1.  
+    This works for nucleotide and amino acid models, but not codon models.
+    This routine is nearly identical to PatternWeight, which works for un-encoded sequences.  
+    fpatt0 stores the old com.fpatt info, while com.fpatt is shrunk.
+    Think about merging them (encode sequences first and compress sites).
+*/
+   int npatt0=com.npatt, lpatt = com.ns + 1, h, l, u, ip, j, k, same;
+   int *p2s;  /* point patterns to sites in zt */
+   char timestr[36];
+   unsigned char *p, *zt;
+   double *fpatt0;
+   int debug = 0;
+
+   /* (A) Collect and sort patterns.  Get com.npatt.
+          Move sequences com.z[ns][ls] into sites zt[ls*lpatt].
+          Use p2s to map patterns to sites in zt to avoid copying.
+   */
+   if (noisy>2) printf("Counting site patterns again, for JC69.. %s\n", printtime(timestr));
+   if (com.seqtype == 1) error2("PatternWeightJC69like does not work for codon seqs");
+   if (com.ngene>1) error2("PatternWeightJC69like does not work when ngene > 1");
+
+   p2s = (int*)malloc(npatt0 * sizeof(int));
+   zt = (char*)malloc(npatt0*lpatt * sizeof(char));
+   fpatt0 = (double*)malloc(npatt0* sizeof(double));
+   if (p2s == NULL || zt == NULL || fpatt0 == NULL)  error2("oom p2s or zt or fpatt0");
+   memset(zt, 0, npatt0*lpatt * sizeof(char));
+   memmove(fpatt0, com.fpatt, npatt0*sizeof(double));
+   for (h = 0; h<npatt0; h++)
+      ConvertSiteJC69like(com.z, com.ns, h, zt + h*lpatt);
+
+   l = u = ip = com.npatt = 0;
+   for (h = 0; h<npatt0; h++) {
+      if (debug) printf("\nh %3d %s", h, zt + h*lpatt);
+
+      /* bsearch in existing patterns.  Knuth 1998 Vol3 Ed2 p.410
+      ip is the loc for match or insertion.  [l,u] is the search interval.
+      */
+      same = 0;
+      if (h != 0) {  /* not 1st pattern? */
+         for (l = 0, u = com.npatt - 1; ; ) {
+            if (u<l) break;
+            ip = (l + u) / 2;
+            k = strcmp(zt + h*lpatt, zt + p2s[ip] * lpatt);
+            if (k<0)        u = ip - 1;
+            else if (k>0)   l = ip + 1;
+            else { same = 1;  break; }
+         }
+      }
+      if (!same) {
+         if (l > ip) ip++;        /* last comparison in bsearch had k > 0. */
+         /* Insert new pattern at ip.  This is the expensive step. */
+         if (ip<com.npatt)
+            memmove(p2s + ip + 1, p2s + ip, (com.npatt - ip) * sizeof(int));
+         p2s[ip] = h;
+         com.npatt++;
+      }
+      if (debug) {
+         printf(": %3d (%c ilu %3d%3d%3d) ", com.npatt, (same ? 'S' : 'D'), ip, l, u);
+         for (j = 0; j<com.npatt; j++)
+            printf(" %s", zt + p2s[j] * lpatt);
+      }
+      if (noisy>2 && ((h + 1) % 10000 == 0 || h + 1 == npatt0))
+         printf("\rCompressing, %6d patterns at %6d / %6d sites (%.1f%%), %s",
+            com.npatt, h + 1, npatt0, (h + 1.) * 100 / npatt0, printtime(timestr));
+   }     /* for (h)  */
+   if (noisy>2) printf("\n");
+
+   /* (B) count pattern frequencies and collect pose[] */
+   com.fpatt = (double*)realloc(com.fpatt, com.npatt * sizeof(double));
+   memset(com.fpatt, 0, com.npatt * sizeof(double));
+
+   for (h = 0; h<npatt0; h++) {
+      for (same = 0, l = 0, u = com.npatt - 1; ; ) {
+         if (u<l) break;
+         ip = (l + u) / 2;
+         k = strcmp(zt + h*lpatt, zt + p2s[ip] * lpatt);
+         if (k<0)        u = ip - 1;
+         else if (k>0)   l = ip + 1;
+         else { same = 1;  break; }
+      }
+      if (!same) error2("ghost pattern?");
+      com.fpatt[ip] += fpatt0[h];
+      if(com.pose) com.pose[h] = ip;
+      if (noisy>2 && ((h + 1) % 10000 == 0 || h + 1 == npatt0))
+         printf("\rCollecting patterns, %6d patterns at %6d / %6d sites (%.1f%%), %s",
+            com.npatt, h + 1, npatt0, (h + 1.) * 100 / npatt0, printtime(timestr));
+   }     /* for (h)  */
+   if (noisy>2) printf("\n");
+
+   for (j = 0; j<com.ns; j++) {
+      com.z[j] = (unsigned char*)realloc(com.z[j], com.npatt * sizeof(unsigned char));
+      for (ip = 0, p = com.z[j]; ip<com.npatt; ip++)
+         *p++ = (unsigned char)(zt[p2s[ip] * lpatt + j] - 1);
+   }
+   free(p2s);  free(zt);  free(fpatt0);
+
+   return (0);
 }
 
 
@@ -416,7 +495,7 @@ int ReadSeq (FILE *fout, FILE *fseq, int cleandata, int locus)
     int n31=(com.seqtype==CODONseq||com.seqtype==CODON2AAseq?3:1);
     int gap=(n31==3?3:10), nchar=(com.seqtype==AAseq?20:4);
     int h,b[3]={0};
-    char *pch=((com.seqtype<=1||com.seqtype==CODON2AAseq) ? BASEs : (com.seqtype==2 ? AAs: (com.seqtype==5 ? BASEs5 : BINs)));
+    char *pch=((com.seqtype<=1||com.seqtype==CODON2AAseq) ? BASEs : (com.seqtype==2 ? AAs: BINs));
     char str[4]="   ";
     char *NEXUSend="end;";
     double lst;
@@ -430,7 +509,7 @@ int ReadSeq (FILE *fout, FILE *fseq, int cleandata, int locus)
     if (noisy>=9 && (com.seqtype<=CODONseq||com.seqtype==CODON2AAseq)) {
         puts("\n\nAmbiguity character definition table:\n");
         for(i=0; i<(int)strlen(BASEs); i++) {
-            nb = strlen(EquateBASE[i]);
+            nb = (int)strlen(EquateBASE[i]);
             printf("%c (%d): ", BASEs[i], nb);
             for(j=0; j<nb; j++)  printf("%c ", EquateBASE[i][j]);
             FPN(F0);
@@ -598,9 +677,9 @@ readseq:
             }
             p = line+(line[0]=='=' || line[0]=='>') ;
             while(isspace(*p)) p++;
-            if ((ch=strstr(p,"  ")-p)<lspname && ch>0) lspname=ch;
+            if ((ch=(int)(strstr(p,"  ")-p)) < lspname && ch>0) lspname=ch;
             strncpy (com.spname[j], p, lspname);
-            k = strlen(com.spname[j]);
+            k = (int)strlen(com.spname[j]);
             p += (k<lspname?k:lspname);
             
             for (; k>0; k--) /* trim spaces */
@@ -667,10 +746,10 @@ readseq:
                 if (igroup==0) {
                     lspname = LSPNAME;
                     while(isspace(*p)) p++;
-                    if ((ch=strstr(p,"  ")-p)<lspname && ch>0)
+                    if ((ch = (int)(strstr(p,"  ")-p)) < lspname && ch>0)
                         lspname = ch;
                     strncpy (com.spname[j], p, lspname);
-                    k = strlen(com.spname[j]);
+                    k = (int)strlen(com.spname[j]);
                     p += (k<lspname?k:lspname);
                     
                     for (; k>0; k--)   /* trim spaces */
@@ -996,18 +1075,18 @@ int printPatterns(FILE *fout)
 
 void EncodeSeqs (void)
 {
-    /* This encodes sequences and set up com.TipMap[][], called after sites are collapsed
-     into patterns.
-     */
+/* This encodes sequences and sets up com.TipMap[][], called after sites are collapsed
+   into patterns.
+*/
     int n=com.ncode, nA, is,h, i, j, k,ic, indel=0, ch, b[3];
-    char *pch = ((com.seqtype==0||com.seqtype==1) ? BASEs : (com.seqtype==2 ? AAs : (com.seqtype==5 ? BASEs5: BINs)));
+    char *pch = ((com.seqtype==0||com.seqtype==1) ? BASEs : (com.seqtype==2 ? AAs : BINs));
     unsigned char c[4]="", str[4]="   ";
     
     if(com.seqtype != 1) {
         for(is=0; is<com.ns; is++) {
             for (h=0; h<com.npatt; h++) {
                 ch = com.z[is][h];
-                com.z[is][h] = (char)(k = strchr(pch, ch) - pch);
+                com.z[is][h] = (char)(k = (int)(strchr(pch, ch) - pch));
                 if(k<0) {
                     printf("strange character %c in seq %d site %d\n", ch, is+1, h+1);
                     exit(-1);
@@ -1070,9 +1149,9 @@ void SetMapAmbiguity (void)
     /* This sets up CharaMap, the map from the ambiguity characters to resolved characters.
      */
     int n=com.ncode, i,j, i0,i1,i2, nb[3], ib[3][4], ic;
-    char *pch = (com.seqtype==0 ? BASEs : (com.seqtype==2 ? AAs : (com.seqtype==5 ? BASEs5: BINs)));
-    char *pbases = (com.seqtype==0 ? BASEs : (com.seqtype==5 ? BASEs5: NULL));
-    char **pEquateBASE = (com.seqtype==0 ? EquateBASE : (com.seqtype==5 ? EquateBASE5 : NULL));
+    char *pch = (com.seqtype==0 ? BASEs : (com.seqtype==2 ? AAs : BINs));
+    char *pbases = (com.seqtype==0 ? BASEs : NULL);
+    char **pEquateBASE = (com.seqtype==0 ? EquateBASE : NULL);
     char debug=0;
     
     for(j=0; j<n; j++) {  /* basic characters, coded according to the definition in pch. */
@@ -1221,147 +1300,142 @@ void AllPatterns (FILE* fout)
 }
 
 
-int PatternWeight (void)
+int PatternWeight(void)
 {
-    /* This collaps sites into patterns, for nucleotide, amino acid, or codon sequences.
-     This relies on \0 being the end of the string so that sequences should not be
-     encoded before this routine is called.
-     com.pose[i] has labels for genes as input and maps sites to patterns in return.
-     com.fpatt, a vector of doubles, wastes space as site pattern counts are integers.
-     Sequences z[ns*ls] are copied into patterns zt[ls*lpatt], and bsearch is used
-     twice to avoid excessive copying, to count npatt first & to generate fpatt etc.
-     */
-    int maxnpatt=com.ls, h, ip,l,u, j, k, same, ig, *poset;
-    // int gap = (com.seqtype==CODONseq ? 3 : 10);
-    int n31 = (com.seqtype==CODONseq ? 3 : 1);
-    int lpatt=com.ns*n31+1;   /* extra 0 used for easy debugging, can be voided */
-    int *p2s;  /* point patterns to sites in zt */
-    char *zt, timestr[36];
-    unsigned char *p;
-    double nc = (com.seqtype == 1 ? 64 : com.ncode) + !com.cleandata+1;
-    int debug=0;
-    char DS[]="DS";
-    
-    /* (A)
-     Collect and sort patterns.  Get com.npatt, com.lgene, com.posG.
-     Move sequences com.z[ns][ls] into sites zt[ls*lpatt].
-     Use p2s to map patterns to sites in zt to avoid copying.
-     */
-    if(noisy) printf("Counting site patterns.. %s\n", printtime(timestr));
-    
-    if((com.seqtype==1 && com.ns<5) || (com.seqtype!=1 && com.ns<7))
-        maxnpatt = (int)(pow(nc, (double)com.ns) + 0.5) * com.ngene;
-    if(maxnpatt>com.ls) maxnpatt = com.ls;
-    p2s  = (int*)malloc(maxnpatt*sizeof(int));
-    zt = (char*)malloc(com.ls*lpatt*sizeof(char));
-    if(p2s==NULL || zt==NULL)  error2("oom p2s or zt");
-    memset(zt, 0, com.ls*lpatt*sizeof(char));
-    for(j=0; j<com.ns; j++)
-        for(h=0; h<com.ls; h++)
-            for(k=0; k<n31; k++)
-                zt[h*lpatt+j*n31+k] = com.z[j][h*n31+k];
-    
-    for(j=0; j<com.ns; j++) free(com.z[j]);
-    
-    for(ig=0; ig<com.ngene; ig++) com.lgene[ig] = 0;
-    for(ig=0,com.npatt=0; ig<com.ngene; ig++) {
-        com.posG[ig] = l = u = ip = com.npatt;
-        for(h=0; h<com.ls; h++) {
-            if(com.pose[h] != ig) continue;
-            if(debug) printf("\nh %3d %s", h, zt+h*lpatt);
-            
-            /* bsearch in existing patterns.  Knuth 1998 Vol3 Ed2 p.410
-             ip is the loc for match or insertion.  [l,u] is the search interval.
-             */
-            same = 0;
-            if(com.lgene[ig]++ != 0) {  /* not 1st pattern? */
-                for(l=com.posG[ig], u=com.npatt-1; ; ) {
-                    if(u<l) break;
-                    ip = (l+u)/2;
-                    k = strcmp(zt+h*lpatt, zt+p2s[ip]*lpatt);
-                    if(k<0)        u = ip - 1;
-                    else if(k>0)   l = ip + 1;
-                    else         { same = 1;  break; }
-                }
-            }
-            if(!same) {
-                if(com.npatt>maxnpatt)
-                    error2("npatt > maxnpatt");
-                if(l > ip) ip++;        /* last comparison in bsearch had k > 0. */
-                /* Insert new pattern at ip.  This is the expensive step. */
-                
-                if(ip<com.npatt)
-                    memmove(p2s+ip+1, p2s+ip, (com.npatt-ip)*sizeof(int));
-                
-                /*
-                 for(j=com.npatt; j>ip; j--)
-                 p2s[j] = p2s[j-1];
-                 */
-                p2s[ip] = h;
-                com.npatt ++;
-            }
-            
-            if(debug) {
-                printf(": %3d (%c ilu %3d%3d%3d) ", com.npatt, DS[same], ip, l, u);
-                for(j=0; j<com.npatt; j++)
-                    printf(" %s", zt+p2s[j]*lpatt);
-            }
-            if(noisy && ((h+1)%10000==0 || h+1==com.ls))
-                printf("\r%12d patterns at %8d / %8d sites (%.1f%%), %s",
-                       com.npatt, h+1, com.ls, (h+1.)*100/com.ls, printtime(timestr));
-            
-        }     /* for (h)  */
-    }        /* for (ig) */
-    if(noisy) FPN(F0);
-    
-    /* (B) count pattern frequencies and collect pose[] */
-    com.posG[com.ngene] = com.npatt;
-    for(j=0; j<com.ngene; j++)
-        if(com.lgene[j]==0)
-            error2("some gene labels are missing");
-    for(j=1; j<com.ngene; j++)
-        com.lgene[j] += com.lgene[j-1];
-    
-    com.fpatt = (double*)realloc(com.fpatt, com.npatt*sizeof(double));
-    poset = (int*)malloc(com.ls*sizeof(int));
-    if(com.fpatt==NULL || poset==NULL) error2("oom poset");
-    for(ip=0; ip<com.npatt; ip++) com.fpatt[ip] = 0;
-    
-    for(ig=0; ig<com.ngene; ig++) {
-        for(h=0; h<com.ls; h++) {
-            if(com.pose[h] != ig) continue;
-            for(same=0, l=com.posG[ig], u=com.posG[ig+1]-1; ; ) {
-                if(u<l) break;
-                ip = (l+u)/2;
-                k = strcmp(zt+h*lpatt, zt+p2s[ip]*lpatt);
-                if(k<0)        u = ip - 1;
-                else if(k>0)   l = ip + 1;
-                else         { same = 1;  break; }
-            }
-            if(!same)
-                error2("ghost pattern?");
-            com.fpatt[ip]++;
-            poset[h] = ip;
-        }     /* for (h)  */
-    }        /* for (ig) */
-    
-    if(com.seqtype==CODONseq && com.ngene==3 &&com.lgene[0]==com.ls/3) {
-        puts("\nCheck option G in data file? (Enter)\n");
-    }
-    
-    for(j=0; j<com.ns; j++) {
-        com.z[j] = (unsigned char*)malloc(com.npatt*n31*sizeof(char));
-        for(ip=0,p=com.z[j]; ip<com.npatt; ip++)
-            for(k=0; k<n31; k++)
-                *p++ = zt[p2s[ip]*lpatt + j*n31 + k];
-    }
-    memcpy(com.pose, poset, com.ls*sizeof(int));
-    free(poset);  free(p2s);  free(zt);
-    
-    return (0);
+/* This collapses sites into patterns, for nucleotide, amino acid, or codon sequences.
+   This relies on \0 being the end of the string.
+   com.pose[i] has labels for genes as input and maps sites to patterns in return.
+   com.fpatt has site-pattern counts.
+   This deals with multiple genes/partitions, and uses com.ngene, com.lgene[], com.posG[] etc.
+   Sequences z[ns][ls] are copied into patterns zt[ls*lpatt], and bsearch is used
+   twice to avoid excessive copying, the first round to count npatt and identify the site patterns
+   and the second round to generate fpatt[] & com.pose[].
+*/
+   int maxnpatt = com.ls, h, ip, l, u, j, k, same, ig, *poset;
+   // int gap = (com.seqtype==CODONseq ? 3 : 10);
+   int n31 = (com.seqtype == CODONseq ? 3 : 1);
+   int lpatt = com.ns*n31 + 1;   /* extra 0 used for easy debugging, can be voided */
+   int *p2s;  /* point patterns to sites in zt */
+   char timestr[36];
+   unsigned char *p, *zt;
+   double nc = (com.seqtype == 1 ? 64 : com.ncode) + !com.cleandata + 1;
+   int debug = 0;
+
+   /* (A) Collect and sort patterns.  Get com.npatt, com.lgene, com.posG.
+   Move sequences com.z[ns][ls] into sites zt[ls*lpatt].
+   Use p2s to map patterns to sites in zt to avoid copying.
+   */
+   if (noisy) printf("Counting site patterns.. %s\n", printtime(timestr));
+
+   if ((com.seqtype == 1 && com.ns<5) || (com.seqtype != 1 && com.ns<7))
+      maxnpatt = (int)(pow(nc, (double)com.ns) + 0.5) * com.ngene;
+   if (maxnpatt>com.ls) maxnpatt = com.ls;
+   p2s = (int*)malloc(maxnpatt * sizeof(int));
+   zt = (char*)malloc(com.ls*lpatt * sizeof(char));
+   if (p2s == NULL || zt == NULL)  error2("oom p2s or zt");
+   memset(zt, 0, com.ls*lpatt * sizeof(char));
+   for (j = 0; j<com.ns; j++)
+      for (h = 0; h<com.ls; h++)
+         for (k = 0; k<n31; k++)
+            zt[h*lpatt + j*n31 + k] = (unsigned char)(com.z[j][h*n31 + k] + 1);
+
+   for (ig = 0; ig<com.ngene; ig++) com.lgene[ig] = 0;
+   for (ig = 0, com.npatt = 0; ig<com.ngene; ig++) {
+      com.posG[ig] = l = u = ip = com.npatt;
+      for (h = 0; h<com.ls; h++) {
+         if (com.pose[h] != ig) continue;
+         if (debug) printf("\nh %3d %s", h, zt + h*lpatt);
+
+         /* bsearch in existing patterns.  Knuth 1998 Vol3 Ed2 p.410
+         ip is the loc for match or insertion.  [l,u] is the search interval.
+         */
+         same = 0;
+         if (com.lgene[ig]++ != 0) {  /* not 1st pattern? */
+            for (l = com.posG[ig], u = com.npatt - 1; ; ) {
+               if (u<l) break;
+               ip = (l + u) / 2;
+               k = strcmp(zt + h*lpatt, zt + p2s[ip] * lpatt);
+               if (k<0)        u = ip - 1;
+               else if (k>0)   l = ip + 1;
+               else { same = 1;  break; }
+            }
+         }
+         if (!same) {
+            if (com.npatt>maxnpatt)
+               error2("npatt > maxnpatt");
+            if (l > ip) ip++;        /* last comparison in bsearch had k > 0. */
+                                     /* Insert new pattern at ip.  This is the expensive step. */
+            if (ip<com.npatt)
+               memmove(p2s + ip + 1, p2s + ip, (com.npatt - ip) * sizeof(int));
+            p2s[ip] = h;
+            com.npatt++;
+         }
+
+         if (debug) {
+            printf(": %3d (%c ilu %3d%3d%3d) ", com.npatt, (same ? 'S' : 'D'), ip, l, u);
+            for (j = 0; j<com.npatt; j++)
+               printf(" %s", zt + p2s[j] * lpatt);
+         }
+         if (noisy && ((h + 1) % 10000 == 0 || h + 1 == com.ls))
+            printf("\rCompressing, %6d patterns at %6d / %6d sites (%.1f%%), %s",
+               com.npatt, h + 1, com.ls, (h + 1.) * 100 / com.ls, printtime(timestr));
+
+      }     /* for (h)  */
+      if (noisy) FPN(F0);
+   }        /* for (ig) */
+
+            /* (B) count pattern frequencies and collect pose[] */
+   com.posG[com.ngene] = com.npatt;
+   for (j = 0; j<com.ngene; j++)
+      if (com.lgene[j] == 0)
+         error2("some genes do not have any sites?");
+   for (j = 1; j<com.ngene; j++)
+      com.lgene[j] += com.lgene[j - 1];
+
+   com.fpatt = (double*)realloc(com.fpatt, com.npatt * sizeof(double));
+   poset = (int*)malloc(com.ls * sizeof(int));
+   if (com.fpatt == NULL || poset == NULL) error2("oom poset");
+   memset(com.fpatt, 0, com.npatt * sizeof(double));
+
+   for (ig = 0; ig<com.ngene; ig++) {
+      for (h = 0; h<com.ls; h++) {
+         if (com.pose[h] != ig) continue;
+         for (same = 0, l = com.posG[ig], u = com.posG[ig + 1] - 1; ; ) {
+            if (u<l) break;
+            ip = (l + u) / 2;
+            k = strcmp(zt + h*lpatt, zt + p2s[ip] * lpatt);
+            if (k<0)        u = ip - 1;
+            else if (k>0)   l = ip + 1;
+            else { same = 1;  break; }
+         }
+         if (!same)
+            error2("ghost pattern?");
+         com.fpatt[ip]++;
+         poset[h] = ip;
+         if (noisy && ((h + 1) % 10000 == 0 || h + 1 == com.ls))
+            printf("\rCollecting patterns, %6d patterns at %6d / %6d sites (%.1f%%), %s",
+               com.npatt, h + 1, com.ls, (h + 1.) * 100 / com.ls, printtime(timestr));
+      }     /* for (h)  */
+      if (noisy) FPN(F0);
+   }        /* for (ig) */
+
+   if (com.seqtype == CODONseq && com.ngene == 3 && com.lgene[0] == com.ls / 3)
+      puts("\nCheck option G in data file?\n");
+
+   for (j = 0; j<com.ns; j++) {
+      com.z[j] = (unsigned char*)realloc(com.z[j], com.npatt*n31 * sizeof(unsigned char));
+      for (ip = 0, p = com.z[j]; ip<com.npatt; ip++)
+         for (k = 0; k<n31; k++)
+            *p++ = (unsigned char)(zt[p2s[ip] * lpatt + j*n31 + k] - 1);
+   }
+   memcpy(com.pose, poset, com.ls * sizeof(int));
+   free(poset);  free(p2s);  free(zt);
+
+   return (0);
 }
 
 
+
 void AddFreqSeqGene(int js,int ig,double pi0[],double pi[]);
 
 
@@ -1402,7 +1476,7 @@ int InitializeBaseAA (FILE *fout)
      This routine is called by baseml and aaml.  codonml uses another
      routine InitializeCodon()
      */
-    char *pch = (com.seqtype==0 ? BASEs : (com.seqtype==2 ? AAs : (com.seqtype==5 ? BASEs5: BINs)));
+    char *pch = (com.seqtype==0 ? BASEs : (com.seqtype==2 ? AAs : BINs));
     char indel[]="-?";
     int wname=30, h,js,k, ig, nconstp, n=com.ncode;
     int irf, nrf=20;
@@ -1551,7 +1625,7 @@ void AddFreqSeqGene(int js, int ig, double pi0[], double pi[])
      using pi0, by resolving ambiguities.  The data are coded.  com.cleandata==1 or 0.
      This is for nucleotide and amino acid sequences only.
      */
-    //char *pch = (com.seqtype==0 ? BASEs : (com.seqtype==2 ? AAs : (com.seqtype==5 ? BASEs5: BINs)));
+    //char *pch = (com.seqtype==0 ? BASEs : (com.seqtype==2 ? AAs : BINs));
     int k, h, b, n=com.ncode;
     double t;
     
@@ -1598,7 +1672,7 @@ int RemoveIndel(void)
      */
     int  n=com.ncode, h,k, j,js,lnew,nindel, n31=1;
     char b, *miss;  /* miss[h]=1 if site (codon) h is missing, 0 otherwise */
-    char *pch=((com.seqtype<=1||com.seqtype==CODON2AAseq)?BASEs:(com.seqtype==2?AAs: (com.seqtype==5?BASEs5:BINs)));
+    char *pch=((com.seqtype<=1||com.seqtype==CODON2AAseq)?BASEs:(com.seqtype==2?AAs: BINs));
     
     if(com.seqtype==CODONseq || com.seqtype==CODON2AAseq) {
         n31=3; n=4;
@@ -1654,7 +1728,7 @@ int MPInformSites (void)
      Not used for a long time.  Does not work if com.pose is NULL.
      */
     char *imark;
-    char *pch=(com.seqtype==0 ? BASEs : (com.seqtype==2 ? AAs: (com.seqtype==5?BASEs5:BINs)));
+    char *pch=(com.seqtype==0 ? BASEs : (com.seqtype==2 ? AAs: BINs));
     int h, i, markb[NS], inf, lsinf;
     FILE *finf, *fninf;
     
@@ -1715,7 +1789,7 @@ int print1seq (FILE*fout, unsigned char *z, int ls, int pose[])
      This uses com.seqtype.
      */
     int h, hp, gap=10;
-    char *pch=(com.seqtype==0 ? BASEs : (com.seqtype==2 ? AAs: (com.seqtype==5?BASEs5:BINs)));
+    char *pch=(com.seqtype==0 ? BASEs : (com.seqtype==2 ? AAs: BINs));
     // char str[4]="";
     // int nb = (com.seqtype==CODONseq?3:1);
     
@@ -1781,7 +1855,7 @@ void printSeqs (FILE *fout, int *pose, char keep[], int format)
    else if (format==1) {
        for(h=0,FPN(fout); h<com.npatt; h++) {
            /* fprintf(fout," %12.8f", com.fpatt[h]/(double)com.ls); */
-           fprintf(fout, " %.0f", com.fpatt[h]);
+           fprintf(fout, " %4.0f", com.fpatt[h]);
            if((h+1)%15==0) FPN(fout);
        }
    }
@@ -1789,6 +1863,7 @@ void printSeqs (FILE *fout, int *pose, char keep[], int format)
    fflush(fout);
 }
 
+
 #define gammap(x,alpha) (alpha*(1-pow(x,-1.0/alpha)))
 /* DistanceREV () used to be here, moved to pamp.
  */
@@ -2805,41 +2880,39 @@ int IsNameNumber(char line[])
 
 
 
-int ReadTreeN (FILE *ftree, int *haslength, int *haslabel, int copyname, int popline)
-{
-    /* Read a tree from ftree, using the parenthesis node representation of trees.
-     Branch lengths are read in nodes[].branch, and branch (node) labels
-     (integers) are preceeded by # and read in nodes[].label.  If the clade label
-     $ is used, the label is read into CladeLabel[] first and then moved into
-     nodes[].label in the routine DownTreeCladeLabel().
+/* Read a tree from ftree, using the parenthesis node representation of trees.
+Branch lengths are read in nodes[].branch, and branch (node) labels
+(integers) are preceded by # and read in nodes[].label.  If the clade label
+$ is used, the label is read into CladeLabel[] first and then moved into
+nodes[].label in the routine DownTreeCladeLabel().
      
-     Calibration information for mcmctree may be read into nodes[].branch and nodes[].label,
-     as well as nodes[].NodeStr, and is processed inside mcmctree.
-     *haslength is set to 1 (branch lengths), 2 (calibration info) or 3 (both).
-     However, the bit for calibrations is set only if the symbols > < exist and not for
-     calibrations specified using L, U, G, etc, which will be stored in nodes[].NodeStr
-     and processed using ProcessFossilInfo() in mcmctree.
-     mcmctree should abort if *haslength == 1 or 3 after this routine.
+Calibration information for mcmctree may be read into nodes[].branch and nodes[].label,
+as well as nodes[].NodeStr, and is processed inside mcmctree.
+*haslength is set to 1 (branch lengths), 2 (calibration info) or 3 (both).
+However, the bit for calibrations is set only if the symbols > < exist and not for
+calibrations specified using L, U, G, etc, which will be stored in nodes[].NodeStr
+and processed using ProcessFossilInfo() in mcmctree.
+mcmctree should abort if *haslength == 1 or 3 after this routine.
      
-     This assumes that com.ns is known.
-     Species names are considered case-sensitive, with trailing spaces ignored.
+This assumes that com.ns is known.  Names are considered case-sensitive, with trailing spaces ignored.
      
-     copyname = 0: species numbers and names are both accepted, but names have
-     to match the names in com.spname[], which are from the
-     sequence data file.  Used by baseml and codeml, for example.
-     1: species names are copied into com.spname[], but species
-     numbers are accepted.  Used by evolver for simulation,
-     in which case no species names were read before.
-     2: the tree must have species names, which are copied into com.spname[].
-     Note that com.ns is assumed known.  To remove this restrition,
-     one has to consider the space for nodes[], CladeLabel, starting
-     node number etc.
+copyname = 0: species numbers and names are both accepted, but names have to match the names
+              in com.spname[], which are from the sequence data file.  
+              Used by baseml and codeml, for example.
+           1: species names are copied into com.spname[], but species numbers are accepted.  
+              Used by evolver for simulation, in which case no species names were read before.
+           2: the tree must have species names, which are copied into com.spname[].
+
+Note that com.ns is assumed known.  To remove this restriction, one has to consider the space 
+for nodes[], CladeLabel, starting node number etc.
      
-     isname = 0:   species number; 1: species name;
+isname = 0:   species number; 1: species name;
      
-     Ziheng note (18/12/2011): I have changed the code so that sequence number is not used
-     anymore.  isname = 1 always.
-     */
+Ziheng note (18/12/2011): I have changed the code so that sequence number is not used
+anymore.  isname = 1 always.
+*/
+int ReadTreeN (FILE *ftree, int *haslength, int *haslabel, int copyname, int popline)
+{
     int hascalibration=0, cnode, cfather = -1;  /* current node and father */
     int inodeb=0;  /* node number that will have the next branch length */
     int cladeLabels=0, i,j,k, level=0, isname, ch=' ', icurspecies=0;
@@ -2912,7 +2985,7 @@ int ReadTreeN (FILE *ftree, int *haslength, int *haslabel, int copyname, int pop
         else if (ch==':'||ch=='>') {
             if(ch==':') *haslength=1;
             else         hascalibration = 1;
-            fscanf(ftree,"%lf",&nodes[inodeb].branch);
+            fscanf(ftree, "%lf", &nodes[inodeb].branch);
         }
         else if (ch==quote[0] || ch==quote[1]) {
             for (k=0; ; k++) {  /* read notes into line[] */
@@ -2932,11 +3005,15 @@ int ReadTreeN (FILE *ftree, int *haslength, int *haslabel, int copyname, int pop
                *haslabel = 1;
                sscanf(pch+1, "%lf", &nodes[inodeb].label);
             }
-            else if((pch = strchr(line,'$'))) {
+            else if(pch = strchr(line,'$')) {
                 *haslabel=1;
                 sscanf(pch+1, "%d", &CladeLabel[inodeb]);
             }
-            else if(pch = strchr(line,'<')) {
+            else if(pch = strchr(line, '>')) {
+               hascalibration = 1;
+               sscanf(pch + 1, "%lf", &nodes[inodeb].branch);
+            }
+            else if(pch = strchr(line, '<')) {
                 hascalibration = 1;
                 sscanf(pch+1, "%lf", &nodes[inodeb].label);
             }
@@ -3372,11 +3449,11 @@ void PointconPnodes (void)
      This routine updates internal nodes com.conP only.
      End nodes (com.conP0) are updated in InitConditionalPNode().
      */
-    size_t nintern=0, i;
+    int nintern=0, i;
     
     for(i=0; i<tree.nbranch+1; i++)
         if(nodes[i].nson>0)  /* more thinking */
-            nodes[i].conP = com.conP + com.ncode*com.npatt*nintern ++;
+            nodes[i].conP = com.conP + (size_t)com.ncode*com.npatt*nintern ++;
 }
 
 
@@ -4492,7 +4569,7 @@ int StepwiseAdditionMP (double space[])
     _U0=(int*)malloc(com.npatt*_mnnode*sizeof(int));
     _step0=(int*)malloc(com.npatt*_mnnode*sizeof(int));
     if (noisy>2)
-        printf("\n%9ld bytes for MP (U0 & N0)\n", 2*com.npatt*_mnnode*sizeof(int));
+        printf("\n%9zd bytes for MP (U0 & N0)\n", 2*com.npatt*_mnnode*sizeof(int));
     if (_U0==NULL || _step0==NULL) error2("oom U0&step0");
     
     FOR (i,ns0)  z0[i]=com.z[i];
@@ -5971,7 +6048,7 @@ int AncestralMarginal (FILE *fout, double x[], double fhsiteAnc[], double Sir[])
      Deals with node scaling to avoid underflows.  See above
      (Z. Yang, 2 Sept 2001)
      */
-    char *pch=(com.seqtype==0 ? BASEs : (com.seqtype==2 ? AAs: (com.seqtype==5?BASEs5:BINs)));
+    char *pch=(com.seqtype==0 ? BASEs : (com.seqtype==2 ? AAs: BINs));
     char *zanc, str[4]="",codon[2][4]={"   ","   "}, aa[4]="";
     char *sitepatt=(com.readpattern?"pattern":"site");
     int n=com.ncode, inode, ic=0,b[3],i,j,k1=-1,k2=-1,c1,c2,k3, lsc=com.ls;
@@ -6290,7 +6367,7 @@ int ChangesSites(FILE*frst, int coding, char *zanc)
      nonsynonymous changes are counted separately.
      Added in Nov 2000.
      */
-    char *pch=(com.seqtype==0 ? BASEs : (com.seqtype==2 ? AAs: (com.seqtype==5?BASEs5:BINs)));
+    char *pch=(com.seqtype==0 ? BASEs : (com.seqtype==2 ? AAs: BINs));
     char codon[2][4]={"   ","   "};
     int  h,hp,inode,k1,k2,d, ls1=(com.readpattern?com.npatt:com.ls);
     double S,N,Sd,Nd, S1,N1,Sd1,Nd1, b,btotal=0, p,C;
@@ -6578,7 +6655,7 @@ void PrintAncState1site (char ancState1site[], double prob)
 {
     int i;
     char codon[4]="";
-    char *pch=(com.seqtype==0 ? BASEs : (com.seqtype==2 ? AAs: (com.seqtype==5?BASEs5:BINs)));
+    char *pch=(com.seqtype==0 ? BASEs : (com.seqtype==2 ? AAs: BINs));
     
     for(i=0; i<tree.nnode-com.ns; i++) {
         if(com.seqtype==1) {
@@ -6626,7 +6703,7 @@ int AncestralJointPPSG2000 (FILE *fout, double x[])
      This outputs results by pattern.  I tried to print results by site (rather than by pattern), 
      but gave up as some variables use the same memory (e.g., combIndex) for different site patterns.
      */
-    char *pch=(com.seqtype==0 ? BASEs : (com.seqtype==2 ? AAs: (com.seqtype==5?BASEs5:BINs)));
+    char *pch=(com.seqtype==0 ? BASEs : (com.seqtype==2 ? AAs: BINs));
     char codon[4]="";
     int n=com.ncode, nintern=tree.nnode-com.ns, nson, i,j,k,h,hp,igene;
     int maxnson, maxncomb, lst=(com.readpattern?com.npatt:com.ls);
@@ -7458,7 +7535,7 @@ int print1site (FILE*fout, int h)
      site in the original data file or the h-th pattern.  The data are coded.
      naa > 1 if the codon codes for more than one amino acid.
      */
-    char *pch=(com.seqtype==0 ? BASEs : (com.seqtype==2 ? AAs: (com.seqtype==5?BASEs5:BINs)));
+    char *pch=(com.seqtype==0 ? BASEs : (com.seqtype==2 ? AAs: BINs));
     char compatibleAAs[20]="";
     int n=com.ncode, i, b, aa=0;
     
@@ -8501,7 +8578,7 @@ int ReadTreeSeqs (FILE*fout)
     if (haslength & 1)
         error2("Tree should have fossil calibrations but not branch lengths!");
 #endif
-    
+
     /* read sequences at each locus, construct gene tree by pruning sptree */
     data.ngene = com.ndata;
     com.ndata=1;
diff --git a/src/yn00.c b/src/yn00.c
index 52ef594..4ae1bbf 100644
--- a/src/yn00.c
+++ b/src/yn00.c
@@ -1,907 +1,907 @@
-/* yn00.c
-   Pairwise estimation of dS and dN by the method of Yang & Nielsen 
-   (2000 Mol. Biol. Evol. 17:32-43)
-
-   Copyright, 1998, Ziheng Yang
-
-                 cc -o yn00 -fast yn00.c tools.o -lm
-                 cl -O2 yn00.c tools.o
-                 yn00 <SequenceFileName>
-
-  Codon sequences are encoded as 0,1,...,61, as in codeml.c.
-*/
-#include "paml.h"
-#define NS            1000
-#define LSPNAME       30
-#define NCODE         64
-#define NGENE         2000
-
-int GetOptions (char *ctlf);
-int EncodeSeqCodon(void);
-int Statistics(FILE *fout, double space[]);
-int DistanceMatLWL85 (FILE *fout);
-int DistanceYN00(int is, int js, double*S, double*N, double*dS,double*dN,
-    double *SEdS, double *SEdN, double *t,double space[]);
-int GetKappa (void);
-int GetFreqs(int is1, int is2, double f3x4[], double pi[]);
-int CountSites(char z[],double pi[],double*Stot,double*Ntot,
-    double fbS[],double fbN[]);
-int GetPMatCodon(double P[],double t, double kappa, double omega, double space[]);
-int CountDiffs(char z1[],char z2[], 
-               double*Sdts,double*Sdtv,double*Ndts,double*Ndtv,double PMat[]);
-int DistanceF84(double n, double P, double Q, double pi[],
-		          double*k_HKY, double*t, double*SEt);
-double dsdnREV (int is, int js, double space[]);
-
-int ExpPattFreq(double t,double kappa,double omega,double pi[],double space[]);
-int ConsistencyMC(void);
-int InfiniteData(double t,double kappa,double omega,double f3x4_0[],
-    double space[]);
-void SimulateData2s64(FILE* fout, double f3x4_0[], double space[]);
-
-struct common_info {
-   unsigned char *z[NS];
-   char *spname[NS], seqf[512],outf[512];
-   int ns,ls,npatt,codonf,icode,ncode,getSE,*pose,verbose, seqtype, readpattern;
-   int cleandata, fcommon,kcommon, weighting, ndata, print;
-   double *fpatt, pi[NCODE], f3x4s[NS][12], kappa, omega;
-   int ngene,posG[NGENE+1],lgene[NGENE],fix_rgene, model;
-   double rgene[NGENE],piG[NGENE][NCODE], alpha;
-}  com;
-
-
-int FROM61[64], FROM64[64], FourFold[4][4];
-double PMat[NCODE*NCODE];
-char *codonfreqs[]={"Fequal", "F1x4", "F3x4", "Fcodon"};
-enum {Fequal, F1x4, F3x4, Fcodon} CodonFreqs;
-
-FILE *frst, *frst1, *frub;
-extern char BASEs[], AAs[];
-extern int noisy, GeneticCode[][64];
-int Nsensecodon;
-enum {NODEBUG, KAPPA, SITES, DIFF} DebugFunctions;
-int debug=0;
-
-double omega_NG, dN_NG, dS_NG;  /* what are these for? */
-
-
-#define YN00
-#define REALSEQUENCE
-#include "treesub.c"
-
-
-int main(int argc, char *argv[])
-{
-   char dsf[512]="2YN.dS", dnf[512]="2YN.dN", tf[512]="2YN.t";
-   FILE *fout, *fseq, *fds, *fdn, *ft;
-   char ctlf[96]="yn00.ctl", timestr[64];
-   int    n=com.ncode, is,js, j, idata, wname=20, sspace;
-   double t=0.4, dS=0.1,dN=0.1, S,N, SEdS, SEdN, f3x4[12], *space=NULL;
-
-   /* ConsistencyMC(); */
-
-   printf("YN00 in %s\n", pamlVerStr);
-   starttimer();
-   if (argc>1)  strcpy(ctlf, argv[1]); 
-   com.seqtype=1;  com.cleandata=1;  /* works for clean data only? */
-   com.ndata=1;  com.print=0;
-   noisy=1; com.icode=0;  com.fcommon=0;  com.kcommon=1;
-   GetOptions(ctlf);
-   setmark_61_64 ();
-   fout = fopen (com.outf, "w"); 
-   frst = fopen("rst", "w");
-   frst1 = fopen("rst1", "w"); 
-   frub = fopen ("rub", "w");
-   if (fout==NULL || frst==NULL) error2("outfile creation err.");
-   fds = (FILE*)fopen(dsf, "w");
-   fdn = (FILE*)fopen(dnf, "w");
-   ft = (FILE*)fopen(tf, "w"); 
-   if(fds==NULL || fdn==NULL || ft==NULL) error2("file open error");
-
-   if((fseq=fopen (com.seqf,"r"))==NULL) {
-      printf ("\n\nSequence file %s not found!\n", com.seqf);
-      exit(-1);
-   }
-   for (idata=0; idata<com.ndata; idata++) {
-      if (com.ndata>1) {
-         printf("\nData set %d\n", idata+1);
-         fprintf(fout, "\n\nData set %d\n", idata+1);
-         fprintf(frst, "\t%d", idata+1);
-      }
-
-      ReadSeq((com.verbose?fout:NULL), fseq, com.cleandata, 0);
-      SetMapAmbiguity();
-
-      sspace = max2(200000,64*com.ns*sizeof(double));
-      sspace = max2(sspace,64*64*5*sizeof(double));
-      if ((space=(double*)realloc(space,sspace))==NULL) error2("oom space");
-
-      com.kappa = 4.6;
-      com.omega = 1;
-      fprintf(fout,"YN00 %15s", com.seqf);
-      Statistics(fout, space);
-
-      if(noisy) printf("\n\n(A) Nei-Gojobori (1986) method\n");
-      fprintf(fout,"\n\n(A) Nei-Gojobori (1986) method\n");
-      DistanceMatNG86 (fout, NULL, NULL, NULL, 0);
-      fflush(fout);
-
-      if(noisy) printf("\n\n(B) Yang & Nielsen (2000) method\n\n");
-      fprintf(fout,"\n\n(B) Yang & Nielsen (2000) method\n\n");
-      fprintf(fout,"Yang Z, Nielsen R (2000) Estimating synonymous and nonsynonymous substitution rates under realistic evolutionary models. Mol. Biol. Evol. 17:32-43\n");
-      if(!com.weighting) fputs("\n(equal weighting of pathways)\n",fout);
-
-      if(com.fcommon)  GetFreqs(-1, -1, f3x4, com.pi);
-      if(com.kcommon) {
-         GetKappa();
-         printf("kappa = %.2f\n\n",com.kappa);
-         /* puts("kappa?"); scanf("%lf", &com.kappa); */
-      }
-
-      fputs("\nseq. seq.     S       N        t   kappa   omega     dN +- SE    dS +- SE\n\n",fout);
-      fprintf(fds,"%6d\n", com.ns);
-      fprintf(fdn,"%6d\n", com.ns);
-      fprintf(ft,"%6d\n", com.ns);
-      for(is=0; is<com.ns; is++) {
-         fprintf(fds,"%-*s ", wname,com.spname[is]);
-         fprintf(fdn,"%-*s ", wname,com.spname[is]);
-         fprintf(ft,"%-*s ", wname,com.spname[is]);
-         for(js=0; js<is; js++) {
-            if(noisy) printf("%3d vs. %3d\n", is+1, js+1);
-            fprintf(fout, " %3d  %3d ", is+1, js+1);
-
-            if(!com.fcommon) GetFreqs(is, js, f3x4, com.pi);
-            if(!com.kcommon) GetKappa();
-            j = DistanceYN00(is, js, &S, &N, &dS,&dN, &SEdS, &SEdN, &t,space);
-
-            fprintf(fout,"%7.1f %7.1f %8.4f %7.4f %7.4f %6.4f +- %6.4f %7.4f +- %6.4f\n",
-               S,N,t,com.kappa,com.omega,dN,SEdN,dS,SEdS);
-            fprintf(frst," YN: %8.4f%8.4f%8.4f %6.4f +- %6.4f %7.4f +- %6.4f\n",
-               t,com.kappa,com.omega,dN,SEdN,dS,SEdS);
-
-            fprintf(fds," %7.4f",dS); fprintf(fdn," %7.4f",dN); fprintf(ft," %7.4f",t);
-         }    /* for (js) */
-         FPN(fds); FPN(fdn); FPN(ft);    
-         fflush(fds); fflush(fdn); fflush(ft);
-      }       /* for (is) */
-      FPN(fds); FPN(fdn); FPN(ft);
-
-      if(noisy) printf("\n\n(C) LWL85, LPB93 & LWLm methods\n\n");
-      fprintf(fout,"\n\n(C) LWL85, LPB93 & LWLm methods\n\n");
-      fprintf(fout,"Li W.-H., C.-I. Wu, Luo (1985) A new method for estimating synonymous and nonsynonymous rates of nucleotide substitutions considering the relative likelihood of nucleotide and codon changes. Mol. Biol. Evol. 2: 150-174.\n");
-      fprintf(fout,"Li W-H (1993) Unbiased estimation of the rates of synonymous and nonsynonymous substitution. J. Mol. Evol. 36:96-99\n");
-      fprintf(fout,"Pamilo P, Bianchi NO (1993) Evolution of the Zfx and Zfy genes - rates and interdependence between the genes. Mol. Biol. Evol. 10:271-281\n");
-      fprintf(fout,"Yang Z (2006) Computational Molecular Evolution. Oxford University Press, Oxford. Eqs. 2.12 & 2.13\n");
-
-      DistanceMatLWL85(fout);
-
-      fflush(frst);
-      if(noisy) printf("\nTime used: %s\n", printtime(timestr));
-   }
-   return (0);
-}
-
-
-
-int GetOptions (char *ctlf)
-{
-   int i, nopt=9, lline=4096;
-   char line[4096], *pline, opt[20], comment='*';
-   char *optstr[]={"seqfile","outfile", "verbose", "noisy", "icode", 
-        "weighting","commonkappa", "commonf3x4", "ndata"};
-   double t;
-   FILE *fctl;
-
-   if((fctl=fopen(ctlf,"r"))==NULL) error2("\nctl file open error.\n");
-   printf ("\nReading options from %s..\n", ctlf);
-   for (;;) {
-      if (fgets (line, lline, fctl) == NULL) break;
-      for (i=0,t=0,pline=line; i<lline&&line[i]; i++)
-         if (isalnum(line[i]))  { t=1; break; }
-         else if (line[i]==comment) break;
-      if (t==0) continue;
-      sscanf (line, "%s%*s%lf", opt, &t);
-      if ((pline=strstr(line, "="))==NULL) error2("option file.");
-
-      for (i=0; i<nopt; i++) {
-         if (strncmp(opt, optstr[i], 8)==0)  {
-            if (noisy>2)
-               printf ("\n%3d %15s | %-20s %6.2f", i+1,optstr[i],opt,t);
-            switch (i) {
-               case (0): sscanf(pline+2, "%s", com.seqf);    break;
-               case (1): sscanf(pline+2, "%s", com.outf);    break;
-               case (2): com.verbose=(int)t;     break;
-               case (3): noisy=(int)t;           break;
-               case (4): com.icode=(int)t;       break;
-               case (5): com.weighting=(int)t;   break;
-               case (6): com.kcommon=(int)t;     break;
-               case (7): com.fcommon=(int)t;     break;
-               case (8): com.ndata=(int)t;       break;
-            }
-            break;
-         }
-      }
-      if (i==nopt)
-         { printf ("\noption %s in %s\n", opt, ctlf);  exit (-1); }
-   }
-
-   for (i=0,Nsensecodon=0; i<64; i++)
-      if (GeneticCode[com.icode][i]!=-1) Nsensecodon++;
-   com.ncode = Nsensecodon;
-   fclose (fctl);
-   FPN(F0);
-   return (0);
-}
-
-int DistanceYN00(int is, int js, double*S, double*N, double*dS,double*dN,
-    double *SEdS, double *SEdN, double *t,double space[])
-{
-/* calculates dS, dN, w, t by weighting.
-   com.kappa & com.pi[] are calculated beforehand are not updated.
-*/
-   int j,k,ir,nround=10, status=0;
-   double fbS[4],fbN[4],fbSt[4],fbNt[4], St,Nt, Sdts,Sdtv,Ndts,Ndtv, kappaS,kappaN;
-   double w0=0,dS0=0,dN0=0, accu=5e-4, minomega=1e-5,maxomega=99;
-
-   if(*t==0) *t=.5;  
-   if(com.omega<=0) com.omega=1;
-   for(k=0; k<4; k++) fbS[k] = fbN[k] = 0;
-   if(debug) printf("\nCountSites\n");
-   if(noisy>3) printf("\n");
-   for(k=0,*S=*N=0; k<2; k++) {
-      CountSites(com.z[k==0?is:js], com.pi, &St, &Nt, fbSt, fbNt);
-      *S += St/2;
-      *N += Nt/2;
-      for(j=0; j<4; j++) {
-         fbS[j] += fbSt[j]/2;
-         fbN[j] += fbNt[j]/2;
-      }
-      if(noisy>3) printf("Seq. %d: S = %9.3f N=%9.3f\n",k+1,St,Nt);
-   }
-   if(noisy>3) {
-      printf("Ave.  : S = %9.3f N=%9.3f\n\n",*S,*N);
-      printf("Base freqs at syn & nonsyn sites\n%10s%10s%10s%10s\n", "T", "C", "A", "G");
-      for(k=0; k<4; k++) printf(" %9.6f", fbS[k]);  FPN(F0);
-      for(k=0; k<4; k++) printf(" %9.6f", fbN[k]);  FPN(F0);
-   }
-   if(noisy>3) 
-      printf(" #    Sdts   Sdtv   Ndts   Ndtv |       t   kappa       w      dN      dS |   kappaS  kappaN\n");
-
-   /* initial values? */
-   if(com.weighting) { 
-      if(*t<0.001 || *t>5) *t=0.5; 
-      if(com.omega<0.01 || com.omega>5) com.omega=.5;
-   }
-   for (ir=0; ir<(com.weighting?nround:1); ir++) {   /* weighting or iteration */
-      if(com.weighting)
-         GetPMatCodon(PMat,*t,com.kappa,com.omega,space);
-      else
-         for(j=0; j<com.ncode*com.ncode; j++) 
-            PMat[j] = 1;
-
-      CountDiffs(com.z[is], com.z[js], &Sdts, &Sdtv, &Ndts, &Ndtv, PMat);
-
-      if(DistanceF84(*S, Sdts/ *S, Sdtv/ *S, fbS, &kappaS, dS, SEdS)) status=-1;
-      if(DistanceF84(*N, Ndts/ *N, Ndtv/ *N, fbN, &kappaN, dN, SEdN)) status=-1;
-
-      if(*dS<1e-9) { 
-         status = -1;
-         com.omega = maxomega;
-      }
-      else
-         com.omega= max2(minomega, *dN/ *dS);
-      *t = *dS * 3 * *S/(*S + *N) + *dN * 3 * *N/(*S + *N);
-      if(noisy>3) {
-         printf("%2d %7.2f%7.2f%7.2f%7.2f |", ir+1, Sdts,Sdtv,Ndts,Ndtv);
-         printf("%8.4f%8.4f%8.4f%8.4f%8.4f", *t, com.kappa,com.omega,*dN,*dS);
-         printf(" | %8.4f%8.4f\n", kappaS,kappaN);
-      }
-      if(fabs(*dS-dS0)<accu && fabs(*dN-dN0)<accu && fabs(com.omega-w0)<accu)
-         break;
-      dS0=*dS;  dN0=*dN;  w0=com.omega;
-   } /* for (ir) */
-   if(ir==nround) status=-2;
-   /* if(status) printf("\n\tstatus: %d\n", status); */
-   return(status);
-}
-
-
-
-int Statistics(FILE *fout, double space[])
-{
-/* This calculates base frequencies, using npatt & fpatt[]
-*/
-   int h, is,j, c[3], wname=20;
-   double f3x4tot[12], *fb3tot=com.pi, *fb3s=space;
-
-   if(fout) {
-      fprintf(fout, "\n\nns =%4d\tls =%4d", com.ns, com.ls);
-      fprintf(fout,"\n\nCodon position x base (3x4) table for each sequence.");
-   }
-   zero(f3x4tot,12);  zero(fb3s,64*com.ns);
-   for(is=0; is<com.ns; is++)  zero(com.f3x4s[is], 12);
-   for (is=0; is<com.ns; is++) {
-      for (h=0; h<com.npatt; h++) {
-         j = FROM61[com.z[is][h]];
-         c[0]=j/16; c[1]=(j%16)/4; c[2]=j%4;
-         fb3s[is*64+j] += com.fpatt[h];
-         for(j=0; j<3; j++)
-            com.f3x4s[is][j*4+c[j]] += com.fpatt[h]/com.ls;
-      }
-      for(j=0; j<12; j++) f3x4tot[j] += com.f3x4s[is][j]/com.ns;
-      if(fout) { 
-         fprintf(fout,"\n\n%-*s", wname, com.spname[is]);
-         for(j=0; j<3; j++) {
-            fprintf (fout, "\nposition %2d:", j+1);
-            for(h=0; h<4; h++)
-               fprintf (fout,"%5c:%7.5f", BASEs[h], com.f3x4s[is][j*4+h]);
-         }
-      }
-   }
-   if(fout) {
-      fprintf (fout, "\n\nAverage");
-      for(j=0; j<3; j++) {
-         fprintf (fout, "\nposition %2d:", j+1);
-         for(h=0; h<4; h++)
-            fprintf (fout,"%5c:%7.5f", BASEs[h], f3x4tot[j*4+h]);
-      }
-      for(is=0,zero(fb3tot,64);is<com.ns;is++) 
-         for(j=0; j<64; j++) fb3tot[j] += fb3s[is*64+j];
-      fprintf (fout, "\n\nCodon usage for each species\n");
-      printcums (fout, com.ns, fb3s, com.icode);
-      fprintf (fout, "\nSums\n");
-      printcums (fout, 1, fb3tot, com.icode);
-   }
-
-   return(0);
-}
-
-int GetFreqs(int is1, int is2, double f3x4[], double pi[])
-{
-/* uses com.fcommon and com.f3x4s to calculate f3x4[] and pi[].
-   Codon frequencies pi[] are calculated from the f3x4 table.
-   The calculation is duplicated when com.fcommon=1.
-*/
-   int n=com.ncode, j, k, ic, b[3];
-
-   if (com.fcommon)
-      for(j=0,zero(f3x4,12);j<com.ns;j++)
-         for(k=0; k<12; k++) f3x4[k]+=com.f3x4s[j][k]/com.ns;
-   else 
-      for(k=0; k<12; k++)
-         f3x4[k] = (com.f3x4s[is1][k]+com.f3x4s[is2][k])/2;
-
-   if (noisy>=9)
-      matout(F0, f3x4, 3, 4);
-   for(j=0; j<n; j++) {
-      ic=FROM61[j]; b[0]=ic/16; b[1]=(ic%16)/4; b[2]=ic%4;
-      pi[j] = f3x4[b[0]] * f3x4[4+b[1]] * f3x4[8+b[2]];
-   }
-   abyx(1/sum(pi,n), pi, n);
-
-   return (0);
-}
-
-
-int DistanceMatLWL85 (FILE *fout)
-{
-/* This implements 3 methods: LWL85 (Li, Wu & Luo 1985), LPB (Li 1993, 
-   Pamilo & Bianchi 1993), and LWL85m (equation 12 in book; check other refs).
-   alpha is not used.
-*/
-   int i,j,k, h, wname=15;
-   char *codon1, *codon2, str[4]="   ";
-   double L[3], sdiff[3], vdiff[3], Lt[3], sdifft[3], vdifft[3], A[3],B[3];
-   double P[3],Q[3], a,b, dS,dN, pS2, S,N, Sd,Nd;
-
-   for(i=0; i<com.ns; i++) {
-      for(j=0; j<i; j++) {  /* pair i and j */
-         for(k=0; k<3; k++) L[k] = sdiff[k] = vdiff[k] = 0;
-
-         for (h=0; h<com.npatt; h++)  {
-            codon1 = CODONs[com.z[i][h]];
-            codon2 = CODONs[com.z[j][h]];
-            difcodonLWL85(codon1, codon2, Lt, sdifft, vdifft, 0, com.icode);
-            for(k=0; k<3; k++) {
-               L[k]     += Lt[k]*com.fpatt[h];
-               sdiff[k] += sdifft[k]*com.fpatt[h];
-               vdiff[k] += vdifft[k]*com.fpatt[h];
-            }
-         }
-
-         for(k=0; k<3; k++) { 
-            P[k] = sdiff[k]/L[k];
-            Q[k] = vdiff[k]/L[k]; 
-            a = 1 - 2*P[k] - Q[k];
-            b = 1 - 2*Q[k];
-            A[k] = -log(a)/2 + log(b)/4;
-            B[k] = -log(b)/2;
-         }
-         if(fout) {
-            fprintf(fout, "\n%d (%s) vs. %d (%s)\n\n", i+1, com.spname[i], j+1, com.spname[j]);
-            fprintf(fout,"L(i):  %9.1f %9.1f %9.1f  sum=%9.1f\n", L[0],L[1],L[2],L[0]+L[1]+L[2]);
-            fprintf(fout,"Ns(i): %9.4f %9.4f %9.4f  sum=%9.4f\n", sdiff[0],sdiff[1],sdiff[2], sdiff[0]+sdiff[1]+sdiff[2]);
-            fprintf(fout,"Nv(i): %9.4f %9.4f %9.4f  sum=%9.4f\n", vdiff[0],vdiff[1],vdiff[2], vdiff[0]+vdiff[1]+vdiff[2]);
-            fprintf(fout,"A(i):  %9.4f %9.4f %9.4f\n", A[0],A[1],A[2]);
-            fprintf(fout,"B(i):  %9.4f %9.4f %9.4f\n", B[0],B[1],B[2]);
-
-            Sd = L[1]*A[1] + L[2]*(A[2]+B[2]);
-            Nd = L[1]*B[1] + L[0]*(A[0]+B[0]);
-            pS2 = 1/3.;
-            S = L[1]*pS2 + L[2];
-            N = L[1]*(1-pS2) + L[0];
-            dS = Sd/S;
-            dN = Nd/N;
-            fprintf(fout,"LWL85:  dS = %7.4f dN = %7.4f w =%7.4f S =%7.1f N =%7.1f\n", dS,dN, dN/dS, S, N);
-            pS2 = A[2]/(A[2]+B[2]);
-            S = L[1]*pS2 + L[2];
-            N = L[1]*(1-pS2) + L[0];
-            dS = Sd/S;
-            dN = Nd/N;
-            fprintf(fout,"LWL85m: dS = %7.4f dN = %7.4f w =%7.4f S =%7.1f N =%7.1f (rho = %.3f)\n", dS,dN, dN/dS, S, N, pS2);
-
-            dS = (L[1]*A[1]+L[2]*A[2])/(L[1]+L[2]) + B[2];
-            dN = (L[0]*B[0]+L[1]*B[1])/(L[0]+L[1]) + A[0];
-            fprintf(fout,"LPB93:  dS = %7.4f dN = %7.4f w =%7.4f\n", dS, dN, dN/dS);
-         }
-      }
-      if(noisy)  printf(" %3d",i+1);
-   }
-   if(noisy)  FPN(F0);
-   if(fout) FPN(fout);
-   return (0);
-}
-
-
-
-int GetKappa(void)
-{
-/* This calculates mutational transition/transversion rate ratio kappa 
-   using 4-fold degenerate sites from pairwise comparisons 
-   under HKY85, weighting estimates by the numbers of sites
-*/
-   int is,js,j,k,h, i1,pos,c[2],aa[2],b[2][3],a,ndeg,by[3]={16,4,1}, status=0;
-   double ka[2], F[2][16],S[2],wk[2], t,P,Q,pi[4];
-                 /* F&S&wk [0]: non-degenerate; [1]:4-fold;  S:sites */
-   double kdefault=(com.kappa>0?com.kappa:(com.icode==1?10:2));
-   char str1[4]="   ",str2[4]="   ", *sitestr[2]={"non-degenerate","4-fold"};
-
-   for(is=0,com.kappa=0;is<com.ns;is++) {
-      for(js=0; js<is; js++) {
-         if(noisy>=9) printf ("\n%4d vs. %3d", is+1, js+1);
-         for(k=0; k<2; k++) zero(F[k],16);
-         for(h=0; h<com.npatt; h++) {
-            c[0] = FROM61[com.z[is][h]];
-            c[1] = FROM61[com.z[js][h]];
-            for(k=0; k<2; k++) {
-               b[k][0] = c[k]/16;
-               b[k][1] = (c[k]%16)/4;
-               b[k][2] = c[k]%4;
-               aa[k] = GeneticCode[com.icode][c[k]];
-            }
-
-            /* find non-degenerate sites */
-            for(pos=0; pos<3; pos++) {         /* check all positions */
-               for(k=0,ndeg=0;k<2;k++) {       /* two codons */
-                  for(i1=0; i1<4; i1++) {
-                     if(i1==b[k][pos]) continue;
-                     a = GeneticCode[com.icode][c[k]+(i1-b[k][pos])*by[pos]];
-                     if(a==aa[k]) break;
-                  }
-                  if(i1==4) ndeg++;
-               }
-               if(ndeg==2) {
-                  F[0][b[0][pos]*4+b[1][pos]] += .5*com.fpatt[h];
-                  F[0][b[1][pos]*4+b[0][pos]] += .5*com.fpatt[h];
-               }
-
-            }
-            /* find 4-fold degenerate sites at 3rd positions */
-            for(k=0,ndeg=0;k<2;k++) {       /* two codons */
-               for(j=0,i1=c[k]-b[k][2]; j<4; j++) 
-                  if(j!=b[k][2] && GeneticCode[com.icode][i1+j]!=aa[k]) break;
-               if(aa[0]==aa[1] && j==4) ndeg++;
-            }
-            if (ndeg<2) continue;
-            F[1][b[0][2]*4+b[1][2]] += .5*com.fpatt[h]; 
-            F[1][b[1][2]*4+b[0][2]] += .5*com.fpatt[h];
-         }  /* for (h) */
-         for(k=0; k<2; k++) {  /* two kinds of sites */
-            /*
-            if(noisy>3) printf("\n%s:\n",sitestr[k]);
-            */
-            S[k] = sum(F[k],16); 
-            if(S[k]<=0) { wk[k]=0; continue; }
-            for(j=0; j<16; j++) F[k][j]/=S[k];
-            P = (F[k][0*4+1]+F[k][2*4+3])*2;
-            Q = 1-(F[k][0*4+0]+F[k][1*4+1]+F[k][2*4+2]+F[k][3*4+3]) - P;
-            for(j=0; j<4; j++)
-               pi[j] = sum(F[k]+j*4,4);
-            DistanceF84(S[k], P,Q,pi, &ka[k], &t, NULL);
-            wk[k] = (ka[k]>0?S[k]:0);
-
-            /* matout(F0,F[k],4,4);  matout(F0,pi,1,4);  */
-            /*
-            if(noisy>3)
-               printf("\nSPQkt:%9.4f%9.5f%9.5f%9.4f%9.4f\n",S[k],P,Q,ka[k],t);
-            */
-         }
-         if(wk[0]+wk[1]==0) {
-            status = -1;
-            ka[0] = kdefault;
-            if(noisy>3) printf("\ngot no kappa! fix it at %.4f\n",ka[0]);
-         }
-         else
-             ka[0] = (ka[0]*wk[0]+ka[1]*wk[1])/(wk[0]+wk[1]);
-         com.kappa += ka[0]/(com.ns*(com.ns-1.)/2);
-      }  /* for(js) */
-   }     /* for(is) */
-
-   return (status);
-}
-
-
-int CountSites(char z[],double pi[],double*Stot,double*Ntot,double fbS[],double fbN[])
-{
-/* This calculates the total numbers of synonymous and nonsynonymous sites 
-   (Stot & Ntot) in the sequence z[] using com.kappa and pi[].
-   It also count the base frequencies at the synonymous and nonsynonymous 
-   sites.  Total number of sites is scaled to be equal to sequence length
-   even if some changes are to stop codons.  Since pi[] is scaled to sum 
-   to one, rates to stop codons are not considered.
-   The counting goes through the sequence codon by codon, and so is different 
-   from the counting in codeml, which uses pi[] to count the sites.
-*/
-   int h, j,k, c[2],aa[2], b[3], by[3]={16,4,1};
-   double r, S,N, kappa=com.kappa;
-
-   *Stot = *Ntot = 0;  
-   for(k=0; k<4; k++) 
-      fbS[k] = fbN[k] = 0;
-   for (h=0; h<com.npatt; h++)  {
-      c[0] = FROM61[z[h]];
-      b[0] = c[0]/16; b[1]=(c[0]%16)/4; b[2]=c[0]%4;
-      aa[0] = GeneticCode[com.icode][c[0]];
-      if (aa[0]==-1) 
-         error2("stop codon");
-      for (j=0,S=N=0; j<3; j++) {
-         for(k=0; k<4; k++) {    /* b[j] changes to k */
-            if (k==b[j]) continue;
-            c[1]  = c[0]+(k-b[j])*by[j];
-            aa[1] = GeneticCode[com.icode][c[1]];
-            if(aa[1] == -1) continue;
-            r = pi[FROM64[c[1]]];
-            if (k+b[j]==1 || k+b[j]==5)  r *= kappa; /* transition */
-            if (aa[0]==aa[1]) { S += r; fbS[b[j]] += r*com.fpatt[h]; }
-            else              { N += r; fbN[b[j]] += r*com.fpatt[h]; }
-         }
-      }
-      *Stot += com.fpatt[h]*S;
-      *Ntot += com.fpatt[h]*N;
-   }
-   r = 3*com.ls/(*Stot+*Ntot);  *Stot*=r;  *Ntot*=r;
-   r = sum(fbS,4);  for(k=0; k<4; k++) fbS[k] /= r;
-   r = sum(fbN,4);  for(k=0; k<4; k++) fbN[k] /= r;
-   return (0);
-}
-
-
-int GetPMatCodon(double P[],double t, double kappa, double omega, double space[])
-{
-/* Get PMat=exp(Q*t) for weighting pathways
-*/
-   int nterms=100, n=com.ncode, ic1, ic2, i,j,k, aa[2],ndiff,pos=0,from[3],to[3];
-   double *Q=P, *U=space+n*n, *V=U+n*n, *Root=V+n*n, mr, spacesqrt[NCODE];
-
-   for(i=0; i<n*n; i++) Q[i] = 0;
-   for (i=0; i<n; i++) {
-      ic1=FROM61[i]; from[0]=ic1/16; from[1]=(ic1/4)%4; from[2]=ic1%4;
-      for(j=0; j<i; j++) {  
-         ic2=FROM61[j];   to[0]=ic2/16;   to[1]=(ic2/4)%4;   to[2]=ic2%4;
-         aa[0] = GeneticCode[com.icode][ic1];
-         aa[1] = GeneticCode[com.icode][ic2];
-         if (aa[0]==-1 || aa[1]==-1)  continue;
-         for (k=0,ndiff=0; k<3; k++) 
-            if(from[k] != to[k]) { ndiff++; pos=k; }
-         if (ndiff!=1)  continue;
-         Q[i*n+j] = 1;
-         if ((from[pos]+to[pos]-1)*(from[pos]+to[pos]-5)==0)
-            Q[i*n+j] *= kappa;
-         if(aa[0] != aa[1])  Q[i*n+j] *= omega;
-         Q[j*n+i] = Q[i*n+j];
-      }
-   }
-
-   for(i=0; i<n; i++) for(j=0; j<n; j++)
-      Q[i*n+j] *= com.pi[j];
-
-   for (i=0,mr=0; i<n; i++) { 
-      Q[i*n+i] = -sum(Q+i*n,n); 
-      mr -= com.pi[i]*Q[i*n+i]; 
-   }
-
-   eigenQREV(Q, com.pi, n, Root, U, V, spacesqrt);
-   for(i=0; i<n; i++) Root[i] /= mr;
-   PMatUVRoot(P, t, n, U, V, Root);
-   /*
-   testTransP(PMat, n);
-   fprintf(frub,"\a\nP(%.5f)\n", t);
-   for(i=0; i<n; i++,FPN(frub)) for(j=0; j<n; j++)
-   fprintf(frub, " %9.5g", PMat[i*n+j]);
-   fflush(frub);
-   */
-   return (0);
-}
-
-
-
-int CountDiffs(char z1[],char z2[], double*Sdts,double*Sdtv,double*Ndts,double*Ndtv,double PMat[])
-{
-/* Count the numbers of synonymous and nonsynonymous differences between 
-   sequences z1 and z2, weighting pathways with PMat. No weighting if PMat=NULL
-   Modified from difcodon()
-   dmark[i] (=0,1,2) is the i_th different codon position (i=0,1,ndiff).
-   step[j] (=0,1,2) is the codon position to be changed at step j (j=0,1,ndiff).
-   b[i][j] (=0,1,2,3) is the nucleotide at position j (0,1,2) in codon i (0,1)
-   sts,stv,nts,ntv are syn ts & tv and nonsyn ts & tv at a codon site.
-   stspath[k] stvpath[k] ntspath[k] ntvpath[k] are syn ts & tv and 
-   nonsyn ts & tv differences on path k (k=2,6).
-*/
-   char str[4]="   ";
-   int n=com.ncode, h,i1,i2,i,k, transi, c[2],ct[2],aa[2], by[3]={16,4,1};
-   int dmark[3], step[3], b[2][3], bt1[3], bt2[3];
-   int ndiff, npath, nstop, stspath[6],stvpath[6],ntspath[6],ntvpath[6];
-   double sts,stv,nts,ntv; /* syn ts & tv, nonsyn ts & tv for 2 codons */
-   double ppath[6], sump,p;
-
-   *Sdts = *Sdtv = *Ndts = *Ndtv = 0;
-   for (h=0; h<com.npatt; h++)  {
-      c[0] = FROM61[z1[h]];
-      c[1] = FROM61[z2[h]];
-      if (c[0]==c[1]) continue;
-      for(i=0; i<2; i++) {
-         b[i][0]=c[i]/16; b[i][1]=(c[i]%16)/4; b[i][2]=c[i]%4;
-         aa[i] = GeneticCode[com.icode][c[i]];
-      }
-      if (aa[0]==-1 || aa[1]==-1)
-         error2("stop codon in sequence.");
-      ndiff=0;  sts=stv=nts=ntv=0;
-      for(k=0; k<3; k++) dmark[k] = -1;
-      for(k=0; k<3; k++) if(b[0][k] != b[1][k]) dmark[ndiff++] = k;
-      npath=1;
-      if(ndiff>1) npath = (ndiff==2 ? 2 : 6);
-      if (ndiff==1) {
-         transi = b[0][dmark[0]]+b[1][dmark[0]];
-         transi = (transi==1 || transi==5);
-         if (aa[0]==aa[1])  { if (transi) sts++; else stv++; }
-         else               { if (transi) nts++; else ntv++; }
-      }
-      else {   /* ndiff=2 or 3 */
-         if(debug==DIFF) {
-            printf("\n\nh=%d %s (%c) .. ", h+1,getcodon(str,c[0]),AAs[aa[0]]);
-            printf("%s (%c): ", getcodon(str,c[1]), AAs[aa[1]]);
-         }
-         nstop=0;
-         for(k=0; k<npath; k++) {
-            if(debug==DIFF) printf("\npath %d: ", k+1);
-
-            for(i1=0; i1<3; i1++)  step[i1] = -1;
-            if (ndiff==2) {
-               step[0] = dmark[k];
-               step[1] = dmark[1-k];
-            }
-            else {
-               step[0] = k/2;
-               step[1] = k%2;
-               if (step[0]<=step[1]) step[1]++;
-               step[2] = 3-step[0]-step[1];
-            }
-            for(i1=0; i1<3; i1++) bt1[i1] = bt2[i1]=b[0][i1];
-            stspath[k] = stvpath[k] = ntspath[k] = ntvpath[k] = 0;  
-            /* mutations along each path */
-            for (i1=0,ppath[k]=1; i1<ndiff; i1++) { 
-               bt2[step[i1]] = b[1][step[i1]];
-               for (i2=0,ct[0]=ct[1]=0; i2<3; i2++) {
-                  ct[0] += bt1[i2]*by[i2];
-                  ct[1] += bt2[i2]*by[i2];
-               }
-               ppath[k] *= PMat[ FROM64[ct[0]]*n + FROM64[ct[1]] ];
-               for(i2=0; i2<2; i2++) aa[i2] = GeneticCode[com.icode][ct[i2]];
-
-               if(debug==DIFF) printf("%s (%c) %.5f: ", getcodon(str,ct[1]),AAs[aa[1]],PMat[ct[0]*n+ct[1]]);
-
-               if (aa[1]==-1) {
-                  nstop++;  ppath[k]=0; break;
-               }
-               transi = b[0][step[i1]]+b[1][step[i1]];
-               transi = (transi==1 || transi==5);  /* transition? */
-
-               if(aa[0]==aa[1]) { if(transi) stspath[k]++; else stvpath[k]++; }
-               else             { if(transi) ntspath[k]++; else ntvpath[k]++; }
-               for(i2=0; i2<3; i2++) bt1[i2] = bt2[i2];
-            }
-
-            if(debug==DIFF) printf("  p =%.9f", ppath[k]);
-
-         }  /* for(k,npath) */
-         if (npath==nstop) {  /* all paths through stop codons */
-            puts ("all paths through stop codons..");
-            if (ndiff==2) { nts=.5; ntv=1.5; }
-            else          { nts=.5; ntv=2.5; }
-         }
-         else {
-            sump = sum(ppath,npath);
-            if(sump<1e-20) { 
-               printf("\nsump=0, npath=%4d\nh=%2d ", npath, h+1);
-               printf("(%s ", getcodon(str,c[0]));
-               printf("%s)", getcodon(str,c[1]));
-               for(k=0; k<npath; k++) printf(" %9.6g", ppath[k]); FPN(F0);
-               matout(frub, PMat, n, n); 
-               exit(-1);
-
-               /* 
-               sump=1; FOR(k,npath) if(ppath[k]) ppath[k]=1./(npath-nstop); 
-               */
-            }
-            for(k=0; k<npath; k++) { 
-               p = ppath[k]/sump;
-               sts += stspath[k]*p;
-               stv += stvpath[k]*p;  
-               nts += ntspath[k]*p; 
-               ntv += ntvpath[k]*p;
-            }
-
-            if(debug==DIFF) {
-               for(k=0; k<npath; k++) printf("\n p =%.5f", ppath[k]/sump);  FPN(F0);
-               printf(" syn ts & tv, nonsyn ts & tv:%9.5f%9.5f%9.5f%9.5f\n",sts,stv,nts,ntv);
-            }
-         }
-
-         if(debug==DIFF) getchar();
-
-      }     /* if (ndiff) */
-      *Sdts += com.fpatt[h]*sts;
-      *Sdtv += com.fpatt[h]*stv;
-      *Ndts += com.fpatt[h]*nts;
-      *Ndtv += com.fpatt[h]*ntv;
-   }  /* for (h) */
-   return (0);
-}
-
-
-int DistanceF84(double n, double P, double Q, double pi[],
-    double*k_HKY, double*t, double*SEt)
-{
-/* This calculates kappa and d from P (proportion of transitions) & Q 
-   (proportion of transversions) & pi under F84.
-   When F84 fails, we try to use K80.  When K80 fails, we try
-   to use JC69.  When JC69 fails, we set distance t to maxt.
-   Variance formula under F84 is from Tateno et al. (1994), and briefly 
-   checked against simulated data sets.
-*/
-   int failF84=0,failK80=0,failJC69=0;
-   double tc,ag, Y,R, a=0,b=0, A=-1,B=-1,C=-1, k_F84;
-   double Qsmall=min2(1e-10,0.1/n), maxkappa=999,maxt=99;
-
-   *k_HKY=-1;
-   Y=pi[0]+pi[1];  R=pi[2]+pi[3];  tc=pi[0]*pi[1];  ag=pi[2]*pi[3];
-   if (P+Q>1) { *t=maxt; *k_HKY=1; return(3); }
-   if (P<-1e-10 || Q<-1e-10 || fabs(Y+R-1)>1e-8) {
-      printf("\nPQYR & pi[]: %9.5f%9.5f%9.5f%9.5f",P,Q,Y,R);
-      matout(F0,pi,1,4);
-      error2("DistanceF84: input err.");
-   }
-   if(Q<Qsmall)  failF84=failK80=1;
-   else if(Y<=0 || R<=0 || (tc<=0 && ag<=0)) failF84=1;
-   else {
-      A=tc/Y+ag/R; B=tc+ag; C=Y*R;
-      a=(2*B+2*(tc*R/Y+ag*Y/R)*(1-Q/(2*C)) - P) / (2*A);
-      b=1-Q/(2*C);
-      if (a<=0 || b<=0) failF84=1;
-   }
-   if (!failF84) {
-      a=-.5*log(a); b=-.5*log(b);
-      if(b<=0) failF84=1;
-      else {
-         k_F84 = a/b-1;
-         *t = 4*b*(tc*(1+ k_F84/Y) + ag*(1+ k_F84/R)+C);
-         *k_HKY = (B + (tc/Y+ag/R)* k_F84)/B; /* k_F84=>k_HKY85 */
-         if(SEt) {
-            a = A*C/(A*C-C*P/2-(A-B)*Q/2);
-            b = A*(A-B)/(A*C-C*P/2-(A-B)*Q/2) - (A-B-C)/(C-Q/2);
-            *SEt = sqrt((a*a*P+b*b*Q-square(a*P+b*Q))/n);
-         }
-      }
-   }
-   if(failF84 && !failK80) {  /* try K80 */
-      if (noisy>=9) printf("\na=%.5f  b=%.5f, use K80\n", a,b);
-      a=1-2*P-Q;  b=1-2*Q;
-      if (a<=0 || b<=0) failK80=1;
-      else {
-         a=-log(a); b=-log(b);
-         if(b<=0)  failK80=1;
-         else {
-            *k_HKY=(.5*a-.25*b)/(.25*b);
-            *t = .5*a+.25*b;
-         }
-         if(SEt) {
-            a=1/(1-2*P-Q); b=(a+1/(1-2*Q))/2;
-            *SEt = sqrt((a*a*P+b*b*Q-square(a*P+b*Q))/n);
-         }
-      }
-   }
-   if(failK80) {
-      if((P+=Q)>=.75) { failJC69=1; P=.75*(n-1.)/n; }
-      *t = -.75*log(1-P*4/3.); 
-      if(*t>maxt) *t=maxt;
-      if(SEt) {
-         *SEt = sqrt(9*P*(1-P)/n) / (3-4*P);
-      }
-   }
-   if(*k_HKY>maxkappa) *k_HKY=maxkappa;
-
-   return(failF84 + failK80 + failJC69);
-}
-
-
-
-#if 0
-
-double dsdnREV (int is, int js, double space[])
-{
-/* This calculates ds and dn by recovering the Q*t matrix using the equation
-      F(t) = PI * P(t) = PI * exp(Q*t)
-   This is found not to work well and is not published.
-   space[64*64*5]
-   The code here is broken since I changed the coding.  Codons are now coded 0, 1, ..., 60. 
-*/
-   int n=com.ncode, i,j, h;
-   double *F=PMat, *Qt=F;
-   double *Root=space+n*n,*pi=Root+n, *U=pi+n,*V=U+n*n;
-   double *T1=V+n*n,*T2=T1+n*n, t, small=1e-6;
-   
-   fprintf(frst,"\npi in model\n");
-   matout(frst,com.pi,1,n);
-   FOR(i,n*n) F[i]=0;
-   FOR (h,com.npatt) {
-      F[com.z[is][h]*n+com.z[js][h]]+=com.fpatt[h]/(2*com.ls);
-      F[com.z[js][h]*n+com.z[is][h]]+=com.fpatt[h]/(2*com.ls);
-   }
-   if(fabs(1-sum(F,n*n))>1e-6) error2("Sum F != 1 in dsdnREV");
-
-   FOR (i,n) {
-      pi[i]=sum(F+i*n, n);  
-/*
-      if (F[i*n+i]<=small || F[i*n+i]<pi[i]/4)
-*/
-      if (F[i*n+i]<=small)  F[i*n+i]=1-pi[i]+F[i*n+i];
-      else                  abyx(1/pi[i], F+i*n, n); 
-   }
-   if (eigen (1, F, n, Root, T1, U, V, T2)) error2 ("eigen jgl");
-   xtoy (U, V, n*n);
-   matinv (V, n, n, T1);
-
-fprintf(frst,"\npi in data\n");
-matout (frst, pi, 1, n);   FPN(F0);
-matout (frst, Root, 1, n);
-
-   FOR (i,n) {
-      if (Root[i]<=0) 
-         printf ("  Root %d:%10.4f", i+1, Root[i]); 
-      Root[i]=log(Root[i]);
-   }
-   FOR (i,n) FOR (j,n) T1[i*n+j]=U[i*n+j]*Root[j];
-   matby (T1, V, Qt, n, n, n);
-   for (i=0,t=0; i<n; i++) t-=pi[i]*Qt[i*n+i];
-   if (t<=0) puts ("err: dsdnREV");
-
-   FOR(i,n*n) Qt[i]+=1e-8;  /* remove negative numbers from rounding errors */
-
-   matout(frst,Qt,n,n);
-printf("\nt = %.5f\n", t);
-
-   return (0);
-}
-
-
-#endif
+/* yn00.c
+   Pairwise estimation of dS and dN by the method of Yang & Nielsen 
+   (2000 Mol. Biol. Evol. 17:32-43)
+
+   Copyright, 1998, Ziheng Yang
+
+                 cc -o yn00 -fast yn00.c tools.o -lm
+                 cl -O2 yn00.c tools.o
+                 yn00 <SequenceFileName>
+
+  Codon sequences are encoded as 0,1,...,ncode-1 (0,...,60 for the 61 sense codons of the universal code), as in codeml.c.
+*/
+#include "paml.h"
+#define NS            1000
+#define LSPNAME       30
+#define NCODE         64
+#define NGENE         2000
+
+int GetOptions (char *ctlf);
+int EncodeSeqCodon(void);
+int Statistics(FILE *fout, double space[]);
+int DistanceMatLWL85 (FILE *fout);
+int DistanceYN00(int is, int js, double*S, double*N, double*dS,double*dN,
+    double *SEdS, double *SEdN, double *t,double space[]);
+int GetKappa (void);
+int GetFreqs(int is1, int is2, double f3x4[], double pi[]);
+int CountSites(char z[],double pi[],double*Stot,double*Ntot,
+    double fbS[],double fbN[]);
+int GetPMatCodon(double P[],double t, double kappa, double omega, double space[]);
+int CountDiffs(char z1[],char z2[], 
+               double*Sdts,double*Sdtv,double*Ndts,double*Ndtv,double PMat[]);
+int DistanceF84(double n, double P, double Q, double pi[],
+		          double*k_HKY, double*t, double*SEt);
+double dsdnREV (int is, int js, double space[]);
+
+int ExpPattFreq(double t,double kappa,double omega,double pi[],double space[]);
+int ConsistencyMC(void);
+int InfiniteData(double t,double kappa,double omega,double f3x4_0[],
+    double space[]);
+void SimulateData2s64(FILE* fout, double f3x4_0[], double space[]);
+
+struct common_info {   /* program-wide data and settings, held in the single global com */
+   unsigned char *z[NS];   /* z[is][h]: encoded codon of sequence is at site pattern h */
+   char *spname[NS], seqf[512],outf[512];   /* sequence names; seqfile and outfile names read from the ctl file */
+   int ns,ls,npatt,codonf,icode,ncode,getSE,*pose,verbose, seqtype, readpattern;   /* ns: # sequences; ls: sequence length; npatt: # site patterns; icode: row of GeneticCode[]; ncode: # sense codons */
+   int cleandata, fcommon,kcommon, weighting, ndata, print;   /* fcommon, kcommon, weighting, ndata: ctl options commonf3x4, commonkappa, weighting, ndata */
+   double *fpatt, pi[NCODE], f3x4s[NS][12], kappa, omega;   /* fpatt[h]: weight of pattern h; pi[]: codon frequencies; f3x4s[is]: position x base table of sequence is; omega: dN/dS */
+   int ngene,posG[NGENE+1],lgene[NGENE],fix_rgene, model;   /* not referenced in the visible part of this file -- presumably for the included treesub.c; confirm */
+   double rgene[NGENE],piG[NGENE][NCODE], alpha;            /* likewise not referenced in the visible part of this file */
+}  com;
+
+
+int FROM61[64], FROM64[64], FourFold[4][4];   /* FROM61[i]: 0..63 codon index of sense codon i; FROM64[c]: sense-codon index of codon c */
+double PMat[NCODE*NCODE];                     /* ncode x ncode pathway weights, filled in DistanceYN00() and read by CountDiffs() */
+char *codonfreqs[]={"Fequal", "F1x4", "F3x4", "Fcodon"};
+enum {Fequal, F1x4, F3x4, Fcodon} CodonFreqs;
+
+FILE *frst, *frst1, *frub;                    /* opened in main() as "rst", "rst1" and "rub" */
+extern char BASEs[], AAs[];
+extern int noisy, GeneticCode[][64];          /* GeneticCode[icode][c]: amino acid of codon c, or -1 for a stop codon */
+int Nsensecodon;                              /* number of sense codons under com.icode, counted in GetOptions() */
+enum {NODEBUG, KAPPA, SITES, DIFF} DebugFunctions;
+int debug=0;                                  /* compared against the DebugFunctions values; nonzero enables debug output */
+
+double omega_NG, dN_NG, dS_NG;  /* what are these for? */
+
+
+#define YN00
+#define REALSEQUENCE
+#include "treesub.c"
+
+/* Entry point: reads options from yn00.ctl (or argv[1]) and, for each data set, writes the NG86 (A), YN00 (B) and LWL85/LPB93/LWLm (C) estimates.  Returns 0. */
+int main(int argc, char *argv[])
+{
+   char dsf[512]="2YN.dS", dnf[512]="2YN.dN", tf[512]="2YN.t";
+   FILE *fout, *fseq, *fds, *fdn, *ft;
+   char ctlf[96]="yn00.ctl", timestr[64];
+   int    is,js, j, idata, wname=20, sspace;
+   double t=0.4, dS=0.1,dN=0.1, S,N, SEdS, SEdN, f3x4[12], *space=NULL;
+   /* ConsistencyMC(); */
+
+   printf("YN00 in %s\n", pamlVerStr);
+   starttimer();
+   if (argc>1 && strlen(argv[1])>=sizeof(ctlf)) error2("ctl file name too long.");  /* argv[1] is user-controlled and must fit in ctlf[] */
+   if (argc>1)  strcpy(ctlf, argv[1]); 
+   com.seqtype=1;  com.cleandata=1;  /* works for clean data only? */
+   com.ndata=1;  com.print=0;
+   noisy=1; com.icode=0;  com.fcommon=0;  com.kcommon=1;
+   GetOptions(ctlf);
+   setmark_61_64 ();
+   fout = fopen (com.outf, "w"); 
+   frst = fopen("rst", "w");
+   frst1 = fopen("rst1", "w"); 
+   frub = fopen ("rub", "w");
+   if (fout==NULL || frst==NULL || frst1==NULL || frub==NULL) error2("outfile creation err.");  /* frub is written to on error paths, so it must be valid too */
+   fds = (FILE*)fopen(dsf, "w");
+   fdn = (FILE*)fopen(dnf, "w");
+   ft = (FILE*)fopen(tf, "w"); 
+   if(fds==NULL || fdn==NULL || ft==NULL) error2("file open error");
+
+   if((fseq=fopen (com.seqf,"r"))==NULL) {
+      printf ("\n\nSequence file %s not found!\n", com.seqf);
+      exit(-1);
+   }
+   for (idata=0; idata<com.ndata; idata++) {
+      if (com.ndata>1) {
+         printf("\nData set %d\n", idata+1);
+         fprintf(fout, "\n\nData set %d\n", idata+1);
+         fprintf(frst, "\t%d", idata+1);
+      }
+
+      ReadSeq((com.verbose?fout:NULL), fseq, com.cleandata, 0);
+      SetMapAmbiguity();
+
+      sspace = max2(200000,64*com.ns*sizeof(double));   /* Statistics() needs 64*ns doubles */
+      sspace = max2(sspace,64*64*5*sizeof(double));     /* room for the 64x64 work matrices */
+      if ((space=(double*)realloc(space,sspace))==NULL) error2("oom space");
+
+      com.kappa = 4.6;
+      com.omega = 1;
+      fprintf(fout,"YN00 %15s", com.seqf);
+      Statistics(fout, space);
+
+      if(noisy) printf("\n\n(A) Nei-Gojobori (1986) method\n");
+      fprintf(fout,"\n\n(A) Nei-Gojobori (1986) method\n");
+      DistanceMatNG86 (fout, NULL, NULL, NULL, 0);
+      fflush(fout);
+
+      if(noisy) printf("\n\n(B) Yang & Nielsen (2000) method\n\n");
+      fprintf(fout,"\n\n(B) Yang & Nielsen (2000) method\n\n");
+      fprintf(fout,"Yang Z, Nielsen R (2000) Estimating synonymous and nonsynonymous substitution rates under realistic evolutionary models. Mol. Biol. Evol. 17:32-43\n");
+      if(!com.weighting) fputs("\n(equal weighting of pathways)\n",fout);
+
+      if(com.fcommon)  GetFreqs(-1, -1, f3x4, com.pi);   /* one frequency table shared by all pairs */
+      if(com.kcommon) {
+         GetKappa();                                     /* one kappa shared by all pairs */
+         printf("kappa = %.2f\n\n",com.kappa);
+         /* puts("kappa?"); scanf("%lf", &com.kappa); */
+      }
+
+      fputs("\nseq. seq.     S       N        t   kappa   omega     dN +- SE    dS +- SE\n\n",fout);
+      fprintf(fds,"%6d\n", com.ns);
+      fprintf(fdn,"%6d\n", com.ns);
+      fprintf(ft,"%6d\n", com.ns);
+      for(is=0; is<com.ns; is++) {    /* lower-triangular matrices of dS, dN and t */
+         fprintf(fds,"%-*s ", wname,com.spname[is]);
+         fprintf(fdn,"%-*s ", wname,com.spname[is]);
+         fprintf(ft,"%-*s ", wname,com.spname[is]);
+         for(js=0; js<is; js++) {
+            if(noisy) printf("%3d vs. %3d\n", is+1, js+1);
+            fprintf(fout, " %3d  %3d ", is+1, js+1);
+
+            if(!com.fcommon) GetFreqs(is, js, f3x4, com.pi);
+            if(!com.kcommon) GetKappa();
+            j = DistanceYN00(is, js, &S, &N, &dS,&dN, &SEdS, &SEdN, &t,space);
+
+            fprintf(fout,"%7.1f %7.1f %8.4f %7.4f %7.4f %6.4f +- %6.4f %7.4f +- %6.4f\n",
+               S,N,t,com.kappa,com.omega,dN,SEdN,dS,SEdS);
+            fprintf(frst," YN: %8.4f%8.4f%8.4f %6.4f +- %6.4f %7.4f +- %6.4f\n",
+               t,com.kappa,com.omega,dN,SEdN,dS,SEdS);
+
+            fprintf(fds," %7.4f",dS); fprintf(fdn," %7.4f",dN); fprintf(ft," %7.4f",t);
+         }    /* for (js) */
+         FPN(fds); FPN(fdn); FPN(ft);    
+         fflush(fds); fflush(fdn); fflush(ft);
+      }       /* for (is) */
+      FPN(fds); FPN(fdn); FPN(ft);
+
+      if(noisy) printf("\n\n(C) LWL85, LPB93 & LWLm methods\n\n");
+      fprintf(fout,"\n\n(C) LWL85, LPB93 & LWLm methods\n\n");
+      fprintf(fout,"Li W.-H., C.-I. Wu, Luo (1985) A new method for estimating synonymous and nonsynonymous rates of nucleotide substitutions considering the relative likelihood of nucleotide and codon changes. Mol. Biol. Evol. 2: 150-174.\n");
+      fprintf(fout,"Li W-H (1993) Unbiased estimation of the rates of synonymous and nonsynonymous substitution. J. Mol. Evol. 36:96-99\n");
+      fprintf(fout,"Pamilo P, Bianchi NO (1993) Evolution of the Zfx and Zfy genes - rates and interdependence between the genes. Mol. Biol. Evol. 10:271-281\n");
+      fprintf(fout,"Yang Z (2006) Computational Molecular Evolution. Oxford University Press, Oxford. Eqs. 2.12 & 2.13\n");
+
+      DistanceMatLWL85(fout);
+
+      fflush(frst);
+      if(noisy) printf("\nTime used: %s\n", printtime(timestr));
+   }
+   return (0);
+}
+
+
+/* Reads "name = value" options from the control file ctlf into com and noisy, then sets com.ncode to the number of sense codons under com.icode.  Returns 0. */
+int GetOptions (char *ctlf)
+{
+   int i, nopt=9, lline=4096;
+   char line[4096], *pline, opt[20], comment='*';
+   char *optstr[]={"seqfile","outfile", "verbose", "noisy", "icode", 
+        "weighting","commonkappa", "commonf3x4", "ndata"};
+   double t;
+   FILE *fctl;
+
+   if((fctl=fopen(ctlf,"r"))==NULL) error2("\nctl file open error.\n");
+   printf ("\nReading options from %s..\n", ctlf);
+   for (;;) {
+      if (fgets (line, lline, fctl) == NULL) break;
+      for (i=0,t=0,pline=line; i<lline&&line[i]; i++)    /* skip blank lines and lines that are only a comment */
+         if (isalnum((unsigned char)line[i]))  { t=1; break; }   /* cast: isalnum is undefined for negative char values */
+         else if (line[i]==comment) break;
+      if (t==0) continue;
+      sscanf (line, "%19s%*s%lf", opt, &t);              /* width keeps a long first token inside opt[20] */
+      if ((pline=strstr(line, "="))==NULL) error2("option file.");
+
+      for (i=0; i<nopt; i++) {
+         if (strncmp(opt, optstr[i], 8)==0)  {
+            if (noisy>2)
+               printf ("\n%3d %15s | %-20s %6.2f", i+1,optstr[i],opt,t);
+            switch (i) {
+               case (0): sscanf(pline+1, "%511s", com.seqf);    break;   /* %s skips the blank after '='; width fits seqf[512] */
+               case (1): sscanf(pline+1, "%511s", com.outf);    break;   /* starting at pline+1 also accepts "=name" and never reads past the terminator */
+               case (2): com.verbose=(int)t;     break;
+               case (3): noisy=(int)t;           break;
+               case (4): com.icode=(int)t;       break;
+               case (5): com.weighting=(int)t;   break;
+               case (6): com.kcommon=(int)t;     break;
+               case (7): com.fcommon=(int)t;     break;
+               case (8): com.ndata=(int)t;       break;
+            }
+            break;
+         }
+      }
+      if (i==nopt)     /* unknown option name: report it and stop */
+         { printf ("\noption %s in %s\n", opt, ctlf);  exit (-1); }
+   }
+
+   for (i=0,Nsensecodon=0; i<64; i++)
+      if (GeneticCode[com.icode][i]!=-1) Nsensecodon++;
+   com.ncode = Nsensecodon;
+   fclose (fctl);
+   FPN(F0);
+   return (0);
+}
+
+int DistanceYN00(int is, int js, double*S, double*N, double*dS,double*dN,
+    double *SEdS, double *SEdN, double *t,double space[])
+{
+/* Calculates S, N, dS, dN (with SEs), t and com.omega (w) for sequences is & js; pathways are weighted by PMat when com.weighting is set.
+   com.kappa & com.pi[] are calculated beforehand and are not updated.  *t and com.omega serve as starting values and are overwritten.
+   Returns 0; -1 if a DistanceF84 call returned nonzero or dS<1e-9; -2 (taking precedence) if all nround iterations ran without meeting accu. */
+   int j,k,ir,nround=10, status=0;
+   double fbS[4],fbN[4],fbSt[4],fbNt[4], St,Nt, Sdts,Sdtv,Ndts,Ndtv, kappaS,kappaN;
+   double w0=0,dS0=0,dN0=0, accu=5e-4, minomega=1e-5,maxomega=99;
+
+   if(*t==0) *t=.5;  
+   if(com.omega<=0) com.omega=1;
+   for(k=0; k<4; k++) fbS[k] = fbN[k] = 0;
+   if(debug) printf("\nCountSites\n");
+   if(noisy>3) printf("\n");
+   for(k=0,*S=*N=0; k<2; k++) {   /* average site counts and base frequencies over the two sequences */
+      CountSites(com.z[k==0?is:js], com.pi, &St, &Nt, fbSt, fbNt);
+      *S += St/2;
+      *N += Nt/2;
+      for(j=0; j<4; j++) {
+         fbS[j] += fbSt[j]/2;
+         fbN[j] += fbNt[j]/2;
+      }
+      if(noisy>3) printf("Seq. %d: S = %9.3f N=%9.3f\n",k+1,St,Nt);
+   }
+   if(noisy>3) {
+      printf("Ave.  : S = %9.3f N=%9.3f\n\n",*S,*N);
+      printf("Base freqs at syn & nonsyn sites\n%10s%10s%10s%10s\n", "T", "C", "A", "G");
+      for(k=0; k<4; k++) printf(" %9.6f", fbS[k]);  FPN(F0);
+      for(k=0; k<4; k++) printf(" %9.6f", fbN[k]);  FPN(F0);
+   }
+   if(noisy>3) 
+      printf(" #    Sdts   Sdtv   Ndts   Ndtv |       t   kappa       w      dN      dS |   kappaS  kappaN\n");
+
+   /* initial values? */
+   if(com.weighting) { 
+      if(*t<0.001 || *t>5) *t=0.5; 
+      if(com.omega<0.01 || com.omega>5) com.omega=.5;
+   }
+   for (ir=0; ir<(com.weighting?nround:1); ir++) {   /* weighting or iteration */
+      if(com.weighting)
+         GetPMatCodon(PMat,*t,com.kappa,com.omega,space);
+      else
+         for(j=0; j<com.ncode*com.ncode; j++) 
+            PMat[j] = 1;   /* no weighting: every pathway gets the same weight */
+
+      CountDiffs(com.z[is], com.z[js], &Sdts, &Sdtv, &Ndts, &Ndtv, PMat);
+
+      if(DistanceF84(*S, Sdts/ *S, Sdtv/ *S, fbS, &kappaS, dS, SEdS)) status=-1;   /* synonymous sites */
+      if(DistanceF84(*N, Ndts/ *N, Ndtv/ *N, fbN, &kappaN, dN, SEdN)) status=-1;   /* nonsynonymous sites */
+
+      if(*dS<1e-9) {    /* dS too small to form the ratio: report maxomega */
+         status = -1;
+         com.omega = maxomega;
+      }
+      else
+         com.omega= max2(minomega, *dN/ *dS);
+      *t = *dS * 3 * *S/(*S + *N) + *dN * 3 * *N/(*S + *N);   /* t = 3*(S*dS + N*dN)/(S+N) */
+      if(noisy>3) {
+         printf("%2d %7.2f%7.2f%7.2f%7.2f |", ir+1, Sdts,Sdtv,Ndts,Ndtv);
+         printf("%8.4f%8.4f%8.4f%8.4f%8.4f", *t, com.kappa,com.omega,*dN,*dS);
+         printf(" | %8.4f%8.4f\n", kappaS,kappaN);
+      }
+      if(fabs(*dS-dS0)<accu && fabs(*dN-dN0)<accu && fabs(com.omega-w0)<accu)   /* dS, dN and omega all changed by less than accu */
+         break;
+      dS0=*dS;  dN0=*dN;  w0=com.omega;
+   } /* for (ir) */
+   if(ir==nround) status=-2;   /* loop ended without the break above */
+   /* if(status) printf("\n\tstatus: %d\n", status); */
+   return(status);
+}
+
+
+
+int Statistics(FILE *fout, double space[])
+{
+/* This calculates base frequencies, using npatt & fpatt[]: com.f3x4s[is] gets the codon position x base table of each sequence.
+   space[] must hold 64*com.ns doubles (codon counts per sequence).  If fout is non-NULL the tables are printed and the codon counts summed over sequences are left in com.pi[].  Returns 0. */
+   int h, is,j, c[3], wname=20;
+   double f3x4tot[12], *fb3tot=com.pi, *fb3s=space;
+
+   if(fout) {
+      fprintf(fout, "\n\nns =%4d\tls =%4d", com.ns, com.ls);
+      fprintf(fout,"\n\nCodon position x base (3x4) table for each sequence.");
+   }
+   zero(f3x4tot,12);  zero(fb3s,64*com.ns);
+   for(is=0; is<com.ns; is++)  zero(com.f3x4s[is], 12);
+   for (is=0; is<com.ns; is++) {
+      for (h=0; h<com.npatt; h++) {
+         j = FROM61[com.z[is][h]];              /* 0..63 codon index of this pattern */
+         c[0]=j/16; c[1]=(j%16)/4; c[2]=j%4;    /* bases at the three codon positions */
+         fb3s[is*64+j] += com.fpatt[h];
+         for(j=0; j<3; j++)                     /* j is reused as the codon position from here on */
+            com.f3x4s[is][j*4+c[j]] += com.fpatt[h]/com.ls;
+      }
+      for(j=0; j<12; j++) f3x4tot[j] += com.f3x4s[is][j]/com.ns;   /* average table over sequences */
+      if(fout) { 
+         fprintf(fout,"\n\n%-*s", wname, com.spname[is]);
+         for(j=0; j<3; j++) {
+            fprintf (fout, "\nposition %2d:", j+1);
+            for(h=0; h<4; h++)
+               fprintf (fout,"%5c:%7.5f", BASEs[h], com.f3x4s[is][j*4+h]);
+         }
+      }
+   }
+   if(fout) {
+      fprintf (fout, "\n\nAverage");
+      for(j=0; j<3; j++) {
+         fprintf (fout, "\nposition %2d:", j+1);
+         for(h=0; h<4; h++)
+            fprintf (fout,"%5c:%7.5f", BASEs[h], f3x4tot[j*4+h]);
+      }
+      for(is=0,zero(fb3tot,64);is<com.ns;is++)   /* fb3tot aliases com.pi[] */
+         for(j=0; j<64; j++) fb3tot[j] += fb3s[is*64+j];
+      fprintf (fout, "\n\nCodon usage for each species\n");
+      printcums (fout, com.ns, fb3s, com.icode);
+      fprintf (fout, "\nSums\n");
+      printcums (fout, 1, fb3tot, com.icode);
+   }
+
+   return(0);
+}
+
+int GetFreqs(int is1, int is2, double f3x4[], double pi[])
+{
+/* uses com.fcommon and com.f3x4s to calculate f3x4[] and pi[]: f3x4[] is the average over all sequences
+   when com.fcommon is set (is1 & is2 are then ignored), otherwise the average of sequences is1 & is2.
+   Codon frequencies pi[] are calculated from the f3x4 table and scaled to sum to 1 over the com.ncode codons.
+   The calculation is duplicated when com.fcommon=1.  Returns 0. */
+   int n=com.ncode, j, k, ic, b[3];
+
+   if (com.fcommon)
+      for(j=0,zero(f3x4,12);j<com.ns;j++)
+         for(k=0; k<12; k++) f3x4[k]+=com.f3x4s[j][k]/com.ns;
+   else 
+      for(k=0; k<12; k++)
+         f3x4[k] = (com.f3x4s[is1][k]+com.f3x4s[is2][k])/2;
+
+   if (noisy>=9)
+      matout(F0, f3x4, 3, 4);
+   for(j=0; j<n; j++) {
+      ic=FROM61[j]; b[0]=ic/16; b[1]=(ic%16)/4; b[2]=ic%4;   /* bases at the three positions of codon j */
+      pi[j] = f3x4[b[0]] * f3x4[4+b[1]] * f3x4[8+b[2]];      /* product of the three position-specific base frequencies */
+   }
+   abyx(1/sum(pi,n), pi, n);   /* presumably scales pi[] by 1/sum so that it sums to 1 -- abyx is defined elsewhere */
+
+   return (0);
+}
+
+
+int DistanceMatLWL85 (FILE *fout)
+{
+/* This implements 3 methods: LWL85 (Li, Wu & Luo 1985), LPB (Li 1993, 
+   Pamilo & Bianchi 1993), and LWL85m (equation 12 in book; check other refs).
+   alpha is not used.  Results for every pair of sequences are written to fout; nothing is reported per pair when fout is NULL.
+   Returns 0.  NOTE(review): L[0], L[1], L[2] look like the non-degenerate, 2-fold and 4-fold site classes of difcodonLWL85 -- confirm. */
+   int i,j,k, h, wname=15;
+   char *codon1, *codon2, str[4]="   ";
+   double L[3], sdiff[3], vdiff[3], Lt[3], sdifft[3], vdifft[3], A[3],B[3];
+   double P[3],Q[3], a,b, dS,dN, pS2, S,N, Sd,Nd;
+
+   for(i=0; i<com.ns; i++) {
+      for(j=0; j<i; j++) {  /* pair i and j */
+         for(k=0; k<3; k++) L[k] = sdiff[k] = vdiff[k] = 0;
+
+         for (h=0; h<com.npatt; h++)  {   /* accumulate sites and differences over patterns, weighted by fpatt[] */
+            codon1 = CODONs[com.z[i][h]];
+            codon2 = CODONs[com.z[j][h]];
+            difcodonLWL85(codon1, codon2, Lt, sdifft, vdifft, 0, com.icode);
+            for(k=0; k<3; k++) {
+               L[k]     += Lt[k]*com.fpatt[h];
+               sdiff[k] += sdifft[k]*com.fpatt[h];
+               vdiff[k] += vdifft[k]*com.fpatt[h];
+            }
+         }
+
+         for(k=0; k<3; k++) { 
+            P[k] = sdiff[k]/L[k];
+            Q[k] = vdiff[k]/L[k]; 
+            a = 1 - 2*P[k] - Q[k];
+            b = 1 - 2*Q[k];
+            A[k] = -log(a)/2 + log(b)/4;   /* NOTE(review): a or b <= 0 is not guarded against here */
+            B[k] = -log(b)/2;
+         }
+         if(fout) {
+            fprintf(fout, "\n%d (%s) vs. %d (%s)\n\n", i+1, com.spname[i], j+1, com.spname[j]);
+            fprintf(fout,"L(i):  %9.1f %9.1f %9.1f  sum=%9.1f\n", L[0],L[1],L[2],L[0]+L[1]+L[2]);
+            fprintf(fout,"Ns(i): %9.4f %9.4f %9.4f  sum=%9.4f\n", sdiff[0],sdiff[1],sdiff[2], sdiff[0]+sdiff[1]+sdiff[2]);
+            fprintf(fout,"Nv(i): %9.4f %9.4f %9.4f  sum=%9.4f\n", vdiff[0],vdiff[1],vdiff[2], vdiff[0]+vdiff[1]+vdiff[2]);
+            fprintf(fout,"A(i):  %9.4f %9.4f %9.4f\n", A[0],A[1],A[2]);
+            fprintf(fout,"B(i):  %9.4f %9.4f %9.4f\n", B[0],B[1],B[2]);
+
+            Sd = L[1]*A[1] + L[2]*(A[2]+B[2]);
+            Nd = L[1]*B[1] + L[0]*(A[0]+B[0]);
+            pS2 = 1/3.;                   /* LWL85: fixed share 1/3 of the L[1] sites counted as synonymous */
+            S = L[1]*pS2 + L[2];
+            N = L[1]*(1-pS2) + L[0];
+            dS = Sd/S;
+            dN = Nd/N;
+            fprintf(fout,"LWL85:  dS = %7.4f dN = %7.4f w =%7.4f S =%7.1f N =%7.1f\n", dS,dN, dN/dS, S, N);
+            pS2 = A[2]/(A[2]+B[2]);       /* LWL85m: share taken from A and B of the L[2] class (printed as rho) */
+            S = L[1]*pS2 + L[2];
+            N = L[1]*(1-pS2) + L[0];
+            dS = Sd/S;
+            dN = Nd/N;
+            fprintf(fout,"LWL85m: dS = %7.4f dN = %7.4f w =%7.4f S =%7.1f N =%7.1f (rho = %.3f)\n", dS,dN, dN/dS, S, N, pS2);
+
+            dS = (L[1]*A[1]+L[2]*A[2])/(L[1]+L[2]) + B[2];
+            dN = (L[0]*B[0]+L[1]*B[1])/(L[0]+L[1]) + A[0];
+            fprintf(fout,"LPB93:  dS = %7.4f dN = %7.4f w =%7.4f\n", dS, dN, dN/dS);
+         }
+      }
+      if(noisy)  printf(" %3d",i+1);   /* progress indicator on the screen */
+   }
+   if(noisy)  FPN(F0);
+   if(fout) FPN(fout);
+   return (0);
+}
+
+
+
+int GetKappa(void)
+{
+/* This calculates mutational transition/transversion rate ratio kappa 
+   using non-degenerate and 4-fold degenerate sites from pairwise comparisons 
+   under HKY85, weighting estimates by the numbers of sites.  com.kappa is set to the average over all pairs.
+   Returns 0, or -1 if some pair gave no usable estimate (that pair then contributes kdefault). */
+   int is,js,j,k,h, i1,pos,c[2],aa[2],b[2][3],a,ndeg,by[3]={16,4,1}, status=0;
+   double ka[2], F[2][16],S[2],wk[2], t,P,Q,pi[4];
+                 /* F&S&wk [0]: non-degenerate; [1]:4-fold;  S:sites */
+   double kdefault=(com.kappa>0?com.kappa:(com.icode==1?10:2));
+   char str1[4]="   ",str2[4]="   ", *sitestr[2]={"non-degenerate","4-fold"};
+
+   for(is=0,com.kappa=0;is<com.ns;is++) {
+      for(js=0; js<is; js++) {
+         if(noisy>=9) printf ("\n%4d vs. %3d", is+1, js+1);
+         for(k=0; k<2; k++) zero(F[k],16);
+         for(h=0; h<com.npatt; h++) {
+            c[0] = FROM61[com.z[is][h]];
+            c[1] = FROM61[com.z[js][h]];
+            for(k=0; k<2; k++) {
+               b[k][0] = c[k]/16;
+               b[k][1] = (c[k]%16)/4;
+               b[k][2] = c[k]%4;
+               aa[k] = GeneticCode[com.icode][c[k]];
+            }
+
+            /* find non-degenerate sites */
+            for(pos=0; pos<3; pos++) {         /* check all positions */
+               for(k=0,ndeg=0;k<2;k++) {       /* two codons */
+                  for(i1=0; i1<4; i1++) {
+                     if(i1==b[k][pos]) continue;
+                     a = GeneticCode[com.icode][c[k]+(i1-b[k][pos])*by[pos]];
+                     if(a==aa[k]) break;       /* a synonymous change exists, so the site is degenerate */
+                  }
+                  if(i1==4) ndeg++;
+               }
+               if(ndeg==2) {                   /* non-degenerate in both codons: add symmetrically to F[0] */
+                  F[0][b[0][pos]*4+b[1][pos]] += .5*com.fpatt[h];
+                  F[0][b[1][pos]*4+b[0][pos]] += .5*com.fpatt[h];
+               }
+
+            }
+            /* find 4-fold degenerate sites at 3rd positions */
+            for(k=0,ndeg=0;k<2;k++) {       /* two codons */
+               for(j=0,i1=c[k]-b[k][2]; j<4; j++) 
+                  if(j!=b[k][2] && GeneticCode[com.icode][i1+j]!=aa[k]) break;
+               if(aa[0]==aa[1] && j==4) ndeg++;
+            }
+            if (ndeg<2) continue;
+            F[1][b[0][2]*4+b[1][2]] += .5*com.fpatt[h]; 
+            F[1][b[1][2]*4+b[0][2]] += .5*com.fpatt[h];
+         }  /* for (h) */
+         for(k=0; k<2; k++) {  /* two kinds of sites */
+            /*
+            if(noisy>3) printf("\n%s:\n",sitestr[k]);
+            */
+            S[k] = sum(F[k],16); 
+            if(S[k]<=0) { wk[k]=0; ka[k]=0; continue; }  /* ka[k] is still read below (with zero weight), so it must hold a defined value */
+            for(j=0; j<16; j++) F[k][j]/=S[k];
+            P = (F[k][0*4+1]+F[k][2*4+3])*2;             /* proportion of transitional differences */
+            Q = 1-(F[k][0*4+0]+F[k][1*4+1]+F[k][2*4+2]+F[k][3*4+3]) - P;   /* remaining differences are transversions */
+            for(j=0; j<4; j++)
+               pi[j] = sum(F[k]+j*4,4);
+            DistanceF84(S[k], P,Q,pi, &ka[k], &t, NULL);
+            wk[k] = (ka[k]>0?S[k]:0);                    /* an estimate that is not positive gets zero weight */
+
+            /* matout(F0,F[k],4,4);  matout(F0,pi,1,4);  */
+            /*
+            if(noisy>3)
+               printf("\nSPQkt:%9.4f%9.5f%9.5f%9.4f%9.4f\n",S[k],P,Q,ka[k],t);
+            */
+         }
+         if(wk[0]+wk[1]==0) {
+            status = -1;
+            ka[0] = kdefault;
+            if(noisy>3) printf("\ngot no kappa! fix it at %.4f\n",ka[0]);
+         }
+         else
+             ka[0] = (ka[0]*wk[0]+ka[1]*wk[1])/(wk[0]+wk[1]);
+         com.kappa += ka[0]/(com.ns*(com.ns-1.)/2);      /* average over the ns*(ns-1)/2 pairs */
+      }  /* for(js) */
+   }     /* for(is) */
+
+   return (status);
+}
+
+
+int CountSites(char z[],double pi[],double*Stot,double*Ntot,double fbS[],double fbN[])
+{
+/* This calculates the total numbers of synonymous and nonsynonymous sites 
+   (Stot & Ntot) in the sequence z[] using com.kappa and pi[].
+   It also counts the base frequencies at the synonymous and nonsynonymous 
+   sites (fbS[] & fbN[], each scaled to sum to 1).  Total number of sites is scaled to be equal to sequence length
+   even if some changes are to stop codons.  Since pi[] is scaled to sum 
+   to one, rates to stop codons are not considered.
+   The counting goes through the sequence codon by codon, and so is different 
+   from the counting in codeml, which uses pi[] to count the sites.
+   Returns 0; a stop codon in z[] is reported through error2(). */
+   int h, j,k, c[2],aa[2], b[3], by[3]={16,4,1};
+   double r, S,N, kappa=com.kappa;
+
+   *Stot = *Ntot = 0;  
+   for(k=0; k<4; k++) 
+      fbS[k] = fbN[k] = 0;
+   for (h=0; h<com.npatt; h++)  {
+      c[0] = FROM61[z[h]];
+      b[0] = c[0]/16; b[1]=(c[0]%16)/4; b[2]=c[0]%4;
+      aa[0] = GeneticCode[com.icode][c[0]];
+      if (aa[0]==-1) 
+         error2("stop codon");
+      for (j=0,S=N=0; j<3; j++) {
+         for(k=0; k<4; k++) {    /* b[j] changes to k */
+            if (k==b[j]) continue;
+            c[1]  = c[0]+(k-b[j])*by[j];
+            aa[1] = GeneticCode[com.icode][c[1]];
+            if(aa[1] == -1) continue;     /* changes to stop codons are not counted */
+            r = pi[FROM64[c[1]]];         /* rate weight: frequency of the target codon */
+            if (k+b[j]==1 || k+b[j]==5)  r *= kappa; /* transition */
+            if (aa[0]==aa[1]) { S += r; fbS[b[j]] += r*com.fpatt[h]; }
+            else              { N += r; fbN[b[j]] += r*com.fpatt[h]; }
+         }
+      }
+      *Stot += com.fpatt[h]*S;
+      *Ntot += com.fpatt[h]*N;
+   }
+   r = 3*com.ls/(*Stot+*Ntot);  *Stot*=r;  *Ntot*=r;   /* rescale so that Stot + Ntot = 3*com.ls */
+   r = sum(fbS,4);  for(k=0; k<4; k++) fbS[k] /= r;    /* NOTE(review): r is 0 if no synonymous change was counted -- confirm this cannot happen */
+   r = sum(fbN,4);  for(k=0; k<4; k++) fbN[k] /= r;
+   return (0);
+}
+
+
+int GetPMatCodon(double P[],double t, double kappa, double omega, double space[])
+{
+/* Fill P with exp(Q*t) for the codon model; the result is used for
+   weighting pathways.
+
+   Q is assembled in place in P.  Only pairs of codons that differ at exactly
+   one position get a nonzero rate: 1, times kappa for a transition, times
+   omega when the amino acid changes.  Each column j is then multiplied by
+   com.pi[j] and the diagonal is set to minus the row sum.  The eigenvalues
+   returned by eigenQREV() are divided by mr = -sum_i com.pi[i]*Q[i][i] before
+   PMatUVRoot() builds P.
+
+   Workspace: U, V and Root are taken from space[] starting at offset n*n; the
+   first n*n doubles of space[] are not touched here.  Always returns 0.
+*/
+   int n=com.ncode, row, col, pos, ndiff, where=0;
+   int codon1, codon2, aa1, aa2, nuc1[3], nuc2[3];
+   double *Q=P, *U=space+n*n, *V=U+n*n, *Root=V+n*n;
+   double rate, mr, spacesqrt[NCODE];
+
+   for (row=0; row<n*n; row++)
+      Q[row] = 0;
+
+   for (row=0; row<n; row++) {
+      codon1 = FROM61[row];
+      nuc1[0] = codon1/16;  nuc1[1] = (codon1/4)%4;  nuc1[2] = codon1%4;
+      aa1 = GeneticCode[com.icode][codon1];
+      for (col=0; col<row; col++) {
+         codon2 = FROM61[col];
+         nuc2[0] = codon2/16;  nuc2[1] = (codon2/4)%4;  nuc2[2] = codon2%4;
+         aa2 = GeneticCode[com.icode][codon2];
+         if (aa1==-1 || aa2==-1)
+            continue;
+
+         ndiff = 0;
+         for (pos=0; pos<3; pos++) {
+            if (nuc1[pos] == nuc2[pos]) continue;
+            ndiff++;
+            where = pos;
+         }
+         if (ndiff != 1)
+            continue;
+
+         rate = 1;
+         if (nuc1[where]+nuc2[where]==1 || nuc1[where]+nuc2[where]==5)
+            rate *= kappa;                 /* transition */
+         if (aa1 != aa2)
+            rate *= omega;                 /* nonsynonymous */
+         Q[row*n+col] = rate;
+         Q[col*n+row] = rate;
+      }
+   }
+
+   for (row=0; row<n; row++) {
+      for (col=0; col<n; col++)
+         Q[row*n+col] *= com.pi[col];
+   }
+
+   mr = 0;
+   for (row=0; row<n; row++) {
+      Q[row*n+row] = -sum(Q+row*n,n);
+      mr -= com.pi[row]*Q[row*n+row];
+   }
+
+   eigenQREV(Q, com.pi, n, Root, U, V, spacesqrt);
+   for (row=0; row<n; row++)
+      Root[row] /= mr;
+   PMatUVRoot(P, t, n, U, V, Root);
+   return (0);
+}
+
+
+
+int CountDiffs(char z1[],char z2[], double*Sdts,double*Sdtv,double*Ndts,double*Ndtv,double PMat[])
+{
+/* Count the numbers of synonymous and nonsynonymous differences between 
+   sequences z1 and z2, weighting pathways with PMat.  When PMat is NULL no
+   weighting is done: every step gets weight 1, so all pathways that avoid 
+   stop codons contribute equally.
+   Modified from difcodon()
+
+   Outputs, each summed over site patterns with weight com.fpatt[h]:
+     *Sdts, *Sdtv  synonymous transitions and transversions
+     *Ndts, *Ndtv  nonsynonymous transitions and transversions
+   Always returns 0.  error2() is called when a compared codon translates to
+   a stop, and the program exits when the path weights at a site sum to 
+   (nearly) zero.
+
+   dmark[i] (=0,1,2) is the i_th different codon position (i=0,...,ndiff-1).
+   step[j] (=0,1,2) is the codon position to be changed at step j 
+   (j=0,...,ndiff-1).
+   b[i][j] (=0,1,2,3) is the nucleotide at position j (0,1,2) in codon i (0,1)
+   sts,stv,nts,ntv are syn ts & tv and nonsyn ts & tv at a codon site.
+   stspath[k] stvpath[k] ntspath[k] ntvpath[k] are syn ts & tv and 
+   nonsyn ts & tv differences on path k (2 paths if ndiff=2, 6 if ndiff=3).
+
+   The stop-codon test is made before PMat and AAs[] are indexed, because a
+   stop codon has amino acid -1 and is assumed to have no valid 0..n-1 
+   index in FROM64[].
+   PMat is always indexed through FROM64[], also in the debug output.
+*/
+   char str[4]="   ";
+   int n=com.ncode, h,i1,i2,i,k, transi, c[2],ct[2],aa[2], by[3]={16,4,1};
+   int dmark[3], step[3], b[2][3], bt1[3], bt2[3];
+   int ndiff, npath, nstop, stspath[6],stvpath[6],ntspath[6],ntvpath[6];
+   double sts,stv,nts,ntv; /* syn ts & tv, nonsyn ts & tv for 2 codons */
+   double ppath[6], sump,p, pstep;
+
+   *Sdts = *Sdtv = *Ndts = *Ndtv = 0;
+   for (h=0; h<com.npatt; h++)  {
+      c[0] = FROM61[z1[h]];
+      c[1] = FROM61[z2[h]];
+      if (c[0]==c[1]) continue;
+      for(i=0; i<2; i++) {
+         b[i][0]=c[i]/16; b[i][1]=(c[i]%16)/4; b[i][2]=c[i]%4;
+         aa[i] = GeneticCode[com.icode][c[i]];
+      }
+      if (aa[0]==-1 || aa[1]==-1)
+         error2("stop codon in sequence.");
+      ndiff=0;  sts=stv=nts=ntv=0;
+      for(k=0; k<3; k++) dmark[k] = -1;
+      for(k=0; k<3; k++) if(b[0][k] != b[1][k]) dmark[ndiff++] = k;
+      npath=1;
+      if(ndiff>1) npath = (ndiff==2 ? 2 : 6);
+      if (ndiff==1) {
+         transi = b[0][dmark[0]]+b[1][dmark[0]];
+         transi = (transi==1 || transi==5);
+         if (aa[0]==aa[1])  { if (transi) sts++; else stv++; }
+         else               { if (transi) nts++; else ntv++; }
+      }
+      else {   /* ndiff=2 or 3 */
+         if(debug==DIFF) {
+            printf("\n\nh=%d %s (%c) .. ", h+1,getcodon(str,c[0]),AAs[aa[0]]);
+            printf("%s (%c): ", getcodon(str,c[1]), AAs[aa[1]]);
+         }
+         nstop=0;
+         for(k=0; k<npath; k++) {
+            if(debug==DIFF) printf("\npath %d: ", k+1);
+
+            /* order in which the differing positions are changed on path k */
+            for(i1=0; i1<3; i1++)  step[i1] = -1;
+            if (ndiff==2) {
+               step[0] = dmark[k];
+               step[1] = dmark[1-k];
+            }
+            else {
+               step[0] = k/2;
+               step[1] = k%2;
+               if (step[0]<=step[1]) step[1]++;
+               step[2] = 3-step[0]-step[1];
+            }
+            for(i1=0; i1<3; i1++) bt1[i1] = bt2[i1]=b[0][i1];
+            stspath[k] = stvpath[k] = ntspath[k] = ntvpath[k] = 0;  
+            /* mutations along each path */
+            for (i1=0,ppath[k]=1; i1<ndiff; i1++) { 
+               bt2[step[i1]] = b[1][step[i1]];
+               for (i2=0,ct[0]=ct[1]=0; i2<3; i2++) {
+                  ct[0] += bt1[i2]*by[i2];
+                  ct[1] += bt2[i2]*by[i2];
+               }
+               for(i2=0; i2<2; i2++) aa[i2] = GeneticCode[com.icode][ct[i2]];
+
+               /* A path through a stop codon gets zero weight.  Test this 
+                  before touching PMat or AAs[], which have no entry for it. */
+               if (aa[1]==-1) {
+                  if(debug==DIFF) printf("%s (%c) %.5f: ", getcodon(str,ct[1]),'*',0.);
+                  nstop++;  ppath[k]=0; break;
+               }
+
+               /* ct[] are 0..63 codes; PMat is n*n over sense codons only */
+               pstep = (PMat ? PMat[ FROM64[ct[0]]*n + FROM64[ct[1]] ] : 1);
+               ppath[k] *= pstep;
+
+               if(debug==DIFF) printf("%s (%c) %.5f: ", getcodon(str,ct[1]),AAs[aa[1]],pstep);
+
+               transi = b[0][step[i1]]+b[1][step[i1]];
+               transi = (transi==1 || transi==5);  /* transition? */
+
+               if(aa[0]==aa[1]) { if(transi) stspath[k]++; else stvpath[k]++; }
+               else             { if(transi) ntspath[k]++; else ntvpath[k]++; }
+               for(i2=0; i2<3; i2++) bt1[i2] = bt2[i2];
+            }
+
+            if(debug==DIFF) printf("  p =%.9f", ppath[k]);
+
+         }  /* for(k,npath) */
+         if (npath==nstop) {  /* all paths through stop codons */
+            puts ("all paths through stop codons..");
+            if (ndiff==2) { nts=.5; ntv=1.5; }
+            else          { nts=.5; ntv=2.5; }
+         }
+         else {
+            sump = sum(ppath,npath);
+            if(sump<1e-20) { 
+               printf("\nsump=0, npath=%4d\nh=%2d ", npath, h+1);
+               printf("(%s ", getcodon(str,c[0]));
+               printf("%s)", getcodon(str,c[1]));
+               for(k=0; k<npath; k++) 
+                  printf(" %9.6g", ppath[k]);
+               FPN(F0);
+               if(PMat) matout(frub, PMat, n, n); 
+               exit(-1);
+            }
+            /* average the counts over paths, weighted by path probability */
+            for(k=0; k<npath; k++) { 
+               p = ppath[k]/sump;
+               sts += stspath[k]*p;
+               stv += stvpath[k]*p;  
+               nts += ntspath[k]*p; 
+               ntv += ntvpath[k]*p;
+            }
+
+            if(debug==DIFF) {
+               for(k=0; k<npath; k++) 
+                  printf("\n p =%.5f", ppath[k]/sump);
+               FPN(F0);
+               printf(" syn ts & tv, nonsyn ts & tv:%9.5f%9.5f%9.5f%9.5f\n",sts,stv,nts,ntv);
+            }
+         }
+
+         if(debug==DIFF) getchar();
+
+      }     /* if (ndiff) */
+      *Sdts += com.fpatt[h]*sts;
+      *Sdtv += com.fpatt[h]*stv;
+      *Ndts += com.fpatt[h]*nts;
+      *Ndtv += com.fpatt[h]*ntv;
+   }  /* for (h) */
+   return (0);
+}
+
+
+int DistanceF84(double n, double P, double Q, double pi[],
+    double*k_HKY, double*t, double*SEt)
+{
+/* This calculates kappa and d from P (proportion of transitions) & Q 
+   (proportion of transversions) & pi under F84.
+   When F84 fails, we try to use K80.  When K80 fails, we try
+   to use JC69.  When JC69 fails, we set distance t to maxt.
+   Variance formula under F84 is from Tateno et al. (1994), and briefly 
+   checked against simulated data sets.
+
+   Inputs:  n is the sample size used in Qsmall, in the SE formulas and in
+            the JC69 cap on P.
+            pi[0..3] are base frequencies; pi[0]+pi[1] is used as Y and
+            pi[2]+pi[3] as R, so the order is presumably T,C,A,G -- confirm
+            against the callers.  Y+R must equal 1 to within 1e-8.
+   Outputs: *k_HKY is kappa, capped at maxkappa (999); it stays at -1 when
+            only the JC69 formula could be applied.
+            *t is the distance; it is capped at maxt (99) in the JC69 branch.
+            *SEt is written only if SEt is not NULL.
+   Returns: 3 if P+Q>1 (then *t=maxt, *k_HKY=1 and *SEt is left untouched),
+            otherwise failF84+failK80+failJC69, so 0 means F84 was used.
+   Note:    a and b are reused by each fallback, and the debug message in 
+            the K80 branch prints whatever the F84 attempt left in them.
+*/
+   int failF84=0,failK80=0,failJC69=0;
+   double tc,ag, Y,R, a=0,b=0, A=-1,B=-1,C=-1, k_F84;
+   double Qsmall=min2(1e-10,0.1/n), maxkappa=999,maxt=99;
+
+   *k_HKY=-1;
+   Y=pi[0]+pi[1];  R=pi[2]+pi[3];  tc=pi[0]*pi[1];  ag=pi[2]*pi[3];
+   if (P+Q>1) { *t=maxt; *k_HKY=1; return(3); }
+   if (P<-1e-10 || Q<-1e-10 || fabs(Y+R-1)>1e-8) {
+      printf("\nPQYR & pi[]: %9.5f%9.5f%9.5f%9.5f",P,Q,Y,R);
+      matout(F0,pi,1,4);
+      error2("DistanceF84: input err.");
+   }
+   if(Q<Qsmall)  failF84=failK80=1;   /* skip F84 and K80, go straight to the JC69 branch */
+   else if(Y<=0 || R<=0 || (tc<=0 && ag<=0)) failF84=1;
+   else {
+      A=tc/Y+ag/R; B=tc+ag; C=Y*R;
+      a=(2*B+2*(tc*R/Y+ag*Y/R)*(1-Q/(2*C)) - P) / (2*A);
+      b=1-Q/(2*C);
+      if (a<=0 || b<=0) failF84=1;   /* the logs below would be undefined */
+   }
+   if (!failF84) {
+      a=-.5*log(a); b=-.5*log(b);
+      if(b<=0) failF84=1;
+      else {
+         k_F84 = a/b-1;
+         *t = 4*b*(tc*(1+ k_F84/Y) + ag*(1+ k_F84/R)+C);
+         *k_HKY = (B + (tc/Y+ag/R)* k_F84)/B; /* k_F84=>k_HKY85 */
+         if(SEt) {
+            a = A*C/(A*C-C*P/2-(A-B)*Q/2);
+            b = A*(A-B)/(A*C-C*P/2-(A-B)*Q/2) - (A-B-C)/(C-Q/2);
+            *SEt = sqrt((a*a*P+b*b*Q-square(a*P+b*Q))/n);
+         }
+      }
+   }
+   if(failF84 && !failK80) {  /* try K80 */
+      if (noisy>=9) printf("\na=%.5f  b=%.5f, use K80\n", a,b);
+      a=1-2*P-Q;  b=1-2*Q;
+      if (a<=0 || b<=0) failK80=1;
+      else {
+         a=-log(a); b=-log(b);
+         if(b<=0)  failK80=1;
+         else {
+            *k_HKY=(.5*a-.25*b)/(.25*b);
+            *t = .5*a+.25*b;
+         }
+         if(SEt) {   /* also runs when b<=0 set failK80; the JC69 branch then overwrites *SEt */
+            a=1/(1-2*P-Q); b=(a+1/(1-2*Q))/2;
+            *SEt = sqrt((a*a*P+b*b*Q-square(a*P+b*Q))/n);
+         }
+      }
+   }
+   if(failK80) {   /* JC69; from here on P holds P+Q */
+      if((P+=Q)>=.75) { failJC69=1; P=.75*(n-1.)/n; }   /* keeps 1-P*4/3 positive for n>=1 */
+      *t = -.75*log(1-P*4/3.); 
+      if(*t>maxt) *t=maxt;
+      if(SEt) {
+         *SEt = sqrt(9*P*(1-P)/n) / (3-4*P);
+      }
+   }
+   if(*k_HKY>maxkappa) *k_HKY=maxkappa;
+
+   return(failF84 + failK80 + failJC69);
+}
+
+
+
+#if 0
+
+double dsdnREV (int is, int js, double space[])
+{
+/* This calculates ds and dn by recovering the Q*t matrix using the equation
+      F(t) = PI * P(t) = PI * exp(Q*t)
+   This is found not to work well and is not published.
+   space[64*64*5]
+   The code here is broken since I changed the coding.  Codons are now coded 0, 1, ..., 60. 
+
+   The function is compiled out by the surrounding #if 0.
+   F is accumulated in PMat from the site patterns of sequences is and js,
+   with each pattern added symmetrically (half to [i][j], half to [j][i]).
+   Qt shares the same storage as F, so the matby() call overwrites F.
+   Roots that are <=0 are reported on screen but are still passed to log().
+   The results are only printed (to frst and the screen); the function 
+   returns 0 in all cases although it is declared double.
+*/
+   int n=com.ncode, i,j, h;
+   double *F=PMat, *Qt=F;
+   double *Root=space+n*n,*pi=Root+n, *U=pi+n,*V=U+n*n;
+   double *T1=V+n*n,*T2=T1+n*n, t, small=1e-6;
+   
+   fprintf(frst,"\npi in model\n");
+   matout(frst,com.pi,1,n);
+   FOR(i,n*n) F[i]=0;
+   FOR (h,com.npatt) {
+      F[com.z[is][h]*n+com.z[js][h]]+=com.fpatt[h]/(2*com.ls);
+      F[com.z[js][h]*n+com.z[is][h]]+=com.fpatt[h]/(2*com.ls);
+   }
+   if(fabs(1-sum(F,n*n))>1e-6) error2("Sum F != 1 in dsdnREV");
+
+   FOR (i,n) {
+      pi[i]=sum(F+i*n, n);  
+/*
+      if (F[i*n+i]<=small || F[i*n+i]<pi[i]/4)
+*/
+      if (F[i*n+i]<=small)  F[i*n+i]=1-pi[i]+F[i*n+i];
+      else                  abyx(1/pi[i], F+i*n, n);   /* presumably scales row i by 1/pi[i] -- confirm abyx() */
+   }
+   if (eigen (1, F, n, Root, T1, U, V, T2)) error2 ("eigen jgl");
+   xtoy (U, V, n*n);
+   matinv (V, n, n, T1);
+
+fprintf(frst,"\npi in data\n");
+matout (frst, pi, 1, n);   FPN(F0);
+matout (frst, Root, 1, n);
+
+   FOR (i,n) {
+      if (Root[i]<=0) 
+         printf ("  Root %d:%10.4f", i+1, Root[i]); 
+      Root[i]=log(Root[i]);
+   }
+   FOR (i,n) FOR (j,n) T1[i*n+j]=U[i*n+j]*Root[j];   /* T1 = U * diag(log Root) */
+   matby (T1, V, Qt, n, n, n);
+   for (i=0,t=0; i<n; i++) t-=pi[i]*Qt[i*n+i];
+   if (t<=0) puts ("err: dsdnREV");
+
+   FOR(i,n*n) Qt[i]+=1e-8;  /* remove negative numbers from rounding errors */
+
+   matout(frst,Qt,n,n);
+printf("\nt = %.5f\n", t);
+
+   return (0);
+}
+
+
+#endif

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/paml.git



More information about the debian-med-commit mailing list