[med-svn] [paml] 01/03: New upstream version 4.9e+dfsg
Andreas Tille
tille at debian.org
Fri Mar 17 20:54:20 UTC 2017
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository paml.
commit b308d0b83b7d3a7f6225aef2b47cc6d262ae2ece
Author: Andreas Tille <tille at debian.org>
Date: Fri Mar 17 21:28:26 2017 +0100
New upstream version 4.9e+dfsg
---
README.txt | 11 +-
doc/pamlHistory.txt | 29 +
doc/{pamlHistory.txt => pamlHistory.txt~} | 23 +
examples/DatingSoftBound/BF.Clock23.xlsx | Bin 0 -> 20563 bytes
examples/DatingSoftBound/README.BayesFactor.txt | 26 +
examples/DatingSoftBound/{ => bf1}/mcmctree.ctl | 17 +-
examples/DatingSoftBound/{ => bf2}/mcmctree.ctl | 13 +-
examples/DatingSoftBound/mcmctree.ctl | 5 +-
examples/DatingSoftBound/mtCDNApri.trees | 4 +
src/BFdriver.c | 74 +
src/README.txt | 3 +-
src/baseml.c | 6 +-
src/chi2.c | 14 +-
src/codeml.c | 2 +-
src/evolver.c | 2630 +++++++++++------------
src/mcmctree.c | 89 +-
src/paml.h | 9 +-
src/pamp.c | 1290 +++++------
src/tools.c | 223 +-
src/treesub.c | 715 +++---
src/yn00.c | 1814 ++++++++--------
21 files changed, 3647 insertions(+), 3350 deletions(-)
diff --git a/README.txt b/README.txt
index ec5fab2..8f293ab 100644
--- a/README.txt
+++ b/README.txt
@@ -14,7 +14,16 @@ following, where the archive can be downloaded:
http://abacus.gene.ucl.ac.uk/software/paml.html
-PAML is distributed free of charge for academic use only.
+Copyright notice and disclaimer
+
+The software package is provided "as is" without warranty of any
+kind. In no event shall the author or his employer be held responsible
+for any damage resulting from the use of this software, including but
+not limited to the frustration that you may experience in using the
+package. The program package, including source codes, example data
+sets, executables, and this documentation, is maintained by Ziheng
+Yang and distributed under the GNU GPL v3.
+
Ziheng Yang
Department of Biology Phone: (+44) (0)20 7679 4379
diff --git a/doc/pamlHistory.txt b/doc/pamlHistory.txt
index 87fb1c2..9eca86c 100644
--- a/doc/pamlHistory.txt
+++ b/doc/pamlHistory.txt
@@ -9,6 +9,35 @@ https://groups.google.com/forum/#!forum/pamlsoftware.
+Version 4.9e, March 2017
+
+Edited the readme files to change the license to GPL.
+
+
+
+Version 4.9d, February 2017
+
+(*) mcmctree. Fixed a bug which caused the program to read
+the fossil calibration information in the tree file incorrectly if
+lower (minimum) bounds were specified using the symbol '>'. If you used
+the notation "B()" or "L()", the information was read correctly. This
+bug was introduced in version 4.9b and exists also in 4.9c. Versions
+4.9a and earlier were correct.
+
+(*) mcmctree. Changed the default prior for rates for loci to
+gamma-Dirichlet (dos Reis 2014), and updated the documentation as
+well. It was previously set to the conditional i.i.d. prior (Zhu et al. 2015).
+
+(*) mcmctree. Added Bayes factor calculation. A program called
+BFdriver is included in the release, as well as a pdf document in the
+folder examples/DatingSoftBound/BFdriverDOC.pdf. We suggest that you
+use the exact likelihood calculation when you use this option, since the
+normal approximation is unreliable when the power posterior is close to
+the prior (when beta is small).
+
+
+
+
Version 4.9c, September 2016
(*) Added GPL license statement in various places.
diff --git a/doc/pamlHistory.txt b/doc/pamlHistory.txt~
similarity index 96%
copy from doc/pamlHistory.txt
copy to doc/pamlHistory.txt~
index 87fb1c2..f6a3c20 100644
--- a/doc/pamlHistory.txt
+++ b/doc/pamlHistory.txt~
@@ -9,6 +9,29 @@ https://groups.google.com/forum/#!forum/pamlsoftware.
+Version 4.9d, February 2017
+
+(*) mcmctree. Fixed a bug which caused the program to read
+the fossil calibration information in the tree file incorrectly if
+lower (minimum) bounds were specified using the symbol '>'. If you used
+the notation "B()" or "L()", the information was read correctly. This
+bug was introduced in version 4.9b and exists also in 4.9c. Versions
+4.9a and earlier were correct.
+
+(*) mcmctree. Changed the default prior for rates for loci to
+gamma-Dirichlet (dos Reis 2014), and updated the documentation as
+well. It was previously set to the conditional i.i.d. prior (Zhu et al. 2015).
+
+(*) mcmctree. Added Bayes factor calculation. A program called
+BFdriver is included in the release, as well as a pdf document in the
+folder examples/DatingSoftBound/BFdriverDOC.pdf. We suggest that you
+use the exact likelihood calculation when you use this option, since the
+normal approximation is unreliable when the power posterior is close to
+the prior (when beta is small).
+
+
+
+
Version 4.9c, September 2016
(*) Added GPL license statement in various places.
diff --git a/examples/DatingSoftBound/BF.Clock23.xlsx b/examples/DatingSoftBound/BF.Clock23.xlsx
new file mode 100644
index 0000000..bc9216b
Binary files /dev/null and b/examples/DatingSoftBound/BF.Clock23.xlsx differ
diff --git a/examples/DatingSoftBound/README.BayesFactor.txt b/examples/DatingSoftBound/README.BayesFactor.txt
new file mode 100644
index 0000000..23ce8b9
--- /dev/null
+++ b/examples/DatingSoftBound/README.BayesFactor.txt
@@ -0,0 +1,26 @@
+Bayes factor calculation using MCMCTREE
+Ziheng Yang
+
+5 February 2017
+
+Read the document BFDriverDOC.pdf, and replace bpp with MCMCtree.
+
+The folder bf1/ is for running the marginal likelihood calculation for clock=2.
+The folder bf2/ is for running the marginal likelihood calculation for clock=3.
+
+
+ cd bf1
+ ../../../bin/BFdriver mcmctree.ctl 16
+
+Check and edit the commands file, and specify the full path for mcmctree if necessary, e.g.,
+ echo "../../../bin/mcmctree mcmctree.b$I.ctl > log.b$I.txt" > tmp.sh
+
+Then run the commands to submit jobs onto the queue.
+
+ source commands
+
+
+Do the same thing in bf2/ for clock=3, and collect results into the Excel file: BF.Clock23.xlsx.
+Note that the last line in the file commands is a grep command for retrieving the results after
+all jobs are finished:
+grep BFbeta log.b*.txt
diff --git a/examples/DatingSoftBound/mcmctree.ctl b/examples/DatingSoftBound/bf1/mcmctree.ctl
similarity index 67%
copy from examples/DatingSoftBound/mcmctree.ctl
copy to examples/DatingSoftBound/bf1/mcmctree.ctl
index 26ed77a..1fd18e2 100644
--- a/examples/DatingSoftBound/mcmctree.ctl
+++ b/examples/DatingSoftBound/bf1/mcmctree.ctl
@@ -1,6 +1,7 @@
seed = -1
- seqfile = mtCDNApri123.txt
- treefile = mtCDNApri.trees
+ seqfile = ../mtCDNApri123.txt
+ treefile = ../mtCDNApri.trees
+ mcmcfile = mcmc.txt
outfile = out.txt
ndata = 3
@@ -16,17 +17,15 @@
cleandata = 0 * remove sites with ambiguity data (1:yes, 0:no)?
BDparas = 1 1 0.1 * birth, death, sampling
- kappa_gamma = 6 2 * gamma prior for kappa
- alpha_gamma = 1 1 * gamma prior for alpha
+ kappa_gamma = 6 2 * gamma prior for kappa
+ alpha_gamma = 1 1 * gamma prior for alpha
rgene_gamma = 2 20 1 * gammaDir prior for rate for genes
- sigma2_gamma = 1 10 1 * gammaDir prior for sigma^2 (for clock=2 or 3)
+ sigma2_gamma = 1 10 1 * gammaDir prior for sigma^2 (for clock=2 or 3)
finetune = 1: .1 .1 .1 .1 .1 .1 * auto (0 or 1): times, musigma2, rates, mixing, paras, FossilErr
- print = 1
+ print = 1 * 0: no mcmc sample; 1: everything except branch rates 2: everything
burnin = 2000
sampfreq = 2
- nsample = 20000
-
-*** Note: Make your window wider (100 columns) before running the program.
+ nsample = 200000
diff --git a/examples/DatingSoftBound/mcmctree.ctl b/examples/DatingSoftBound/bf2/mcmctree.ctl
similarity index 76%
copy from examples/DatingSoftBound/mcmctree.ctl
copy to examples/DatingSoftBound/bf2/mcmctree.ctl
index 26ed77a..d771d45 100644
--- a/examples/DatingSoftBound/mcmctree.ctl
+++ b/examples/DatingSoftBound/bf2/mcmctree.ctl
@@ -1,12 +1,13 @@
seed = -1
- seqfile = mtCDNApri123.txt
- treefile = mtCDNApri.trees
+ seqfile = ../mtCDNApri123.txt
+ treefile = ../mtCDNApri.trees
+ mcmcfile = mcmc.txt
outfile = out.txt
ndata = 3
seqtype = 0 * 0: nucleotides; 1:codons; 2:AAs
usedata = 1 * 0: no data; 1:seq like; 2:normal approximation; 3:out.BV (in.BV)
- clock = 2 * 1: global clock; 2: independent rates; 3: correlated rates
+ clock = 3 * 1: global clock; 2: independent rates; 3: correlated rates
RootAge = '<1.0' * safe constraint on root age, used if no fossil for root.
model = 0 * 0:JC69, 1:K80, 2:F81, 3:F84, 4:HKY85
@@ -24,9 +25,7 @@
finetune = 1: .1 .1 .1 .1 .1 .1 * auto (0 or 1): times, musigma2, rates, mixing, paras, FossilErr
- print = 1
+ print = 1 * 0: no mcmc sample; 1: everything except branch rates 2: everything
burnin = 2000
sampfreq = 2
- nsample = 20000
-
-*** Note: Make your window wider (100 columns) before running the program.
+ nsample = 200000
diff --git a/examples/DatingSoftBound/mcmctree.ctl b/examples/DatingSoftBound/mcmctree.ctl
index 26ed77a..1f260e9 100644
--- a/examples/DatingSoftBound/mcmctree.ctl
+++ b/examples/DatingSoftBound/mcmctree.ctl
@@ -1,6 +1,7 @@
seed = -1
seqfile = mtCDNApri123.txt
treefile = mtCDNApri.trees
+ mcmcfile = mcmc.txt
outfile = out.txt
ndata = 3
@@ -24,9 +25,9 @@
finetune = 1: .1 .1 .1 .1 .1 .1 * auto (0 or 1): times, musigma2, rates, mixing, paras, FossilErr
- print = 1
+ print = 1 * 0: no mcmc sample; 1: everything except branch rates 2: everything
burnin = 2000
- sampfreq = 2
+ sampfreq = 10
nsample = 20000
*** Note: Make your window wider (100 columns) before running the program.
diff --git a/examples/DatingSoftBound/mtCDNApri.trees b/examples/DatingSoftBound/mtCDNApri.trees
index 688c42f..e79612b 100644
--- a/examples/DatingSoftBound/mtCDNApri.trees
+++ b/examples/DatingSoftBound/mtCDNApri.trees
@@ -5,3 +5,7 @@
//end of file
+
+
+((((human, (chimpanzee, bonobo)) 'B(.06, .08)', gorilla),
+(orangutan, sumatran)) 'B(.12, .16)', gibbon);
diff --git a/src/BFdriver.c b/src/BFdriver.c
new file mode 100644
index 0000000..3af2eb3
--- /dev/null
+++ b/src/BFdriver.c
@@ -0,0 +1,74 @@
+/* This drives the computation of the marginal likelihood (bayes factor) calculation
+ using bpp and mcmctree.
+
+ cc -o BFdriver -O3 BFdriver.c tools.c -lm
+
+ BFdriver <controlfilename> <npoints> <scriptname.sh>
+ BFdriver mcmctree.ctl 16 tmp.sh
+*/
+#include "paml.h"
+
+int main (int argc, char*argv[])
+{
+ int j, npoints=8, ixw, nline=1024;
+ char resultsf[96]="betaweights.txt", ctlf[96]="mcmctree.ctl", scriptf[96]="tmp.sh";
+ char ctlfi[96], tmpf[96], line[1024], *pline, *s;
+ double beta, sign, weight;
+ const double *xNI = NULL, *wNI = NULL; /* Gauss-Legendre quadrature points */
+ FILE *fctl, *fctlb, *fcommand, *fresults;
+
+ puts("Usage:\n\tBFdriver controlfilename npoints\n");
+ puts("\tquadrature: log{M} = 0.5 * SUM w_b * E_b(log{f(X)})\n");
+ if(argc<2) exit(-1);
+ strcpy(ctlf, argv[1]);
+ if(argc>2) npoints = atoi(argv[2]);
+ if(argc>3) strcpy(scriptf, argv[3]);
+ fctl = (FILE*)gfopen(ctlf, "r");
+ if( s = strstr(ctlf, ".ctl") ) *s = '\0';
+
+ fresults = (FILE*)gfopen(resultsf, "w");
+ fprintf(fresults, "%s\t%s\t%s\n", "beta", "weight", "ElnfX");
+ GaussLegendreRule(&xNI, &wNI, npoints);
+ for (j=0; j<npoints; j++) {
+ if (j<npoints / 2) { ixw = npoints / 2 - 1 - j; sign = -1; }
+ else { ixw = j - npoints / 2; sign = 1; }
+ beta = 0.5 + sign / 2 * xNI[ixw];
+ weight = wNI[ixw];
+ printf("b%02d: beta = %.4f w = %8.6f\n", j+1, beta, weight );
+ sprintf(ctlfi, "%s.b%02d.ctl\0", ctlf, j+1);
+ fctlb = (FILE*)gfopen(ctlfi, "w");
+ fprintf(fctlb, "BayesFactorBeta = %8.6f * w=%8.6f.ctl\n", beta, weight);
+
+ rewind(fctl);
+ for ( ; ; ) {
+ if (fgets(line, nline, fctl) == NULL) break;
+ if (line[0] == '*') continue;
+ if (strstr(line, "BayesFactorBeta")) continue;
+ if (strstr(line, "outfile") || strstr(line, "mcmcfile")) {
+ pline = strchr(line, '=');
+ sscanf(pline + 1, "%s", tmpf);
+ if( s = strstr(tmpf, ".txt") ) {
+ *s = '\0';
+ }
+ sprintf(pline + 2, "%s.b%02d.txt\n\0", tmpf, j+1);
+ }
+ fputs(line, fctlb);
+ }
+ fclose(fctlb);
+ fprintf(fresults, "%.6f\t%.6f\t\n", beta, weight);
+ }
+ fclose(fctl);
+
+ fcommand = (FILE*)gfopen("commands", "w");
+ fprintf(fcommand, "#!/bin/bash\nfor I in {01..%02d}\n", npoints);
+ fprintf(fcommand, " do\n");
+ fprintf(fcommand, " echo \"#!/bin/bash\" > %s\n", scriptf);
+ fprintf(fcommand, " echo \"mcmctree %s.b$I.ctl > log.b$I.txt\" > %s\n", ctlf, scriptf);
+ fprintf(fcommand, " sleep 1\n");
+ fprintf(fcommand, " qsub -S /bin/bash -l h_vmem=4G -l tmem=4G -l h_rt=360:0:0 -cwd %s\n", scriptf);
+ fprintf(fcommand, " done\n");
+
+ fputs("\n#grep BFbeta log.b*.txt\n", fcommand);
+ fclose(fcommand);
+ exit(0);
+}
diff --git a/src/README.txt b/src/README.txt
index 2452855..e4eda44 100644
--- a/src/README.txt
+++ b/src/README.txt
@@ -1,7 +1,8 @@
Notes for compiling PAML on UNIX systems, including MAC OS X
Ziheng Yang (z.yang at ucl.ac.uk)
-Last updated, 10 December 2003
+Last updated, 17 March 2017
+
Copyright notice and disclaimer
diff --git a/src/baseml.c b/src/baseml.c
index 4b63796..532ca74 100644
--- a/src/baseml.c
+++ b/src/baseml.c
@@ -247,10 +247,11 @@ int main (int argc, char *argv[])
if(SeqDistance==NULL||ancestor==NULL) error2("oom distance&ancestor");
}
InitializeBaseAA(fout);
+
if(com.Mgene==3)
for(i=0; i<com.ngene; i++) xtoy(com.pi, com.piG[i], com.ncode);
- if (com.model==JC69 && !com.readpattern && !com.print) {
+ if (com.model==JC69 && com.ngene<=1 && !com.readpattern && !com.print) {
PatternWeightJC69like();
if(fout) {
fprintf(fout, "\n\nPrinting out site pattern counts\n");
@@ -274,8 +275,7 @@ int main (int argc, char *argv[])
if((com.fhK=(double*)realloc(com.fhK,s2))==NULL) error2("oom");
}
- printf ("\n%9ld bytes for distance ",
- com.ns*(com.ns-1)/2*(sizeof(double)+sizeof(int)));
+ printf ("\n%9ld bytes for distance ", com.ns*(com.ns-1)/2*(sizeof(double)+sizeof(int)));
printf("\n%9lu bytes for conP\n", com.sconP);
printf("%9lu bytes for fhK\n%9lu bytes for space\n", s2, com.sspace);
diff --git a/src/chi2.c b/src/chi2.c
index f2252b2..6ebf40a 100644
--- a/src/chi2.c
+++ b/src/chi2.c
@@ -6,7 +6,7 @@
degrees of freedom and the tail probability (type I error rate) for
given observed chi-square statistic and degree of freedom.
- Ziheng Yang, October 1993.
+ Ziheng Yang, October 1993.
*/
#include <stdio.h>
@@ -26,8 +26,8 @@ double IncompleteGamma (double x, double alpha, double ln_gamma_alpha);
int main(int argc, char*argv[])
{
- int i,j, n=20, ndf=200, nprob=8, option=0;
- double df, chi2, d=1.0/n, prob[]={.005, .025, .1, .5, .90, .95, .99, .999};
+ int i,j, n=20, ndf=200, nprob=8, option=0, df;
+ double chi2, d=1.0/n, prob[]={.005, .025, .1, .5, .90, .95, .99, .999};
if (argc!=2 && argc!=3) {
printf ("\n\nChi-square critical values\n");
@@ -51,21 +51,21 @@ int main(int argc, char*argv[])
else if(argc==2) {
for (; ; ) {
printf ("\nd.f. & Chi^2 value (Ctrl-c to break)? ");
- scanf ("%lf%lf", &df, &chi2);
+ scanf ("%d%lf", &df, &chi2);
if(df<1 || chi2<0) break;
prob[0] = 1-CDFChi2(chi2,df);
- printf ("\ndf = %2.0f prob = %.9f = %.3e\n", df, prob[0], prob[0]);
+ printf ("\ndf = %2d prob = %.9f = %.3e\n", df, prob[0], prob[0]);
}
}
else if(argc==3) {
df = atoi(argv[1]);
chi2 = atof(argv[2]);
if(df<1 || chi2<0) {
- printf("df = %d ch2 = %.4f invalid", df, chi2);
+ printf("df = %2d ch2 = %.4f invalid", df, chi2);
exit(-1);
}
prob[0] = 1 - CDFChi2(chi2, df);
- printf ("\ndf = %2.0f prob = %.9f = %.3e\n", df, prob[0], prob[0]);
+ printf ("\ndf = %2d prob = %.9f = %.3e\n", df, prob[0], prob[0]);
}
printf ("\n");
return (0);
diff --git a/src/codeml.c b/src/codeml.c
index 186466f..16baf76 100644
--- a/src/codeml.c
+++ b/src/codeml.c
@@ -487,7 +487,7 @@ scanf("%d", &KGaussLegendreRule);
}
fflush(fout);
- if(com.seqtype==AAseq && com.model==Poisson && !com.print) {
+ if(com.seqtype==AAseq && com.model==Poisson && com.ngene<=1 && !com.print) {
PatternWeightJC69like();
fprintf(fout, "\n\nPrinting out site pattern counts\n");
printPatterns(fout);
diff --git a/src/evolver.c b/src/evolver.c
index 423eaa5..5f64be3 100644
--- a/src/evolver.c
+++ b/src/evolver.c
@@ -1,1315 +1,1315 @@
-/* evolver.c
- Copyright, Ziheng Yang, April 1995.
-
- cl -Ot -O2 evolver.c tools.c
- cl -Ot -O2 -DCodonNSbranches -FeevolverNSbranches.exe evolver.c tools.c
- cl -Ot -O2 -DCodonNSsites -FeevolverNSsites.exe evolver.c tools.c
- cl -Ot -O2 -DCodonNSbranchsites -FeevolverNSbranchsites.exe evolver.c tools.c
-
- cc -fast -o evolver evolver.c tools.c -lm
- cc -O4 -DCodonNSbranches -o evolverNSbranches evolver.c tools.c -lm
- cc -O4 -DCodonNSsites -o evolverNSsites evolver.c tools.c -lm
- cc -O4 -DCodonNSbranchsites -o evolverNSbranchsites evolver.c tools.c -lm
-
- evolver
- evolver 5 MCbase.dat
- evolver 6 MCcodon.dat
- evolver 7 MCaa.dat
- evolver 9 <TreesFile> <MasterTreeFile>
-*/
-
-/*
-#define CodonNSbranches
-#define CodonNSsites
-#define CodonNSbranchsites
-*/
-
-#include "paml.h"
-
-#define NS 5000
-#define NBRANCH (NS*2-2)
-#define MAXNSONS 20
-#define LSPNAME 50
-#define NCODE 64
-#define NCATG 40
-
-
-struct CommonInfo {
- unsigned char *z[2*NS-1];
- char spname[NS][LSPNAME+1], daafile[512], cleandata, readpattern;
- int ns, ls, npatt, np, ntime, ncode, clock, rooted, model, icode;
- int seqtype, *pose, ncatG, NSsites;
- int ngene, lgene[1], posG[1+1]; /* not used */
- double piG[1][4], rgene[1]; /* not used */
- double *fpatt, kappa, omega, alpha, pi[64], *conP, daa[20*20];
- double freqK[NCATG], rK[NCATG];
- char *siteID; /* used if ncatG>1 */
- double *siterates; /* rates for gamma or omega for site or branch-site models */
- double *omegaBS, *QfactorBS; /* omega IDs for branch-site models */
-} com;
-struct TREEB {
- int nbranch, nnode, root, branches[NBRANCH][2];
-} tree;
-struct TREEN {
- int father, nson, sons[MAXNSONS], ibranch;
- double branch, age, omega, label, *conP;
- char *nodeStr, fossil;
-} *nodes;
-
-extern char BASEs[];
-extern int GeneticCode[][64], noisy;
-int LASTROUND=0; /* not used */
-
-#define EVOLVER
-#define NODESTRUCTURE
-#define BIRTHDEATH
-#include "treesub.c"
-#include "treespace.c"
-
-void TreeDistances(FILE* fout);
-void Simulate(char *ctlf);
-void MakeSeq(char *z, int ls);
-int EigenQbase(double rates[], double pi[], double Root[],double U[], double V[],double Q[]);
-int EigenQcodon (int getstats, double kappa,double omega,double pi[], double Root[], double U[], double V[], double Q[]);
-int EigenQaa(double pi[], double Root[], double U[], double V[],double Q[]);
-void CladeMrBayesProbabilities (char treefile[]);
-int between_f_and_x(void);
-void LabelClades(FILE *fout);
-
-char *MCctlf0[]={"MCbase.dat","MCcodon.dat","MCaa.dat"};
-char *seqf[]={"mc.paml", "mc.paml", "mc.nex", "mc.nex"};
-
-enum {JC69, K80, F81, F84, HKY85, T92, TN93, REV} BaseModels;
-char *basemodels[]={"JC69","K80","F81","F84","HKY85","T92","TN93","REV"};
-enum {Poisson, EqualInput, Empirical, Empirical_F} AAModels;
-char *aamodels[]={"Poisson", "EqualInput", "Empirical", "Empirical_F"};
-
-
-double PMat[NCODE*NCODE], U[NCODE*NCODE], V[NCODE*NCODE], Root[NCODE];
-static double Qfactor=-1, Qrates[5]; /* Qrates[] hold kappa's for nucleotides */
-
-
-int main (int argc, char*argv[])
-{
- char *MCctlf=NULL, outf[512]="evolver.out", treefile[512]="mcmc.txt", mastertreefile[512]="\0";
- int i, option=-1, ntree=1,rooted, BD=0, gotoption=0, pick1tree=-1;
- double bfactor=1, birth=-1,death=-1,sample=-1,mut=-1, *space;
- FILE *fout=gfopen(outf,"w");
-
- printf("EVOLVER in %s\n", pamlVerStr);
- com.alpha=0; com.cleandata=1; com.model=0; com.NSsites=0;
-
- if(argc>2 && !strcmp(argv[argc-1], "--stdout-no-buf"))
- setvbuf(stdout, NULL, _IONBF, 0);
- if(argc>1) {
- gotoption=1; sscanf(argv[1], "%d", &option);
- }
- if(argc==1)
- printf("Results for options 1-4 & 8 go into %s\n",outf);
- else if(option!=5 && option!=6 && option!=7 && option!=9) {
- puts("Usage: \n\tevolver \n\tevolver option# MyDataFile"); exit(-1);
- }
- if(option>=4 && option<=6)
- MCctlf = argv[2];
- else if(option==9) {
- strcpy(treefile, argv[2]);
- if(argc>3) strcpy(mastertreefile, argv[3]);
- if(argc>4) sscanf(argv[4], "%d", &pick1tree);
- }
-
-#if defined (CodonNSbranches)
- option=6; com.model=1;
- MCctlf = (argc==3 ? argv[2] : "MCcodonNSbranches.dat");
- gotoption = 1;
-#elif defined (CodonNSsites)
- option=6; com.NSsites=3;
- MCctlf = (argc==3 ? argv[2] : "MCcodonNSsites.dat");
- gotoption = 1;
-#elif defined (CodonNSbranchsites)
- option=6; com.model=1; com.NSsites=3;
- MCctlf = (argc==3 ? argv[2] : "MCcodonNSbranchsites.dat");
- gotoption = 1;
-#endif
-
- if(!gotoption) {
- for(; ;) {
- fflush(fout);
- printf("\n\t(1) Get random UNROOTED trees?\n");
- printf("\t(2) Get random ROOTED trees?\n");
- printf("\t(3) List all UNROOTED trees?\n");
- printf("\t(4) List all ROOTED trees?\n");
- printf("\t(5) Simulate nucleotide data sets (use %s)?\n",MCctlf0[0]);
- printf("\t(6) Simulate codon data sets (use %s)?\n",MCctlf0[1]);
- printf("\t(7) Simulate amino acid data sets (use %s)?\n",MCctlf0[2]);
- printf("\t(8) Calculate identical bi-partitions between trees?\n");
- printf("\t(9) Calculate clade support values (evolver 9 treefile mastertreefile <pick1tree>)?\n");
- printf("\t(11) Label clades?\n");
- printf("\t(0) Quit?\n");
-
- option = 9;
- scanf("%d", &option);
-
- if(option==0) exit(0);
- if(option>=5 && option<=7) break;
- if(option<5) {
- printf ("No. of species: ");
- scanf ("%d", &com.ns);
- }
- if(com.ns>NS) error2 ("Too many species. Raise NS.");
- if((space=(double*)malloc(10000*sizeof(double)))==NULL) error2("oom");
- rooted = !(option%2);
- if(option<3) {
- printf("\nnumber of trees & random number seed? ");
- scanf("%d%d", &ntree, &i);
- SetSeed(i, 1);
- printf ("Want branch lengths from the birth-death process (0/1)? ");
- scanf ("%d", &BD);
- }
- if(option<=4) {
- if(com.ns<3) error2("no need to do this?");
- i = (com.ns*2-1)*sizeof(struct TREEN);
- if((nodes=(struct TREEN*)malloc(i)) == NULL)
- error2("oom");
- }
- switch (option) {
- case(1): /* random UNROOTED trees */
- case(2): /* random ROOTED trees */
- /* default names */
- if(com.ns<=52)
- for(i=0; i<com.ns; i++) sprintf(com.spname[i], "%c", (i<26 ? 'A'+i : 'a'+i-26));
- else
- for(i=0; i<com.ns; i++) sprintf(com.spname[i], "S%d", i+1);
-
- if(BD) {
- printf ("\nbirth rate, death rate, sampling fraction, and ");
- printf ("mutation rate (tree height)?\n");
- scanf ("%lf%lf%lf%lf", &birth, &death, &sample, &mut);
- }
- for(i=0;i<ntree;i++) {
- RandomLHistory (rooted, space);
- if(BD)
- BranchLengthBD (1, birth, death, sample, mut);
- if(com.ns<20&&ntree<10) { OutTreeN(F0, 0, BD); puts("\n"); }
- OutTreeN(fout, 1, BD); FPN(fout);
- }
- /*
- for (i=0; i<com.ns-2-!rooted; i++)
- Ib[i] = (int)((3.+i)*rndu());
- MakeTreeIb (com.ns, Ib, rooted);
- */
- break;
- case(3):
- case(4):
- ListTrees(fout, com.ns, rooted);
- break;
- case(8): TreeDistances(fout); break;
- case(9):
- printf("tree file names? ");
- scanf("%s%s", treefile, mastertreefile);
- break;
- case(10): between_f_and_x(); break;
- case(11): LabelClades(fout); break;
- default: exit(0);
- }
- }
- }
-
- if(option>=5 && option<=7) {
- com.seqtype = option-5; /* 0, 1, 2 for bases, codons, & amino acids */
- Simulate(MCctlf ? MCctlf : MCctlf0[option-5]);
- }
- else if(option==9) {
- CladeSupport(fout, treefile, 1, mastertreefile, pick1tree);
- /* CladeMrBayesProbabilities("/papers/BPPJC3sB/Karol.trees"); */
- }
- return(0);
-}
-
-
-int between_f_and_x (void)
-{
-/* this helps with the exponential transform for frequency parameters */
- int i,n,fromf=0;
- double x[100];
-
- for(;;) {
- printf("\ndirection (0:x=>f; 1:f=>x; -1:end) & #classes? ");
- scanf("%d",&fromf);
- if(fromf==-1) return(0);
- scanf("%d", &n); if(n>100) error2("too many classes");
- printf("input the first %d values for %s? ",n-1,(fromf?"f":"x"));
- FOR(i,n-1) scanf("%lf",&x[i]);
- x[n-1]=(fromf?1-sum(x,n-1):0);
- f_and_x(x, x, n, fromf, 1);
- matout(F0,x,1,n);
- }
-}
-
-
-void LabelClades(FILE *fout)
-{
-/* This reads in a tree and scan species names to check whether they form a
- paraphyletic group and then label the clade.
- It assumes that the tree is unrooted, and so goes through two rounds to check
- whether the remaining seqs form a monophyletic clade.
-*/
- FILE *ftree;
- int unrooted=1,iclade, sizeclade, mrca, paraphyl, is, imrca, i,j,k, lasts, haslength;
- char key[96]="A", treef[64]="/A/F/flu/HA.all.prankcodon.tre", *p,chosen[NS], *endstr="end";
- int *anc[NS-1], loc, bitmask, SI=sizeof(int)*8;
- int debug;
-
- printf("Tree file name? ");
- scanf ("%s", treef);
- printf("Treat tree as unrooted (0 no, 1 yes)? ");
- scanf ("%d", &unrooted);
-
- ftree = gfopen (treef,"r");
- fscanf (ftree, "%d%d", &com.ns, &j);
- if(com.ns<=0) error2("need ns in tree file");
- debug = (com.ns<20);
-
- i = (com.ns*2-1)*sizeof(struct TREEN);
- if((nodes=(struct TREEN*)malloc(i))==NULL) error2("oom");
- for(i=0; i<com.ns*2-1; i++) nodes[i].nodeStr = NULL;
- for(i=0; i<com.ns-1; i++) {
- anc[i] = (int*)malloc((com.ns/SI+1)*sizeof(int));
- if(anc[i]==NULL) error2("oom");
- }
- ReadTreeN(ftree, &haslength, &j, 1, 0);
- fclose(ftree);
- if(debug) { OutTreeN(F0, 1, PrNodeNum); FPN(F0); }
-
- for(iclade=0; iclade<com.ns-1; iclade++) {
- printf("\nString for selecting sequences (followed by non-digit) (end to end)? ");
- scanf("%s", key);
- if(strcmp(endstr, key) == 0)
- break;
- for(i=0; i<com.ns; i++)
- chosen[i] = '\0';
-
-
- k = strlen(key);
- for(i=0; i<com.ns; i++) {
- if( (p=strstr(com.spname[i], key))
- && !isdigit(p[k]) )
- chosen[i] = 1;
- }
-
- /*
- for(i=0; i<com.ns; i++)
- if(strstr(com.spname[i], key)) chosen[i] = 1;
- */
-
- /* look for MRCA, going through two rounds, assuming unrooted tree */
- for(imrca=0; imrca<1+unrooted; imrca++) {
- if(imrca)
- for(i=0; i<com.ns; i++) chosen[i] = 1 - chosen[i];
-
- for(i=0,sizeclade=0; i<com.ns; i++)
- if(chosen[i]) {
- sizeclade ++;
- lasts = i;
- }
-
- if(sizeclade <= 1 || sizeclade >= com.ns-1) {
- puts("unable to form a clade. <2 seqs.");
- break;
- }
- for(i=0; i<com.ns-1; i++) for(j=0; j<com.ns/SI+1; j++)
- anc[i][j] = 0;
- for(is=0; is<com.ns; is++) {
- if(chosen[is]==0) continue;
- loc = is/SI; bitmask = 1 << (is%SI);
- for(j=nodes[is].father; j!=-1; j=nodes[j].father) {
- anc[j-com.ns][loc] |= bitmask;
- if(is==lasts) {
- for(i=0,k=0; i<com.ns; i++)
- if(anc[j-com.ns][i/SI] & (1<<(i%SI)))
- k ++;
- if(k==sizeclade) {
- mrca = j; break;
- }
- }
- }
- }
- if(imrca==0 && mrca!=tree.root) /* 1st round is enough */
- break;
- }
-
- if(sizeclade <= 1 || sizeclade >= com.ns-1 || mrca==tree.root) {
- printf("Unable to label. Ignored.");
- continue;
- }
-
- if(debug)
- for(is=0; is<com.ns-1; is++) {
- printf("\nnode %4d: ", is+com.ns);
- for(j=0; j<com.ns; j++) {
- loc = j/SI; bitmask = 1 << (j%SI);
- printf(" %d", (anc[is][loc] & bitmask) != 0);
- }
- }
-
- printf("\nClade #%d (%s): %d seqs selected, MRCA is %d\n", iclade+1, key, sizeclade, mrca+1);
- for(is=0,paraphyl=0; is<com.ns; is++) {
- if(chosen[is] == 0)
- for(j=nodes[is].father; j!=-1; j=nodes[j].father)
- if(j==mrca) { paraphyl++; break; }
- }
- if(paraphyl)
- printf("\nThis clade is paraphyletic, & includes %d other sequences\n", paraphyl);
-
- nodes[mrca].label = iclade+1;
- if(debug) OutTreeN(F0, 1, haslength|PrLabel);
- }
-
- for(i=0; i<com.ns-1; i++) free(anc[i]);
- OutTreeN(fout, 1, haslength|PrLabel); FPN(fout);
- printf("Printed final tree with labels in evolver.out\n");
- exit(0);
-}
-
-void TreeDistanceDistribution (FILE* fout)
-{
-/* This calculates figure 3.7 of Yang (2006).
- This reads the file of all trees (such as 7s.all.trees), and calculates the
- distribution of partition distance in all pairwise comparisons.
-*/
- int i,j,ntree, k,*nib, nsame, IBsame[NS], lpart=0;
- char treef[64]="5s.all.trees", *partition;
- FILE *ftree;
- double mPD[NS], PD1[NS]; /* distribution of partition distances */
-
- puts("Tree file name?");
- scanf ("%s", treef);
-
- ftree=gfopen (treef,"r");
- fscanf (ftree, "%d%d", &com.ns, &ntree);
- printf("%2d sequences %2d trees.\n", com.ns, ntree);
- i=(com.ns*2-1)*sizeof(struct TREEN);
- if((nodes=(struct TREEN*)malloc(i))==NULL) error2("oom");
-
- lpart = (com.ns-1)*com.ns*sizeof(char);
- i = ntree*lpart;
- printf("\n%d bytes of space requested.\n", i);
- partition = (char*)malloc(i);
- nib = (int*)malloc(ntree*sizeof(int));
- if (partition==NULL || nib==NULL) error2("out of memory");
-
- puts("\ntree #: mean prop of tree pairs with 0 1 2 ... shared bipartitions\n");
- fputs("\ntree #: prop of tree pairs with 0 1 2 ... shared bipartitions\n",fout);
- for (i=0; i<ntree; i++) {
- ReadTreeN (ftree, &j, &k, 0, 1);
- nib[i]=tree.nbranch-com.ns;
- Tree2Partition(partition+i*lpart);
- }
- for(k=0; k<com.ns-3; k++) mPD[k]=0;
- for (i=0; i<ntree; i++,FPN(fout)) {
- for(k=0; k<com.ns-3; k++) PD1[k]=0;
- for (j=0; j<ntree; j++) {
- if(j==i) continue;
- nsame=NSameBranch(partition+i*lpart,partition+j*lpart, nib[i],nib[j],IBsame);
- PD1[nsame] ++;
- }
- for(k=0; k<com.ns-3; k++) PD1[k] /= (ntree-1.);
- for(k=0; k<com.ns-3; k++) mPD[k] = (mPD[k]*i+PD1[k])/(i+1.);
- printf("%8d (%5.1f%%):", i+1,(i+1.)/ntree*100);
- for(k=0; k<com.ns-3; k++) printf(" %7.4f", mPD[k]);
- fprintf(fout, "%8d:", i+1); for(k=0; k<com.ns-3; k++) fprintf(fout, " %7.4f", PD1[k]);
- printf("%s", (com.ns<8||(i+1)%100==0 ? "\n" : "\r"));
- }
- free(partition); free(nodes); free(nib); fclose(ftree);
- exit(0);
-}
-
-
-void TreeDistances (FILE* fout)
-{
-/* I think this is broken after i changed the routine Tree2Partition().
-*/
- int i,j,ntree, k,*nib, parti2B[NS], nsame, IBsame[NS],nIBsame[NS], lpart=0;
- char treef[64]="5s.all.trees", *partition;
- FILE *ftree;
- double psame, mp, vp;
-
- /*
- TreeDistanceDistribution(fout);
- */
-
- puts("\nNumber of identical bi-partitions between trees.\nTree file name?");
- scanf ("%s", treef);
-
- ftree=gfopen (treef,"r");
- fscanf (ftree, "%d%d", &com.ns, &ntree);
- printf("%2d sequences %2d trees.\n", com.ns, ntree);
- i=(com.ns*2-1)*sizeof(struct TREEN);
- if((nodes=(struct TREEN*)malloc(i))==NULL) error2("oom");
-
- if(ntree<2) error2("ntree");
- printf ("\n%d species, %d trees\n", com.ns, ntree);
- puts("\n\t1: first vs. rest?\n\t2: all pairwise comparisons?\n");
- k=2;
- scanf("%d", &k);
-
- lpart=(com.ns-1)*com.ns*sizeof(char);
- i=(k==1?2:ntree)*lpart;
- printf("\n%d bytes of space requested.\n", i);
- partition=(char*)malloc(i);
- nib=(int*)malloc(ntree*sizeof(int));
- if (partition==NULL || nib==NULL) error2("out of memory");
-
- if(k==2) { /* pairwise comparisons */
- fputs("Number of identical bi-partitions in pairwise comparisons\n",fout);
- for (i=0; i<ntree; i++) {
- ReadTreeN (ftree, &j, &k, 0, 1);
- nib[i]=tree.nbranch-com.ns;
- Tree2Partition(partition+i*lpart);
- }
- for (i=0; i<ntree; i++,FPN(F0),FPN(fout)) {
- printf("%2d (%2d):", i+1,nib[i]);
- fprintf(fout,"%2d (%2d):", i+1,nib[i]);
- for (j=0; j<i; j++) {
- nsame=NSameBranch(partition+i*lpart,partition+j*lpart, nib[i],nib[j],IBsame);
- printf(" %2d", nsame);
- fprintf(fout," %2d", nsame);
- }
- }
- }
- else { /* first vs. others */
- ReadTreeN (ftree, &j, &k, 0, 1);
- nib[0]=tree.nbranch-com.ns;
- if (nib[0]==0) error2("1st tree is a star tree..");
- Tree2Partition (partition);
- fputs ("Comparing the first tree with the others\nFirst tree:\n",fout);
- OutTreeN(fout,0,0); FPN(fout); OutTreeB(fout); FPN(fout);
- fputs ("\nInternal branches in the first tree:\n",fout);
- FOR(i,nib[0]) {
- k=parti2B[i];
- fprintf(fout,"%3d (%2d..%-2d): ( ",
- i+1,tree.branches[k][0]+1,tree.branches[k][1]+1);
- FOR(j,com.ns) if(partition[i*com.ns+j]) fprintf(fout,"%d ",j+1);
- fputs(")\n",fout);
- }
- if(nodes[tree.root].nson<=2)
- fputs("\nRooted tree, results may not be correct.\n",fout);
- fputs("\nCorrect internal branches compared with the 1st tree:\n",fout);
- FOR(k,nib[0]) nIBsame[k]=0;
- for (i=1,mp=vp=0; i<ntree; i++,FPN(fout)) {
- ReadTreeN (ftree, &j, &k, 0, 1);
- nib[1]=tree.nbranch-com.ns;
- Tree2Partition(partition+lpart);
- nsame=NSameBranch (partition,partition+lpart, nib[0],nib[1],IBsame);
-
- psame=nsame/(double)nib[0];
- FOR(k,nib[0]) nIBsame[k]+=IBsame[k];
- fprintf(fout,"1 vs. %3d: %4d: ", i+1,nsame);
- FOR(k,nib[0]) if(IBsame[k]) fprintf(fout," %2d", k+1);
- printf("1 vs. %5d: %6d/%d %10.4f\n", i+1,nsame,nib[0],psame);
- vp += square(psame - mp)*(i-1.)/i;
- mp=(mp*(i-1.) + psame)/i;
- }
- vp=(ntree<=2 ? 0 : sqrt(vp/((ntree-1-1)*(ntree-1.))));
- fprintf(fout,"\nmean and S.E. of proportion of identical partitions\n");
- fprintf(fout,"between the 1st and all the other %d trees ", ntree-1);
- fprintf(fout,"(ignore these if not revelant):\n %.4f +- %.4f\n", mp, vp);
- fprintf(fout,"\nNumbers of times, out of %d, ", ntree-1);
- fprintf(fout,"interior branches of tree 1 are present");
- fputs("\n(This may be bootstrap support for nodes in tree 1)\n",fout);
- FOR(k,nib[0]) {
- i=tree.branches[parti2B[k]][0]+1; j=tree.branches[parti2B[k]][1]+1;
- fprintf(fout,"%3d (%2d..%-2d): %6d (%5.1f%%)\n",
- k+1,i,j,nIBsame[k],nIBsame[k]*100./(ntree-1.));
- }
- }
- free(partition); free(nodes); free(nib); fclose(ftree);
-}
-
-
-
-int EigenQbase(double rates[], double pi[],
- double Root[],double U[],double V[],double Q[])
-{
-/* Construct the rate matrix Q[] for nucleotide model REV.
-*/
- int i,j,k;
- double mr, space[4];
-
- zero (Q, 16);
- for (i=0,k=0; i<3; i++) for (j=i+1; j<4; j++)
- if (i*4+j!=11) Q[i*4+j]=Q[j*4+i]=rates[k++];
- for (i=0,Q[3*4+2]=Q[2*4+3]=1; i<4; i++) FOR (j,4) Q[i*4+j] *= pi[j];
- for (i=0,mr=0; i<4; i++)
- { Q[i*4+i]=0; Q[i*4+i]=-sum(Q+i*4, 4); mr-=pi[i]*Q[i*4+i]; }
- abyx (1/mr, Q, 16);
-
- eigenQREV(Q, com.pi, 4, Root, U, V, space);
- return (0);
-}
-
-
-static double freqK_NS=-1;
-
-int EigenQcodon (int getstats, double kappa, double omega, double pi[],
- double Root[], double U[], double V[], double Q[])
-{
-/* Construct the rate matrix Q[].
- 64 codons are used, and stop codons have 0 freqs.
-*/
- int n=com.ncode, i,j,k, c[2],ndiff,pos=0,from[3],to[3];
- double mr, space[64];
-
- for(i=0; i<n*n; i++) Q[i] = 0;
- for (i=0; i<n; i++) FOR (j,i) {
- from[0]=i/16; from[1]=(i/4)%4; from[2]=i%4;
- to[0]=j/16; to[1]=(j/4)%4; to[2]=j%4;
- c[0]=GeneticCode[com.icode][i]; c[1]=GeneticCode[com.icode][j];
- if (c[0]==-1 || c[1]==-1) continue;
- for (k=0,ndiff=0; k<3; k++) if (from[k]!=to[k]) { ndiff++; pos=k; }
- if (ndiff!=1) continue;
- Q[i*n+j]=1;
- if ((from[pos]+to[pos]-1)*(from[pos]+to[pos]-5)==0) Q[i*n+j]*=kappa;
- if(c[0]!=c[1]) Q[i*n+j]*=omega;
- Q[j*n+i]=Q[i*n+j];
- }
- for(i=0; i<n; i++) for(j=0; j<n; j++)
- Q[i*n+j] *= com.pi[j];
- for(i=0,mr=0;i<n;i++) {
- Q[i*n+i] = -sum(Q+i*n,n);
- mr -= pi[i]*Q[i*n+i];
- }
-
- if(getstats)
- Qfactor += freqK_NS * mr;
- else {
- if(com.ncatG==0) FOR(i,n*n) Q[i]*=1/mr;
- else FOR(i,n*n) Q[i]*=Qfactor; /* NSsites models */
- eigenQREV(Q, com.pi, n, Root, U, V, space);
- }
- return (0);
-}
-
-
-
-int EigenQaa (double pi[], double Root[], double U[], double V[], double Q[])
-{
-/* Construct the rate matrix Q[]
-*/
- int n=20, i,j;
- double mr, space[20];
-
- FOR (i,n*n) Q[i]=0;
- switch (com.model) {
- case (Poisson) : case (EqualInput) :
- fillxc (Q, 1., n*n); break;
- case (Empirical) : case (Empirical_F):
- FOR(i,n) FOR(j,i) Q[i*n+j]=Q[j*n+i]=com.daa[i*n+j]/100;
- break;
- }
- FOR (i,n) FOR (j,n) Q[i*n+j]*=com.pi[j];
- for (i=0,mr=0; i<n; i++) {
- Q[i*n+i]=0; Q[i*n+i]=-sum(Q+i*n,n); mr-=com.pi[i]*Q[i*n+i];
- }
-
- eigenQREV(Q, com.pi, n, Root, U, V, space);
- FOR(i,n) Root[i]=Root[i]/mr;
-
- return (0);
-}
-
-
-int GetDaa (FILE* fout, double daa[])
-{
-/* Get the amino acid substitution rate matrix (grantham, dayhoff, jones, etc).
-*/
- FILE * fdaa;
- char aa3[4]="";
- int i,j, n=20;
-
- fdaa=gfopen(com.daafile, "r");
- printf("\nReading rate matrix from %s\n", com.daafile);
-
- for (i=0; i<n; i++) for (j=0,daa[i*n+i]=0; j<i; j++) {
- fscanf(fdaa, "%lf", &daa[i*n+j]);
- daa[j*n+i]=daa[i*n+j];
- }
- if (com.model==Empirical) {
- FOR(i,n) if(fscanf(fdaa,"%lf",&com.pi[i])!=1) error2("err aaRatefile");
- if (fabs(1-sum(com.pi,20))>1e-4) error2("\nSum of aa freq. != 1\n");
- }
- fclose (fdaa);
-
- if (fout) {
- fprintf (fout, "\n%s\n", com.daafile);
- FOR (i,n) {
- fprintf (fout, "\n%4s", getAAstr(aa3,i));
- FOR (j,i) fprintf (fout, "%5.0f", daa[i*n+j]);
- }
- FPN (fout);
- }
-
- return (0);
-}
-
-
-
-
-void MakeSeq(char*z, int ls)
-{
-/* generate a random sequence of nucleotides, codons, or amino acids by
- sampling com.pi[], or read the ancestral sequence from the file RootSeq.txt
- if the file exists.
-*/
- int i,j,h, n=com.ncode, ch, n31=(com.seqtype==1?3:1), lst;
- double p[64],r, small=1e-5;
- char *pch=(com.seqtype==2?AAs:BASEs);
- char rootseqf[]="RootSeq.txt", codon[4]=" ";
- FILE *fseq=(FILE*)fopen(rootseqf,"r");
- static int times=0;
-
- if(fseq) {
- if(times++==0) printf("Reading sequence at the root from file.\n\n");
- if(com.siterates && com.ncatG>1)
- error2("sequence for root doesn't work for site-class models");
-
- for(lst=0; ; ) {
- for(i=0; i<n31; i++) {
- while((ch=fgetc(fseq)) !=EOF && !isalpha(ch)) ;
- if(ch==EOF) error2("EOF when reading root sequence.");
- if(isalpha(ch))
- codon[i]=(char)(ch=CodeChara((char)ch, com.seqtype));
- }
- if(com.seqtype==1) ch = codon[0]*16 + codon[1]*4 + codon[2];
- if(ch<0 || ch>n-1)
- printf("error when reading site %d\n", lst+1);
- if(com.seqtype==1 && com.pi[ch]==0)
- printf("you seem to have a stop codon in the root sequence\n");
-
- z[lst++] = (char)ch;
- if(lst==com.ls) break;
- }
- fclose(fseq);
- }
- else {
- for(j=0; j<n; j++) p[j] = com.pi[j];
- for(j=1; j<n; j++) p[j] += p[j-1];
- if(fabs(p[n-1]-1) > small)
- { printf("\nsum pi = %.6f != 1!\n", p[n-1]); exit(-1); }
- for(h=0; h<com.ls; h++) {
- for(j=0,r=rndu();j<n-1;j++)
- if(r<p[j]) break;
- z[h] = (char)j;
- }
- }
-}
-
-
-
-void Evolve1 (int inode)
-{
-/* evolve sequence com.z[tree.root] along the tree to generate com.z[],
- using nodes[].branch, nodes[].omega, & com.model
- Needs com.z[0,1,...,nnode-1], while com.z[0] -- com.z[ns-1] constitute
- the data.
- For codon sequences, com.siterates[] has w's for NSsites and NSbranchsite models.
-*/
- int is, h,i,j, ison, from, n=com.ncode, longseq=100000;
- double t, rw;
-
- for (is=0; is<nodes[inode].nson; is++) {
- ison=nodes[inode].sons[is];
- memcpy(com.z[ison],com.z[inode],com.ls*sizeof(unsigned char));
- t=nodes[ison].branch;
-
- if(com.seqtype==1 && com.model && com.NSsites) { /* branch-site models */
- Qfactor = com.QfactorBS[ison];
- for(h=0; h<com.ls; h++)
- com.siterates[h] = com.omegaBS[ison*com.ncatG+com.siteID[h]];
- }
-
- for(h=0; h<com.ls; h++) {
- /* decide whether to recalcualte PMat[]. */
- if (h==0 || (com.siterates && com.siterates[h]!=com.siterates[h-1])) {
- rw = (com.siterates?com.siterates[h]:1);
-
- switch(com.seqtype) {
- case (BASEseq):
- if(com.model<=TN93)
- PMatTN93(PMat, t*Qfactor*rw*Qrates[0],
- t*Qfactor*rw*Qrates[1], t*Qfactor*rw, com.pi);
- else if(com.model==REV)
- PMatUVRoot(PMat, t*rw, com.ncode, U,V,Root);
- break;
-
- case (CODONseq): /* Watch out for NSsites model */
- if(com.model || com.NSsites) { /* no need to update UVRoot if M0 */
- if(com.model && com.NSsites==0) /* branch */
- rw = nodes[ison].omega; /* should be equal to com.rK[nodes[].label] */
-
- EigenQcodon(0, com.kappa, rw, com.pi, Root, U, V, PMat);
- }
- PMatUVRoot(PMat, t, com.ncode, U, V, Root);
- break;
-
- case (AAseq):
- PMatUVRoot(PMat, t*rw, com.ncode, U, V, Root);
- break;
- }
- for(i=0; i<n; i++)
- for(j=1;j<n;j++)
- PMat[i*n+j] += PMat[i*n+j-1];
- }
- for(j=0,from=com.z[ison][h],rw=rndu(); j<n-1; j++)
- if(rw < PMat[from*n+j]) break;
- com.z[ison][h] = j;
- }
-
- if(com.ls>longseq) printf("\r nodes %2d -> %2d, evolving . . ", inode+1, ison+1);
-
- if(nodes[ison].nson) Evolve1(ison);
- } /* for (is) */
-
- if(inode==tree.root && com.ls>longseq) printf("\r%s", strc(50,' '));
-}
-
-
-
-void Simulate (char *ctlf)
-{
-/* simulate nr data sets of nucleotide, codon, or AA sequences.
- ls: number of nucleotides, codons, or AAs in each sequence.
- All 64 codons are used for codon sequences.
- When com.alpha or com.ncatG>1, sites are randomized after sequences are
- generated.
- space[com.ls] is used to hold site marks.
- format: 0: paml sites; 1: paml patterns; 2: paup nex; 3: paup JC69 format
- */
- char *ancf="ancestral.txt", *siteIDf="siterates.txt";
- FILE *fin, *fseq, *ftree=NULL, *fanc=NULL, *fsiteID=NULL;
- char *paupstart="paupstart",*paupblock="paupblock",*paupend="paupend";
- char line[32000];
- int lline=32000, i,j,k, ir,n,nr, fixtree=1, sspace=10000, rooted=1;
- int h=0,format=0, b[3]={0}, nrate=1, counts[NCATG];
- int *siteorder=NULL;
- char *tmpseq=NULL, *pc;
- double birth=0, death=0, sample=1, mut=1, tlength, *space, *blengthBS;
- double T,C,A,G,Y,R, Falias[NCATG];
- int Lalias[NCATG];
-
- noisy = 1;
- printf("\nReading options from data file %s\n", ctlf);
- com.ncode = n = (com.seqtype==0 ? 4 : (com.seqtype==1?64:20));
- fin = (FILE*)gfopen(ctlf,"r");
- fscanf(fin, "%d", &format);
- fgets(line, lline, fin);
- printf("\nSimulated data will go into %s.\n", seqf[format]);
- if(format==2) printf("%s, %s, & %s will be appended if existent.\n", paupstart,paupblock,paupend);
-
- fscanf (fin, "%d", &i);
- fgets(line, lline, fin);
- SetSeed(i, 1);
- fscanf (fin, "%d%d%d", &com.ns, &com.ls, &nr);
- fgets(line, lline, fin);
- i=(com.ns*2-1)*sizeof(struct TREEN);
- if((nodes=(struct TREEN*)malloc(i))==NULL) error2("oom");
-
- if(com.ns>NS) error2("too many seqs?");
- printf ("\n%d seqs, %d sites, %d replicate(s)\n", com.ns, com.ls, nr);
- k=(com.ns*com.ls* (com.seqtype==CODONseq?4:1) *nr)/1000+1;
- printf ("Seq file will be about %dK bytes.\n",k);
- for(i=0; i<com.ns; i++) /* default spname */
- sprintf(com.spname[i],"S%d",i+1);
-
- if(fixtree) {
- fscanf(fin, "%lf", &tlength); fgets(line, lline, fin);
- if(ReadTreeN(fin, &i, &j, 1, 1)) /* might overwrite spname */
- error2("err tree..");
-
- if(i==0) error2("use : to specify branch lengths in tree");
- for(i=0,T=0; i<tree.nnode; i++)
- if(i!=tree.root) T += nodes[i].branch;
- if(tlength>0) {
- for(i=0; i<tree.nnode; i++)
- if(i!=tree.root) nodes[i].branch *= tlength/T;
- }
- printf("tree length = %.3f\n", (tlength>0?tlength:T));
- if(com.ns<100) {
- printf("\nModel tree & branch lengths:\n");
- OutTreeN(F0,1,1); FPN(F0);
- OutTreeN(F0,0,1); FPN(F0);
- }
- if(com.seqtype==CODONseq && com.model && !com.NSsites) { /* branch model */
- FOR(i,tree.nnode) nodes[i].omega=nodes[i].label;
- FPN(F0); OutTreeN(F0, 1, PrBranch|PrLabel); FPN(F0);
- }
- }
- else { /* random trees, broken or need testing? */
- printf ("\nbirth rate, death rate, sampling fraction, mutation rate (tree height)?\n");
- fscanf (fin, "%lf%lf%lf%lf", &birth, &death, &sample, &mut);
- fgets(line, lline, fin);
- printf("%9.4f %9.4f %9.4f %9.4f\n", birth, death, sample, mut);
- }
-
- if(com.seqtype==BASEseq) {
- fscanf(fin,"%d", &com.model);
- fgets(line, lline, fin);
- if(com.model<0 || com.model>REV) error2("model err");
- if(com.model==T92) error2("T92: please use HKY85 with T=A and C=G.");
-
- printf("\nModel: %s\n", basemodels[com.model]);
- if(com.model==REV) nrate=5;
- else if(com.model==TN93) nrate=2;
- FOR(i,nrate) fscanf(fin,"%lf",&Qrates[i]);
- fgets(line, lline, fin);
- if(nrate<=2) FOR(i,nrate) printf("kappa %9.5f\n",Qrates[i]); FPN(F0);
- if(nrate==5) {
- printf("a & b & c & d & e: ");
- FOR(i,nrate) printf("%9.5f",Qrates[i]); FPN(F0);
- }
- if((com.model==JC69 || com.model==F81)&&Qrates[0]!=1)
- error2("kappa should be 1 for this model");
- }
- else if(com.seqtype==CODONseq) {
- for(i=0; i<64; i++)
- getcodon(CODONs[i], i);
- if(com.model==0 && com.NSsites) { /* site model */
- fscanf(fin,"%d", &com.ncatG); fgets(line, lline, fin);
- if(com.ncatG>NCATG) error2("ncatG>NCATG");
- FOR(i,com.ncatG) fscanf(fin,"%lf",&com.freqK[i]); fgets(line, lline, fin);
- FOR(i,com.ncatG) fscanf(fin,"%lf",&com.rK[i]); fgets(line, lline, fin);
- printf("\n\ndN/dS (w) for site classes (K=%d)", com.ncatG);
- printf("\nf: "); FOR(i,com.ncatG) printf("%9.5f",com.freqK[i]);
- printf("\nw: "); FOR(i,com.ncatG) printf("%9.5f",com.rK[i]); FPN(F0);
- }
- else if(com.model && com.NSsites) { /* branchsite model */
- fscanf(fin,"%d",&com.ncatG); fgets(line, lline, fin);
- if(com.ncatG>min2(NCATG,127)) error2("ncatG too large");
- FOR(i,com.ncatG) fscanf(fin,"%lf",&com.freqK[i]); fgets(line,lline,fin);
- printf("\n%d site classes.\nFreqs: ", com.ncatG);
- FOR(i,com.ncatG) printf("%9.5f",com.freqK[i]);
-
- if((com.omegaBS=(double*)malloc((com.ncatG+2)*tree.nnode*sizeof(double)))==NULL)
- error2("oom");
- com.QfactorBS = com.omegaBS + com.ncatG*tree.nnode;
- blengthBS = com.QfactorBS + tree.nnode;
-
- for(i=0; i<tree.nnode; i++)
- blengthBS[i] = nodes[i].branch;
- for(k=0; k<com.ncatG; k++) {
- ReadTreeN(fin, &i, &j, 0, 1);
- if(i) error2("do not include branch lengths except in the first tree.");
- if(!j) error2("Use # to specify omega's for branches");
- for(i=0; i<tree.nnode; i++) com.omegaBS[i*com.ncatG+k]=nodes[i].label;
- }
- for(i=0; i<tree.nnode; i++)
- { nodes[i].branch=blengthBS[i]; nodes[i].label=nodes[i].omega=0; }
- for(i=0; i<tree.nnode; i++) { /* print out omega as node labels. */
- nodes[i].nodeStr=pc=(char*)malloc(20*com.ncatG*sizeof(char));
- sprintf(pc, "'[%.2f", com.omegaBS[i*com.ncatG+0]);
- for(k=1,pc+=strlen(pc); k<com.ncatG; k++,pc+=strlen(pc))
- sprintf(pc, ", %.2f", com.omegaBS[i*com.ncatG+k]);
- sprintf(pc, "]'");
- }
- FPN(F0); OutTreeN(F0,1,PrBranch|PrLabel); FPN(F0);
- }
- else if(com.model==0) { /* M0 */
- fscanf(fin,"%lf",&com.omega);
- fgets(line, lline, fin);
- printf("omega = %9.5f\n",com.omega);
- for(i=0; i<tree.nbranch; i++)
- nodes[tree.branches[i][1]].omega = com.omega;
- }
-
- fscanf(fin, "%lf", &com.kappa); fgets(line, lline, fin);
- printf("kappa = %9.5f\n",com.kappa);
- }
-
- if(com.seqtype==BASEseq || com.seqtype==AAseq) {
- fscanf(fin,"%lf%d", &com.alpha, &com.ncatG);
- fgets(line, lline, fin);
- if(com.alpha)
- printf("Gamma rates, alpha =%.4f (K=%d)\n", com.alpha, com.ncatG);
- else {
- com.ncatG=0;
- puts("Rates are constant over sites.");
- }
- }
- if(com.alpha || com.ncatG) { /* this is used for codon NSsites as well. */
- k = com.ls;
- if(com.seqtype==1 && com.model && com.NSsites) k *= tree.nnode;
- if((com.siterates=(double*)malloc(k*sizeof(double)))==NULL) error2("oom1");
- if((siteorder=(int*)malloc(com.ls*sizeof(int)))==NULL) error2("oom2");
- }
-
- if(com.seqtype==AAseq) { /* get aa substitution model and rate matrix */
- fscanf(fin,"%d",&com.model);
- printf("\nmodel: %s",aamodels[com.model]);
- if(com.model>=2) { fscanf(fin,"%s",com.daafile); GetDaa(NULL,com.daa); }
- fgets(line, lline, fin);
- }
-
- /* get freqs com.pi[] */
- if((com.seqtype==BASEseq && com.model>K80) ||
- com.seqtype==CODONseq ||
- (com.seqtype==AAseq && (com.model==1 || com.model==3)))
- for(k=0; k<com.ncode; k++) fscanf(fin,"%lf", &com.pi[k]);
- else if(com.model==0 || (com.seqtype==BASEseq && com.model<=K80))
- fillxc(com.pi, 1./com.ncode, com.ncode);
-
- printf("sum pi = 1 = %.6f:", sum(com.pi,com.ncode));
- matout2(F0, com.pi, com.ncode/4, 4, 9, 6);
- if(com.seqtype==CODONseq) {
- fscanf(fin, "%d", &com.icode); fgets(line, lline, fin);
- printf("genetic code = %d\n", com.icode);
- for(k=0; k<com.ncode; k++)
- if(GeneticCode[com.icode][k] == -1 && com.pi[k])
- error2("stop codons should have frequency 0?");
- }
-
- if(com.seqtype==BASEseq) {
- if(com.model<REV) {
- T=com.pi[0]; C=com.pi[1]; A=com.pi[2]; G=com.pi[3]; Y=T+C; R=A+G;
- if (com.model==F84) {
- Qrates[1]=1+Qrates[0]/R; /* kappa2 */
- Qrates[0]=1+Qrates[0]/Y; /* kappa1 */
- }
- else if (com.model<=HKY85) Qrates[1]=Qrates[0];
- Qfactor = 1/(2*T*C*Qrates[0] + 2*A*G*Qrates[1] + 2*Y*R);
- }
- else
- if(com.model==REV) EigenQbase(Qrates, com.pi, Root,U,V,PMat);
- }
-
- /* get Qfactor for NSsites & NSbranchsite models */
- if(com.seqtype==CODONseq && com.NSsites) {
- if(!com.model) { /* site models */
- for(k=0,Qfactor=0; k<com.ncatG; k++) {
- freqK_NS=com.freqK[k];
- EigenQcodon(1, com.kappa,com.rK[k],com.pi, NULL,NULL,NULL, PMat);
- }
- Qfactor=1/Qfactor;
- printf("Qfactor for NSsites model = %9.5f\n", Qfactor);
- }
- else { /* branch-site models */
- for(i=0; i<tree.nnode; i++) {
- if(i==tree.root) { com.QfactorBS[i]=-1; continue; }
- for(k=0,Qfactor=0; k<com.ncatG; k++) {
- freqK_NS=com.freqK[k];
- EigenQcodon(1, com.kappa,com.omegaBS[i*com.ncatG+k],com.pi, NULL,NULL,NULL, PMat);
- }
- com.QfactorBS[i]=1/Qfactor; Qfactor=0;
- printf("node %2d: Qfactor = %9.5f\n", i+1, com.QfactorBS[i]);
- }
- }
- }
- if(com.seqtype==CODONseq && com.ncatG<=1 && com.model==0)
- EigenQcodon(0, com.kappa,com.omega, com.pi, Root, U, V, PMat);
- else if(com.seqtype==AAseq)
- EigenQaa(com.pi, Root, U, V,PMat);
-
- puts("\nAll parameters are read. Ready to simulate\n");
- for(j=0; j<com.ns*2-1; j++)
- com.z[j] = (unsigned char*)malloc(com.ls*sizeof(unsigned char));
- sspace = max2(sspace, 8000000);
- space = (double*)malloc(sspace);
- if(com.alpha || com.ncatG) tmpseq=(char*)space;
- if (com.z[com.ns*2-1-1]==NULL) error2("oom for seqs");
- if (space==NULL) {
- printf("oom for space, %d bytes needed.", sspace);
- exit(-1);
- }
-
- fseq = gfopen(seqf[format], "w");
- if(format==2 || format==3) appendfile(fseq, paupstart);
-
- fanc = (FILE*)gfopen(ancf, "w");
- if(fixtree) {
- fputs("\nAncestral sequences generated during simulation ",fanc);
- fprintf(fanc, "(check against %s)\n", seqf[format]);
- OutTreeN(fanc,0,0); FPN(fanc); OutTreeB(fanc); FPN(fanc);
- }
- if(com.alpha || com.NSsites) {
- fsiteID=(FILE*)gfopen(siteIDf,"w");
- if(com.seqtype==1) fprintf(fsiteID, "\nSite class IDs\n");
- else fprintf(fsiteID, "\nRates for sites\n");
- if(com.seqtype==CODONseq && com.NSsites) {
- if(!com.model) matout(fsiteID,com.rK, 1,com.ncatG);
- if((com.siteID=(char*)malloc(com.ls*sizeof(char)))==NULL)
- error2("oom siteID");
- }
- }
-
- for (ir=0; ir<nr; ir++) {
- if (!fixtree) { /* right now tree is fixed */
- RandomLHistory (rooted, space);
- if (rooted && com.ns<10) j = GetIofLHistory ();
- BranchLengthBD (1, birth, death, sample, mut);
- if(com.ns<20) {
- printf ("\ntree used: ");
- OutTreeN(F0,1,1);
- FPN(F0);
- }
- }
- MakeSeq(com.z[tree.root], com.ls);
-
- if (com.alpha)
- Rates4Sites(com.siterates, com.alpha, com.ncatG, com.ls, 0,space);
- else if(com.seqtype==1 && com.NSsites) { /* for NSsites */
- /* the table for the alias algorithm is the same, but ncatG is small. */
- MultiNomialAliasSetTable(com.ncatG, com.freqK, Falias, Lalias, space);
- MultiNomialAlias(com.ls, com.ncatG, Falias, Lalias, counts);
-
- for (i=0,h=0; i<com.ncatG; i++)
- for (j=0; j<counts[i]; j++) {
- com.siteID[h]=(char)i;
- com.siterates[h++]=com.rK[i]; /* overwritten later for branchsite */
- }
- }
-
- Evolve1(tree.root);
-
- /* randomize sites for site-class model */
- if(com.siterates && com.ncatG>1) {
- if(format==1 && ir==0)
- puts("\nrequested site pattern counts as output for site-class model.\n");
- randorder(siteorder, com.ls, (int*)space);
- for(j=0; j<tree.nnode; j++) {
- memcpy(tmpseq,com.z[j],com.ls*sizeof(char));
- for(h=0; h<com.ls; h++) com.z[j][h]=tmpseq[siteorder[h]];
- }
- if(com.alpha || com.ncatG>1) {
- memcpy(space,com.siterates,com.ls*sizeof(double));
- for(h=0; h<com.ls; h++) com.siterates[h]=space[siteorder[h]];
- }
- if(com.siteID) {
- memcpy((char*)space,com.siteID,com.ls*sizeof(char));
- for(h=0; h<com.ls; h++) com.siteID[h]=*((char*)space+siteorder[h]);
- }
- }
-
- /* print sequences*/
- if(format==1 || format==3) {
- for(i=0; i<com.ns; i++) for(h=0; h<com.ls; h++) com.z[i][h] ++; /* coded as 1, 2, ... */
- PatternWeightSimple();
- for(i=0; i<com.ns; i++) for(h=0; h<com.npatt; h++) com.z[i][h] --; /* coded as 0, 1, ... */
- if(format==3)
- PatternWeightJC69like();
- }
- if(format==2 || format==3) fprintf(fseq,"\n\n[Replicate # %d]\n", ir+1);
- printSeqs(fseq, NULL, NULL, format); /* printsma not usable as it codes into 0,1,...,60. */
-
- if((format==2 || format==3) && !fixtree) {
- fprintf(fseq,"\nbegin tree;\n tree true_tree = [&U] ");
- OutTreeN(fseq,1,1); fputs(";\n",fseq);
- fprintf(fseq,"end;\n\n");
- }
- if(format==2 || format==3) appendfile(fseq, paupblock);
-
- /* print ancestral seqs, rates for sites. */
- if(format!=1 && format!=3) { /* don't print ancestors if site patterns are printed. */
- j = (com.seqtype==CODONseq?3*com.ls:com.ls);
- fprintf(fanc,"[replicate %d]\n",ir+1);
-
- if(!fixtree) {
- if(format<2)
- { OutTreeN(fanc,1,1); FPN(fanc); FPN(fanc); }
- }
- else {
- fprintf(fanc,"%6d %6d\n",tree.nnode-com.ns,j);
- for(j=com.ns; j<tree.nnode; j++,FPN(fanc)) {
- fprintf(fanc,"node%-26d ", j+1);
- print1seq(fanc, com.z[j], com.ls, NULL);
- }
- FPN(fanc);
-
- if(fsiteID) {
- if(com.seqtype==CODONseq && com.NSsites && com.model==0) { /* site model */
- k=0;
- if(com.rK[com.ncatG-1]>1)
- FOR(h,com.ls) if(com.rK[com.siteID[h]]>1) k++;
- fprintf(fsiteID, "\n[replicate %d: %2d]\n",ir+1, k);
- if(k) for(h=0,k=0; h<com.ls; h++) {
- if(com.rK[com.siteID[h]]>1) {
- fprintf(fsiteID,"%4d ",h+1);
- if(++k%15==0) FPN(fsiteID);
- }
- }
- FPN(fsiteID);
- }
- else if(com.seqtype==CODONseq && com.NSsites && com.model) { /* branchsite */
- fprintf(fsiteID, "\n[replicate %d]\n",ir+1);
- for(h=0; h<com.ls; h++) {
- fprintf(fsiteID," %4d ", com.siteID[h]+1);
- if(h==com.ls-1 || (h+1)%15==0) FPN(fsiteID);
- }
- }
- else { /* gamma rates */
- fprintf(fsiteID,"\n[replicate %d]\n",ir+1);
- for(h=0; h<com.ls; h++) {
- fprintf(fsiteID,"%7.4f ",com.siterates[h]);
- if(h==com.ls-1 || (h+1)%10==0) FPN(fsiteID);
- }
- }
- }
- }
- }
-
- printf ("\rdid data set %d %s", ir+1, (com.ls>100000||nr<100? "\n" : ""));
- } /* for (ir) */
- if(format==2 || format==3) appendfile(fseq, paupend);
-
- fclose(fseq); if(!fixtree) fclose(fanc);
- if(com.alpha || com.NSsites) fclose(fsiteID);
- for(j=0; j<com.ns*2-1; j++) free(com.z[j]);
- free(space);
- if(com.model && com.NSsites) /* branch-site model */
- for(i=0; i<tree.nnode; i++) free(nodes[i].nodeStr);
- free(nodes);
- if(com.alpha || com.ncatG) {
- free(com.siterates); com.siterates=NULL;
- free(siteorder);
- if(com.siteID) free(com.siteID); com.siteID=NULL;
- }
- if(com.seqtype==1 && com.model && com.NSsites) free(com.omegaBS);
- com.omegaBS = NULL;
-
- exit (0);
-}
-
-
-int GetSpnamesFromMB (FILE *fmb, char line[], int lline)
-{
-/* This reads species names from MrBayes output file fmb, like the following.
-
- Taxon 1 -> 1_Arabidopsis_thaliana
- Taxon 2 -> 2_Taxus_baccata
-*/
- int j, ispecies;
- char *p=NULL, *mbstr1="Taxon ", *mbstr2="->";
-
- puts("Reading species names from mb output file.\n");
- rewind(fmb);
- for(ispecies=0; ; ) {
- if(fgets(line, lline, fmb)==NULL) return(-1);
- if(strstr(line, mbstr1) && strstr(line, mbstr2)) {
- p=strstr(line, mbstr1)+5;
- sscanf(p, "%d", &ispecies);
- p=strstr(line, mbstr2)+3;
- if(com.spname[ispecies-1][0])
- error2("species name already read?");
-
- for(j=0; isgraph(*p)&&j<lline; ) com.spname[ispecies-1][j++] = *p++;
- com.spname[ispecies-1][j]=0;
-
- printf("\tTaxon %2d: %s\n", ispecies, com.spname[ispecies-1]);
- }
- else if (ispecies)
- break;
- }
- com.ns=ispecies;
- rewind(fmb);
-
- return(0);
-}
-
-char *GrepLine (FILE*fin, char*query, char* line, int lline)
-{
-/* This greps infile to search for query[], and returns NULL or line[].
-*/
- char *p=NULL;
-
- rewind(fin);
- for( ; ; ) {
- if(fgets(line, lline, fin)==NULL) return(NULL);
- if(strstr(line, query)) return(line);
- }
- return(NULL);
-}
-
-
-void CladeMrBayesProbabilities (char treefile[])
-{
-/* This reads a tree from treefile and then scans a set of MrBayes output files
- (mbfiles) to retrieve posterior probabilities for every clade in that tree.
- It first scans the first mb output file to get the species names.
-
- Sample mb output:
- 6 -- ...........................************* 8001 1.000 0.005 (0.000)
- 7 -- ....................******************** 8001 1.000 0.006 (0.000)
-
- Note 4 Jan 2014: This uses parti2B[], and is broken after i rewrote
- Tree2Partition().
-*/
- int lline=100000, i,j,k, nib, inode, parti2B[NS];
- char line[100000], *partition, *p;
- char symbol[2]=".*", cladestr[NS+1]={0};
- FILE *ftree, *fmb[20];
- double *Pclade, t;
-/*
- int nmbfiles=15;
- char *mbfiles[]={"mb-1e-5.out", "mb-2e-5.out", "mb-3e-5.out", "mb-4e-5.out",
-"mb-5e-5.out", "mb-6e-5.out", "mb-7e-5.out", "mb-8e-5.out",
-"mb-9e-5.out", "mb-1e-4.out", "mb-2e-4.out", "mb-3e-4.out",
-"mb-5e-4.out", "mb-1e-3.out", "mb-1e-2.out"};
-*/
- int nmbfiles=2;
- char *mbfiles[]={"mb-1e-4.out", "mb-1e-1.out"};
-
- printf("tree file is %s\nmb output files:\n", treefile);
- ftree=gfopen(treefile,"r");
- for(k=0; k<nmbfiles; k++)
- fmb[k]=gfopen(mbfiles[k],"r");
- for(k=0; k<nmbfiles; k++) printf("\t%s\n", mbfiles[k]);
-
- GetSpnamesFromMB(fmb[0], line, lline); /* read species names from mb output */
-
- fscanf (ftree, "%d%d", &i, &k);
- if(i && i!=com.ns) error2("do you mean to specify ns in the tree file?");
- i=(com.ns*2-1)*sizeof(struct TREEN);
- if((nodes=(struct TREEN*)malloc(i))==NULL) error2("oom");
- ReadTreeN (ftree, &i, &j, 0, 1);
-
- FPN(F0); OutTreeN(F0, 0, 0); FPN(F0); FPN(F0);
- nib=tree.nbranch-com.ns;
- for(i=0;i<tree.nnode;i++) {
- nodes[i].nodeStr = NULL;
- if(i>com.ns) nodes[i].nodeStr=(char*)malloc(100*sizeof(char));
- }
-
- partition=(char*)malloc(nib*com.ns*sizeof(char));
- if (partition==NULL) error2("oom");
- if((Pclade=(double*)malloc(nib*nmbfiles*sizeof(double)))==NULL)
- error2("oom");
- for(i=0;i<nib*nmbfiles; i++) Pclade[i]=0;
-
- Tree2Partition(partition);
-
- for(i=0; i<nib; i++) {
- inode=tree.branches[parti2B[i]][1];
- if(partition[i*com.ns+0])
- for(j=0; j<com.ns; j++) cladestr[j]=symbol[1-partition[i*com.ns+j]];
- else
- for(j=0; j<com.ns; j++) cladestr[j]=symbol[partition[i*com.ns+j]];
- printf("#%2d branch %2d node %2d %s", i+1, parti2B[i], inode, cladestr);
-
- for(k=0; k<nmbfiles; k++) {
- if(GrepLine(fmb[k], cladestr, line, lline)) {
- p=strstr(line,cladestr);
- sscanf(p+com.ns, "%lf%lf\0", &t, &Pclade[i*nmbfiles+k]);
- }
- }
- for(k=0; k<nmbfiles; k++) printf("%6.2f", Pclade[i*nmbfiles+k]);
- FPN(F0);
- for(k=0,p=nodes[inode].nodeStr; k<nmbfiles; k++) {
- sprintf(p, "%3.0f%s", Pclade[i*nmbfiles+k]*100,(k<nmbfiles-1?"/":""));
- p+=4;
- }
- }
- FPN(F0); OutTreeN(F0,1,PrLabel); FPN(F0);
-
- for(i=0; i<tree.nnode; i++) free(nodes[i].nodeStr);
- free(nodes); free(partition); free(Pclade);
- fclose(ftree);
- for(k=0; k<nmbfiles; k++) fclose(fmb[k]);
- exit(0);
-}
+/* evolver.c
+ Copyright, Ziheng Yang, April 1995.
+
+ cl -Ot -O2 evolver.c tools.c
+ cl -Ot -O2 -DCodonNSbranches -FeevolverNSbranches.exe evolver.c tools.c
+ cl -Ot -O2 -DCodonNSsites -FeevolverNSsites.exe evolver.c tools.c
+ cl -Ot -O2 -DCodonNSbranchsites -FeevolverNSbranchsites.exe evolver.c tools.c
+
+ cc -fast -o evolver evolver.c tools.c -lm
+ cc -O4 -DCodonNSbranches -o evolverNSbranches evolver.c tools.c -lm
+ cc -O4 -DCodonNSsites -o evolverNSsites evolver.c tools.c -lm
+ cc -O4 -DCodonNSbranchsites -o evolverNSbranchsites evolver.c tools.c -lm
+
+ evolver
+ evolver 5 MCbase.dat
+ evolver 6 MCcodon.dat
+ evolver 7 MCaa.dat
+ evolver 9 <TreesFile> <MasterTreeFile>
+*/
+
+/*
+#define CodonNSbranches
+#define CodonNSsites
+#define CodonNSbranchsites
+*/
+
+#include "paml.h"
+
+#define NS 5000
+#define NBRANCH (NS*2-2)
+#define MAXNSONS 20
+#define LSPNAME 50
+#define NCODE 64
+#define NCATG 40
+
+
+/* Program-wide settings and working buffers; the single instance is com.
+   main() compares com.ns (the number of species typed by the user) against
+   NS, fills com.spname[] with default names, and sets com.seqtype to
+   0, 1 or 2 for bases, codons and amino acids.
+*/
+struct CommonInfo {
+ unsigned char *z[2*NS-1];
+ char spname[NS][LSPNAME+1], daafile[512], cleandata, readpattern;
+ int ns, ls, npatt, np, ntime, ncode, clock, rooted, model, icode;
+ int seqtype, *pose, ncatG, NSsites;
+ int ngene, lgene[1], posG[1+1]; /* not used */
+ double piG[1][4], rgene[1]; /* not used */
+ double *fpatt, kappa, omega, alpha, pi[64], *conP, daa[20*20];
+ double freqK[NCATG], rK[NCATG];
+ char *siteID; /* used if ncatG>1 */
+ double *siterates; /* rates for gamma or omega for site or branch-site models */
+ double *omegaBS, *QfactorBS; /* omega IDs for branch-site models */
+} com;
+/* Branch list of the current tree; branches[] has room for NBRANCH entries
+   of two ints each (presumably the two node indices of a branch -- TODO confirm).
+*/
+struct TREEB {
+ int nbranch, nnode, root, branches[NBRANCH][2];
+} tree;
+/* Per-node record; nodes points to a heap array.  main() allocates
+   com.ns*2-1 entries before running options 1-4.  A node holds at most
+   MAXNSONS entries in sons[].
+*/
+struct TREEN {
+ int father, nson, sons[MAXNSONS], ibranch;
+ double branch, age, omega, label, *conP;
+ char *nodeStr, fossil;
+} *nodes;
+
+extern char BASEs[];
+extern int GeneticCode[][64], noisy;
+int LASTROUND=0; /* not used */
+
+#define EVOLVER
+#define NODESTRUCTURE
+#define BIRTHDEATH
+#include "treesub.c"
+#include "treespace.c"
+
+void TreeDistances(FILE* fout);
+void Simulate(char *ctlf);
+void MakeSeq(char *z, int ls);
+int EigenQbase(double rates[], double pi[], double Root[],double U[], double V[],double Q[]);
+int EigenQcodon (int getstats, double kappa,double omega,double pi[], double Root[], double U[], double V[], double Q[]);
+int EigenQaa(double pi[], double Root[], double U[], double V[],double Q[]);
+void CladeMrBayesProbabilities (char treefile[]);
+int between_f_and_x(void);
+void LabelClades(FILE *fout);
+
+char *MCctlf0[]={"MCbase.dat","MCcodon.dat","MCaa.dat"};
+char *seqf[]={"mc.paml", "mc.paml", "mc.nex", "mc.nex"};
+
+enum {JC69, K80, F81, F84, HKY85, T92, TN93, REV} BaseModels;
+char *basemodels[]={"JC69","K80","F81","F84","HKY85","T92","TN93","REV"};
+enum {Poisson, EqualInput, Empirical, Empirical_F} AAModels;
+char *aamodels[]={"Poisson", "EqualInput", "Empirical", "Empirical_F"};
+
+
+double PMat[NCODE*NCODE], U[NCODE*NCODE], V[NCODE*NCODE], Root[NCODE];
+static double Qfactor=-1, Qrates[5]; /* Qrates[] hold kappa's for nucleotides */
+
+
+/* Entry point of evolver.
+
+   Command-line use:
+      evolver                                   interactive menu
+      evolver 5|6|7 [ctlfile]                   simulate base/codon/aa data sets
+      evolver 9 [treefile [mastertreefile [pick1tree]]]   clade support values
+   A trailing "--stdout-no-buf" argument switches stdout to unbuffered mode.
+   When compiled with CodonNSbranches, CodonNSsites or CodonNSbranchsites the
+   program always runs option 6 with the corresponding model preset.
+
+   Fixes relative to the previous revision:
+    - the control file given on the command line is honoured for option 7
+      as well (the old test was option 4..6, and option 4 can never get
+      here with arguments);
+    - "evolver 9" without a tree file no longer passes a NULL argv[2] to
+      strcpy(); the default treefile is kept instead;
+    - file names are copied/read with a length bound of the 512-byte buffers;
+    - a failed read of the menu choice (EOF or non-numeric input) quits
+      instead of looping forever;
+    - choosing (9) in the menu now leaves the menu so that CladeSupport()
+      actually runs;
+    - the scratch buffer space[] is allocated once instead of on every pass.
+
+   Returns 0.  Note that several callees terminate the process themselves.
+*/
+int main (int argc, char*argv[])
+{
+   char *MCctlf=NULL, outf[512]="evolver.out", treefile[512]="mcmc.txt", mastertreefile[512]="\0";
+   int i, option=-1, ntree=1,rooted, BD=0, gotoption=0, pick1tree=-1;
+   double birth=-1,death=-1,sample=-1,mut=-1, *space=NULL;
+   FILE *fout=gfopen(outf,"w");
+
+   printf("EVOLVER in %s\n", pamlVerStr);
+   com.alpha=0; com.cleandata=1; com.model=0; com.NSsites=0;
+
+   if(argc>2 && !strcmp(argv[argc-1], "--stdout-no-buf"))
+      setvbuf(stdout, NULL, _IONBF, 0);
+   if(argc>1) {
+      gotoption=1;   sscanf(argv[1], "%d", &option);
+   }
+   if(argc==1)
+      printf("Results for options 1-4 & 8 go into %s\n",outf);
+   else if(option!=5 && option!=6 && option!=7 && option!=9) {
+      puts("Usage: \n\tevolver \n\tevolver option# MyDataFile"); exit(-1);
+   }
+   if(option>=5 && option<=7) {
+      /* without a file argument MCctlf stays NULL and the default in MCctlf0[] is used */
+      if(argc>2) MCctlf = argv[2];
+   }
+   else if(option==9) {
+      if(argc>2) {
+         strncpy(treefile, argv[2], sizeof(treefile)-1);
+         treefile[sizeof(treefile)-1] = '\0';
+      }
+      if(argc>3) {
+         strncpy(mastertreefile, argv[3], sizeof(mastertreefile)-1);
+         mastertreefile[sizeof(mastertreefile)-1] = '\0';
+      }
+      if(argc>4) sscanf(argv[4], "%d", &pick1tree);
+   }
+
+#if defined (CodonNSbranches)
+   option=6;  com.model=1;
+   MCctlf = (argc==3 ? argv[2] : "MCcodonNSbranches.dat");
+   gotoption = 1;
+#elif defined (CodonNSsites)
+   option=6;  com.NSsites=3;
+   MCctlf = (argc==3 ? argv[2] : "MCcodonNSsites.dat");
+   gotoption = 1;
+#elif defined (CodonNSbranchsites)
+   option=6;  com.model=1;  com.NSsites=3;
+   MCctlf = (argc==3 ? argv[2] : "MCcodonNSbranchsites.dat");
+   gotoption = 1;
+#endif
+
+   if(!gotoption) {
+      for(; ;) {
+         fflush(fout);
+         printf("\n\t(1) Get random UNROOTED trees?\n");
+         printf("\t(2) Get random ROOTED trees?\n");
+         printf("\t(3) List all UNROOTED trees?\n");
+         printf("\t(4) List all ROOTED trees?\n");
+         printf("\t(5) Simulate nucleotide data sets (use %s)?\n",MCctlf0[0]);
+         printf("\t(6) Simulate codon data sets (use %s)?\n",MCctlf0[1]);
+         printf("\t(7) Simulate amino acid data sets (use %s)?\n",MCctlf0[2]);
+         printf("\t(8) Calculate identical bi-partitions between trees?\n");
+         printf("\t(9) Calculate clade support values (evolver 9 treefile mastertreefile <pick1tree>)?\n");
+         printf("\t(11) Label clades?\n");
+         printf("\t(0) Quit?\n");
+
+         /* EOF or non-numeric input would otherwise redisplay the menu forever */
+         if(scanf("%d", &option)!=1) exit(0);
+
+         if(option==0) exit(0);
+         if(option>=5 && option<=7) break;
+         if(option==9) {
+            /* read the file names, then leave the menu so CladeSupport() below runs */
+            printf("tree file names? ");
+            if(scanf("%511s%511s", treefile, mastertreefile)<1)
+               error2("err reading tree file names");
+            break;
+         }
+         if(option<5) {
+            printf ("No. of species: ");
+            if(scanf ("%d", &com.ns)!=1) error2("err reading the number of species");
+         }
+         if(com.ns>NS) error2 ("Too many species.  Raise NS.");
+         if(space==NULL && (space=(double*)malloc(10000*sizeof(double)))==NULL) error2("oom");
+         rooted = !(option%2);
+         if(option<3) {
+            printf("\nnumber of trees & random number seed? ");
+            if(scanf("%d%d", &ntree, &i)!=2) error2("err reading number of trees & seed");
+            SetSeed(i, 1);
+            printf ("Want branch lengths from the birth-death process (0/1)? ");
+            if(scanf ("%d", &BD)!=1) error2("err reading birth-death choice");
+         }
+         if(option<=4) {
+            if(com.ns<3) error2("no need to do this?");
+            /* NOTE(review): nodes from an earlier pass is not released here because
+               it is not visible whether the callees free or keep it -- confirm. */
+            i = (com.ns*2-1)*sizeof(struct TREEN);
+            if((nodes=(struct TREEN*)malloc(i)) == NULL)
+               error2("oom");
+         }
+         switch (option) {
+         case(1):   /* random UNROOTED trees */
+         case(2):   /* random ROOTED trees */
+            /* default names */
+            if(com.ns<=52)
+               for(i=0; i<com.ns; i++)  sprintf(com.spname[i], "%c", (i<26 ? 'A'+i : 'a'+i-26));
+            else
+               for(i=0; i<com.ns; i++)  sprintf(com.spname[i], "S%d", i+1);
+
+            if(BD) {
+               printf ("\nbirth rate, death rate, sampling fraction, and ");
+               printf ("mutation rate (tree height)?\n");
+               if(scanf ("%lf%lf%lf%lf", &birth, &death, &sample, &mut)!=4)
+                  error2("err reading birth-death parameters");
+            }
+            for(i=0;i<ntree;i++) {
+               RandomLHistory (rooted, space);
+               if(BD)
+                  BranchLengthBD (1, birth, death, sample, mut);
+               if(com.ns<20&&ntree<10) { OutTreeN(F0, 0, BD); puts("\n"); }
+               OutTreeN(fout, 1, BD);  FPN(fout);
+            }
+            break;
+         case(3):
+         case(4):
+            ListTrees(fout, com.ns, rooted);
+            break;
+         case(8):  TreeDistances(fout);  break;
+         case(10): between_f_and_x();    break;
+         case(11): LabelClades(fout);    break;
+         default:  exit(0);
+         }
+      }
+   }
+
+   if(option>=5 && option<=7) {
+      com.seqtype = option-5;  /* 0, 1, 2 for bases, codons, & amino acids */
+      Simulate(MCctlf ? MCctlf : MCctlf0[option-5]);
+   }
+   else if(option==9) {
+      CladeSupport(fout, treefile, 1, mastertreefile, pick1tree);
+      /* CladeMrBayesProbabilities("/papers/BPPJC3sB/Karol.trees"); */
+   }
+   free(space);
+   return(0);
+}
+
+
+int between_f_and_x (void)
+{
+/* Interactive helper for the exponential transform used for frequency
+   parameters.  In a loop it prompts for a direction (0: x=>f, 1: f=>x,
+   -1: end) and the number of classes n, reads the first n-1 values into x[],
+   fills in x[n-1] (1 minus the sum of the others when going from f,
+   otherwise 0), calls f_and_x() with x as both array arguments, and prints
+   the n resulting values.
+   Returns 0 when the user enters -1 or when the input cannot be read.
+*/
+   int i, n=0, fromf=0;
+   double x[100];
+
+   for(;;) {
+      printf("\ndirection (0:x=>f; 1:f=>x; -1:end) & #classes? ");
+      /* stop on EOF or non-numeric input instead of looping forever on stale values */
+      if(scanf("%d",&fromf)!=1 || fromf==-1) return(0);
+      if(scanf("%d", &n)!=1) return(0);
+      if(n>100) error2("too many classes");
+      if(n<1) error2("need at least one class");   /* x[n-1] below must be a valid element */
+      printf("input the first %d values for %s? ",n-1,(fromf?"f":"x"));
+      for(i=0; i<n-1; i++)
+         if(scanf("%lf",&x[i])!=1) error2("error reading values");
+      x[n-1] = (fromf ? 1-sum(x,n-1) : 0);
+      f_and_x(x, x, n, fromf, 1);
+      matout(F0,x,1,n);
+   }
+}
+
+
+void LabelClades(FILE *fout)
+{
+/* Interactively label clades on a tree read from a file.
+   The user gives a tree file name and then, repeatedly, a search string.
+   Species whose names contain the string (with the match not immediately
+   followed by a digit) are selected, the most recent common ancestor (MRCA)
+   of the selected species is located, and that node is labelled with the
+   clade number.  When the tree is treated as unrooted and the MRCA found in
+   the first round is the root, a second round is run on the complementary
+   set of species.  A selection is reported as paraphyletic when unselected
+   species also descend from the MRCA.
+   Entering "end" stops the loop; the labelled tree is then written to fout
+   and the program exits.
+
+   anc[node-ns] is a bit set (one bit per species, SI bits per int) recording
+   which selected species have been seen below each internal node.
+*/
+   FILE *ftree;
+   int unrooted=1,iclade, sizeclade, mrca=-1, paraphyl, is, imrca, i,j,k, lasts=-1, haslength;
+   char key[96]="A", treef[64]="/A/F/flu/HA.all.prankcodon.tre", *p,chosen[NS], *endstr="end";
+   int *anc[NS-1], loc, bitmask, SI=sizeof(int)*8;
+   int debug;
+
+   printf("Tree file name? ");
+   scanf ("%63s", treef);   /* field width keeps the name inside treef[64] */
+   printf("Treat tree as unrooted (0 no, 1 yes)? ");
+   scanf ("%d", &unrooted);
+
+   ftree = gfopen (treef,"r");
+   fscanf (ftree, "%d%d", &com.ns, &j);
+   if(com.ns<=0) error2("need ns in tree file");
+   /* chosen[] and anc[] are sized by NS, so larger trees would overrun them */
+   if(com.ns>NS) error2("Too many species. Raise NS.");
+   debug = (com.ns<20);
+
+   i = (com.ns*2-1)*sizeof(struct TREEN);
+   if((nodes=(struct TREEN*)malloc(i))==NULL) error2("oom");
+   for(i=0; i<com.ns*2-1; i++) nodes[i].nodeStr = NULL;
+   for(i=0; i<com.ns-1; i++) {
+      anc[i] = (int*)malloc((com.ns/SI+1)*sizeof(int));
+      if(anc[i]==NULL) error2("oom");
+   }
+   ReadTreeN(ftree, &haslength, &j, 1, 0);
+   fclose(ftree);
+   if(debug) { OutTreeN(F0, 1, PrNodeNum); FPN(F0); }
+
+   for(iclade=0; iclade<com.ns-1; iclade++) {
+      printf("\nString for selecting sequences (followed by non-digit) (end to end)? ");
+      scanf("%95s", key);   /* field width keeps the string inside key[96] */
+      if(strcmp(endstr, key) == 0)
+         break;
+      for(i=0; i<com.ns; i++)
+         chosen[i] = '\0';
+
+      /* select species whose name contains key, unless a digit follows the match */
+      k = strlen(key);
+      for(i=0; i<com.ns; i++) {
+         if( (p=strstr(com.spname[i], key))
+            && !isdigit((unsigned char)p[k]) )
+            chosen[i] = 1;
+      }
+
+      /* look for MRCA, going through two rounds, assuming unrooted tree */
+      for(imrca=0; imrca<1+unrooted; imrca++) {
+         if(imrca)   /* second round: work on the complementary set */
+            for(i=0; i<com.ns; i++) chosen[i] = 1 - chosen[i];
+
+         for(i=0,sizeclade=0; i<com.ns; i++)
+            if(chosen[i]) {
+               sizeclade ++;
+               lasts = i;
+            }
+
+         if(sizeclade <= 1 || sizeclade >= com.ns-1) {
+            puts("unable to form a clade. <2 seqs.");
+            break;
+         }
+         for(i=0; i<com.ns-1; i++) for(j=0; j<com.ns/SI+1; j++)
+            anc[i][j] = 0;
+         for(is=0; is<com.ns; is++) {
+            if(chosen[is]==0) continue;
+            /* shift an unsigned 1 so that the top bit can be set without signed overflow */
+            loc = is/SI;  bitmask = (int)(1u << (is%SI));
+            for(j=nodes[is].father; j!=-1; j=nodes[j].father) {
+               anc[j-com.ns][loc] |= bitmask;
+               /* on the path from the last selected species, the first ancestor
+                  that has all selected species marked is the MRCA */
+               if(is==lasts) {
+                  for(i=0,k=0; i<com.ns; i++)
+                     if(anc[j-com.ns][i/SI] & (int)(1u<<(i%SI)))
+                        k ++;
+                  if(k==sizeclade) {
+                     mrca = j;  break;
+                  }
+               }
+            }
+         }
+         if(imrca==0 && mrca!=tree.root) /* 1st round is enough */
+            break;
+      }
+
+      if(sizeclade <= 1 || sizeclade >= com.ns-1 || mrca==tree.root) {
+         printf("Unable to label. Ignored.");
+         continue;
+      }
+
+      if(debug)
+         for(is=0; is<com.ns-1; is++) {
+            printf("\nnode %4d: ", is+com.ns);
+            for(j=0; j<com.ns; j++) {
+               loc = j/SI;  bitmask = (int)(1u << (j%SI));
+               printf(" %d", (anc[is][loc] & bitmask) != 0);
+            }
+         }
+
+      printf("\nClade #%d (%s): %d seqs selected, MRCA is %d\n", iclade+1, key, sizeclade, mrca+1);
+      /* count unselected species that also descend from the MRCA */
+      for(is=0,paraphyl=0; is<com.ns; is++) {
+         if(chosen[is] == 0)
+            for(j=nodes[is].father; j!=-1; j=nodes[j].father)
+               if(j==mrca) { paraphyl++;  break; }
+      }
+      if(paraphyl)
+         printf("\nThis clade is paraphyletic, & includes %d other sequences\n", paraphyl);
+
+      nodes[mrca].label = iclade+1;
+      if(debug) OutTreeN(F0, 1, haslength|PrLabel);
+   }
+
+   for(i=0; i<com.ns-1; i++) free(anc[i]);
+   OutTreeN(fout, 1, haslength|PrLabel); FPN(fout);
+   printf("Printed final tree with labels in evolver.out\n");
+   exit(0);
+}
+
+void TreeDistanceDistribution (FILE* fout)
+{
+/* This calculates figure 3.7 of Yang (2006).
+   This reads the file of all trees (such as 7s.all.trees), and calculates the
+   distribution of partition distance in all pairwise comparisons.
+
+   The tree file starts with the number of sequences and the number of trees.
+   For every tree i, PD1[k] is the proportion of the other trees that share k
+   bipartitions with it (written to fout), and mPD[k] is the running mean of
+   PD1[k] over trees 1..i (printed on the screen).  The program exits at the end.
+*/
+   int i,j,ntree, k,*nib, nsame, IBsame[NS], lpart=0;
+   char treef[64]="5s.all.trees", *partition;
+   FILE *ftree;
+   double mPD[NS], PD1[NS];  /* distribution of partition distances */
+
+   puts("Tree file name?");
+   scanf ("%63s", treef);   /* field width keeps the name inside treef[64] */
+
+   ftree=gfopen (treef,"r");
+   if(fscanf (ftree, "%d%d", &com.ns, &ntree)!=2) error2("need ns & ntree in tree file");
+   printf("%2d sequences %2d trees.\n", com.ns, ntree);
+   /* IBsame[], mPD[] and PD1[] are sized by NS */
+   if(com.ns>NS) error2("Too many species. Raise NS.");
+   /* proportions below are divided by ntree-1 */
+   if(ntree<2) error2("ntree");
+   i=(com.ns*2-1)*sizeof(struct TREEN);
+   if((nodes=(struct TREEN*)malloc(i))==NULL) error2("oom");
+
+   /* each tree gets (ns-1)*ns chars to hold its partitions */
+   lpart = (com.ns-1)*com.ns*sizeof(char);
+   i = ntree*lpart;
+   printf("\n%d bytes of space requested.\n", i);
+   partition = (char*)malloc(i);
+   nib = (int*)malloc(ntree*sizeof(int));
+   if (partition==NULL || nib==NULL) error2("out of memory");
+
+   puts("\ntree #: mean prop of tree pairs with 0 1 2 ... shared bipartitions\n");
+   fputs("\ntree #: prop of tree pairs with 0 1 2 ... shared bipartitions\n",fout);
+   for (i=0; i<ntree; i++) {
+      ReadTreeN (ftree, &j, &k, 0, 1);
+      nib[i]=tree.nbranch-com.ns;
+      Tree2Partition(partition+i*lpart);
+   }
+   for(k=0; k<com.ns-3; k++) mPD[k]=0;
+   for (i=0; i<ntree; i++,FPN(fout)) {
+      for(k=0; k<com.ns-3; k++) PD1[k]=0;
+      for (j=0; j<ntree; j++) {
+         if(j==i) continue;
+         nsame=NSameBranch(partition+i*lpart,partition+j*lpart, nib[i],nib[j],IBsame);
+         PD1[nsame] ++;
+      }
+      for(k=0; k<com.ns-3; k++) PD1[k] /= (ntree-1.);
+      for(k=0; k<com.ns-3; k++) mPD[k] = (mPD[k]*i+PD1[k])/(i+1.);
+      printf("%8d (%5.1f%%):", i+1,(i+1.)/ntree*100);
+      for(k=0; k<com.ns-3; k++) printf(" %7.4f", mPD[k]);
+      fprintf(fout, "%8d:", i+1);
+      for(k=0; k<com.ns-3; k++) fprintf(fout, " %7.4f", PD1[k]);
+      printf("%s", (com.ns<8||(i+1)%100==0 ? "\n" : "\r"));
+   }
+   free(partition); free(nodes); free(nib); fclose(ftree);
+   exit(0);
+}
+
+
+void TreeDistances (FILE* fout)
+{
+/* Count identical bi-partitions between trees read from a tree file.
+   The file starts with the number of sequences and the number of trees.
+   The user chooses between
+      1: comparing the first tree with each of the others, or
+      2: all pairwise comparisons (the default).
+
+   I think this is broken after i changed the routine Tree2Partition().
+
+   NOTE(review): parti2B[] is declared here but never assigned in this
+   function, yet the "first vs. others" branch reads it to pick branches of
+   the first tree.  The branch labels printed there cannot be trusted until
+   parti2B[] is filled in again.
+*/
+   int i,j,ntree, k,*nib, parti2B[NS], nsame, IBsame[NS],nIBsame[NS], lpart=0;
+   char treef[64]="5s.all.trees", *partition;
+   FILE *ftree;
+   double psame, mp, vp;
+
+   /*
+   TreeDistanceDistribution(fout);
+   */
+
+   puts("\nNumber of identical bi-partitions between trees.\nTree file name?");
+   scanf ("%63s", treef);   /* field width keeps the name inside treef[64] */
+
+   ftree=gfopen (treef,"r");
+   if(fscanf (ftree, "%d%d", &com.ns, &ntree)!=2) error2("need ns & ntree in tree file");
+   printf("%2d sequences %2d trees.\n", com.ns, ntree);
+   /* parti2B[], IBsame[] and nIBsame[] are sized by NS */
+   if(com.ns>NS) error2("Too many species. Raise NS.");
+   i=(com.ns*2-1)*sizeof(struct TREEN);
+   if((nodes=(struct TREEN*)malloc(i))==NULL) error2("oom");
+
+   if(ntree<2) error2("ntree");
+   printf ("\n%d species, %d trees\n", com.ns, ntree);
+   puts("\n\t1: first vs. rest?\n\t2: all pairwise comparisons?\n");
+   k=2;
+   scanf("%d", &k);
+
+   /* option 1 needs room for two trees only, option 2 for all of them */
+   lpart=(com.ns-1)*com.ns*sizeof(char);
+   i=(k==1?2:ntree)*lpart;
+   printf("\n%d bytes of space requested.\n", i);
+   partition=(char*)malloc(i);
+   nib=(int*)malloc(ntree*sizeof(int));
+   if (partition==NULL || nib==NULL) error2("out of memory");
+
+   if(k==2) {  /* pairwise comparisons */
+      fputs("Number of identical bi-partitions in pairwise comparisons\n",fout);
+      for (i=0; i<ntree; i++) {
+         ReadTreeN (ftree, &j, &k, 0, 1);
+         nib[i]=tree.nbranch-com.ns;
+         Tree2Partition(partition+i*lpart);
+      }
+      /* lower-triangular table, printed both on the screen and in fout */
+      for (i=0; i<ntree; i++,FPN(F0),FPN(fout)) {
+         printf("%2d (%2d):", i+1,nib[i]);
+         fprintf(fout,"%2d (%2d):", i+1,nib[i]);
+         for (j=0; j<i; j++) {
+            nsame=NSameBranch(partition+i*lpart,partition+j*lpart, nib[i],nib[j],IBsame);
+            printf(" %2d", nsame);
+            fprintf(fout," %2d", nsame);
+         }
+      }
+   }
+   else {  /* first vs. others */
+      ReadTreeN (ftree, &j, &k, 0, 1);
+      nib[0]=tree.nbranch-com.ns;
+      if (nib[0]==0) error2("1st tree is a star tree..");
+      Tree2Partition (partition);
+      fputs ("Comparing the first tree with the others\nFirst tree:\n",fout);
+      OutTreeN(fout,0,0); FPN(fout); OutTreeB(fout); FPN(fout);
+      fputs ("\nInternal branches in the first tree:\n",fout);
+      FOR(i,nib[0]) {
+         k=parti2B[i];   /* NOTE(review): parti2B[] is uninitialized here */
+         fprintf(fout,"%3d (%2d..%-2d): ( ",
+            i+1,tree.branches[k][0]+1,tree.branches[k][1]+1);
+         FOR(j,com.ns) if(partition[i*com.ns+j]) fprintf(fout,"%d ",j+1);
+         fputs(")\n",fout);
+      }
+      if(nodes[tree.root].nson<=2)
+         fputs("\nRooted tree, results may not be correct.\n",fout);
+      fputs("\nCorrect internal branches compared with the 1st tree:\n",fout);
+      FOR(k,nib[0]) nIBsame[k]=0;
+      for (i=1,mp=vp=0; i<ntree; i++,FPN(fout)) {
+         ReadTreeN (ftree, &j, &k, 0, 1);
+         nib[1]=tree.nbranch-com.ns;
+         Tree2Partition(partition+lpart);
+         nsame=NSameBranch (partition,partition+lpart, nib[0],nib[1],IBsame);
+
+         psame=nsame/(double)nib[0];
+         FOR(k,nib[0]) nIBsame[k]+=IBsame[k];
+         fprintf(fout,"1 vs. %3d: %4d: ", i+1,nsame);
+         FOR(k,nib[0]) if(IBsame[k]) fprintf(fout," %2d", k+1);
+         printf("1 vs. %5d: %6d/%d %10.4f\n", i+1,nsame,nib[0],psame);
+         /* running update of the sum of squared deviations and of the mean */
+         vp += square(psame - mp)*(i-1.)/i;
+         mp=(mp*(i-1.) + psame)/i;
+      }
+      vp=(ntree<=2 ? 0 : sqrt(vp/((ntree-1-1)*(ntree-1.))));
+      fprintf(fout,"\nmean and S.E. of proportion of identical partitions\n");
+      fprintf(fout,"between the 1st and all the other %d trees ", ntree-1);
+      fprintf(fout,"(ignore these if not revelant):\n %.4f +- %.4f\n", mp, vp);
+      fprintf(fout,"\nNumbers of times, out of %d, ", ntree-1);
+      fprintf(fout,"interior branches of tree 1 are present");
+      fputs("\n(This may be bootstrap support for nodes in tree 1)\n",fout);
+      FOR(k,nib[0]) {
+         /* NOTE(review): parti2B[] is uninitialized here as well */
+         i=tree.branches[parti2B[k]][0]+1;  j=tree.branches[parti2B[k]][1]+1;
+         fprintf(fout,"%3d (%2d..%-2d): %6d (%5.1f%%)\n",
+            k+1,i,j,nIBsame[k],nIBsame[k]*100./(ntree-1.));
+      }
+   }
+   free(partition); free(nodes); free(nib); fclose(ftree);
+}
+
+
+
+int EigenQbase(double rates[], double pi[],
+    double Root[],double U[],double V[],double Q[])
+{
+/* Construct the rate matrix Q[] for nucleotide model REV.
+   rates[0..4] are the exchangeabilities for the pairs (0,1), (0,2), (0,3),
+   (1,2) and (1,3); the rate for the pair (2,3) is fixed at 1.
+   Each entry Q[i][j] is multiplied by pi[j], the diagonal is set so that
+   rows sum to 0, and Q is rescaled by 1/mr, where mr = -sum_i pi[i]*Q[i][i].
+   eigenQREV() is then called on Q with the same frequencies pi[].
+   Returns 0.
+*/
+   int i,j,k;
+   double mr, space[4];
+
+   zero (Q, 16);
+   for (i=0,k=0; i<3; i++) for (j=i+1; j<4; j++)
+      if (i*4+j!=11) Q[i*4+j]=Q[j*4+i]=rates[k++];   /* 11 = 2*4+3 is the fixed rate */
+   for (i=0,Q[3*4+2]=Q[2*4+3]=1; i<4; i++) FOR (j,4) Q[i*4+j] *= pi[j];
+   for (i=0,mr=0; i<4; i++)
+      { Q[i*4+i]=0; Q[i*4+i]=-sum(Q+i*4, 4); mr-=pi[i]*Q[i*4+i]; }
+   abyx (1/mr, Q, 16);
+
+   /* use the pi[] that Q was built from, not the global com.pi */
+   eigenQREV(Q, pi, 4, Root, U, V, space);
+   return (0);
+}
+
+
+/* frequency of the site class currently processed by EigenQcodon(getstats=1);
+   set by the caller before each call */
+static double freqK_NS=-1;
+
+int EigenQcodon (int getstats, double kappa, double omega, double pi[],
+    double Root[], double U[], double V[], double Q[])
+{
+/* Construct the rate matrix Q[].
+   64 codons are used, and stop codons have 0 freqs.
+
+   Only codon pairs that differ at one position and do not involve a stop
+   codon get a nonzero rate: 1, times kappa for a transition, times omega
+   when the two codons encode different amino acids, times the frequency
+   pi[] of the target codon.  mr = -sum_i pi[i]*Q[i][i].
+
+   getstats != 0: only freqK_NS*mr is added to the global Qfactor; Root, U
+      and V are not used (callers pass NULL).
+   getstats == 0: Q is scaled by 1/mr when com.ncatG==0, otherwise by the
+      global Qfactor, and eigenQREV() is called on it.
+   Returns 0.
+*/
+   int n=com.ncode, i,j,k, c[2],ndiff,pos=0,from[3],to[3];
+   double mr, space[64];
+
+   for(i=0; i<n*n; i++) Q[i] = 0;
+   for (i=0; i<n; i++) FOR (j,i) {
+      /* split the codon indices into their three base-4 digits */
+      from[0]=i/16; from[1]=(i/4)%4; from[2]=i%4;
+      to[0]=j/16;   to[1]=(j/4)%4;   to[2]=j%4;
+      c[0]=GeneticCode[com.icode][i];   c[1]=GeneticCode[com.icode][j];
+      if (c[0]==-1 || c[1]==-1)  continue;
+      for (k=0,ndiff=0; k<3; k++)  if (from[k]!=to[k]) { ndiff++; pos=k; }
+      if (ndiff!=1)  continue;
+      Q[i*n+j]=1;
+      /* the two bases sum to 1 (0<->1) or 5 (2<->3) for a transition */
+      if ((from[pos]+to[pos]-1)*(from[pos]+to[pos]-5)==0)  Q[i*n+j]*=kappa;
+      if(c[0]!=c[1])  Q[i*n+j]*=omega;
+      Q[j*n+i]=Q[i*n+j];
+   }
+   /* use the pi[] argument throughout rather than mixing it with com.pi */
+   for(i=0; i<n; i++) for(j=0; j<n; j++)
+      Q[i*n+j] *= pi[j];
+   for(i=0,mr=0;i<n;i++) {
+      Q[i*n+i] = -sum(Q+i*n,n);
+      mr -= pi[i]*Q[i*n+i];
+   }
+
+   if(getstats)
+      Qfactor += freqK_NS * mr;
+   else {
+      if(com.ncatG==0) FOR(i,n*n) Q[i]*=1/mr;
+      else             FOR(i,n*n) Q[i]*=Qfactor;  /* NSsites models */
+      eigenQREV(Q, pi, n, Root, U, V, space);
+   }
+   return (0);
+}
+
+
+
+int EigenQaa (double pi[], double Root[], double U[], double V[], double Q[])
+{
+/* Construct the rate matrix Q[] for the 20 amino acids.
+   The exchangeabilities are filled with 1 for the Poisson and EqualInput
+   models, or taken from com.daa[]/100 for the Empirical and Empirical_F
+   models.  Each entry Q[i][j] is multiplied by pi[j] and the diagonal is set
+   so that rows sum to 0.  After eigenQREV() the eigenvalues in Root[] are
+   divided by mr = -sum_i pi[i]*Q[i][i]; Q itself is not rescaled.
+   Returns 0.
+*/
+   int n=20, i,j;
+   double mr, space[20];
+
+   FOR (i,n*n) Q[i]=0;
+   switch (com.model) {
+   case (Poisson)   : case (EqualInput) :
+      fillxc (Q, 1., n*n);  break;
+   case (Empirical)   : case (Empirical_F):
+      FOR(i,n) FOR(j,i) Q[i*n+j]=Q[j*n+i]=com.daa[i*n+j]/100;
+      break;
+   }
+   /* honour the pi[] argument, which used to be ignored in favour of com.pi */
+   FOR (i,n) FOR (j,n) Q[i*n+j]*=pi[j];
+   for (i=0,mr=0; i<n; i++) {
+      Q[i*n+i]=0; Q[i*n+i]=-sum(Q+i*n,n);  mr-=pi[i]*Q[i*n+i];
+   }
+
+   eigenQREV(Q, pi, n, Root, U, V, space);
+   FOR(i,n)  Root[i]=Root[i]/mr;
+
+   return (0);
+}
+
+
+int GetDaa (FILE* fout, double daa[])
+{
+/* Get the amino acid substitution rate matrix (grantham, dayhoff, jones, etc).
+   The file com.daafile holds the lower triangle of the 20x20 matrix, which is
+   read into daa[] and mirrored into the upper triangle; the diagonal is set
+   to 0.  For the Empirical model the 20 amino acid frequencies that follow
+   are read into com.pi[] and must sum to 1.
+   If fout is not NULL, the file name and the lower triangle are printed to it.
+   Returns 0; a read failure is reported through error2().
+*/
+   FILE * fdaa;
+   char aa3[4]="";
+   int i,j, n=20;
+
+   fdaa=gfopen(com.daafile, "r");
+   printf("\nReading rate matrix from %s\n", com.daafile);
+
+   for (i=0; i<n; i++) {
+      daa[i*n+i]=0;
+      for (j=0; j<i; j++) {
+         /* a short or malformed file must not leave daa[] partly uninitialized */
+         if(fscanf(fdaa, "%lf", &daa[i*n+j])!=1) error2("err aaRatefile");
+         daa[j*n+i]=daa[i*n+j];
+      }
+   }
+   if (com.model==Empirical) {
+      FOR(i,n) if(fscanf(fdaa,"%lf",&com.pi[i])!=1) error2("err aaRatefile");
+      if (fabs(1-sum(com.pi,20))>1e-4) error2("\nSum of aa freq. != 1\n");
+   }
+   fclose (fdaa);
+
+   if (fout) {
+      fprintf (fout, "\n%s\n", com.daafile);
+      FOR (i,n) {
+         fprintf (fout, "\n%4s", getAAstr(aa3,i));
+         FOR (j,i)  fprintf (fout, "%5.0f", daa[i*n+j]);
+      }
+      FPN (fout);
+   }
+
+   return (0);
+}
+
+
+
+
+void MakeSeq(char*z, int ls)
+{
+/* generate a random sequence of nucleotides, codons, or amino acids by
+   sampling com.pi[], or read the ancestral sequence from the file RootSeq.txt
+   if the file exists.
+
+   z receives one code per site: 0..n-1 with n=com.ncode.  When reading from
+   the file, non-letters are skipped and each letter is converted with
+   CodeChara(); for codon data three letters make up one site.
+   Note that the sequence length is taken from com.ls; the ls argument is not
+   used.
+*/
+   int i,j,h, n=com.ncode, ch, n31=(com.seqtype==1?3:1), lst;
+   double p[64],r, small=1e-5;
+   char rootseqf[]="RootSeq.txt", codon[4]=" ";
+   FILE *fseq=(FILE*)fopen(rootseqf,"r");
+   static int times=0;
+
+   if(fseq) {
+      if(times++==0) printf("Reading sequence at the root from file.\n\n");
+      if(com.siterates && com.ncatG>1)
+         error2("sequence for root doesn't work for site-class models");
+
+      for(lst=0; ; ) {
+         for(i=0; i<n31; i++) {
+            while((ch=fgetc(fseq)) !=EOF && !isalpha(ch)) ;
+            if(ch==EOF) error2("EOF when reading root sequence.");
+            if(isalpha(ch))
+               codon[i]=(char)(ch=CodeChara((char)ch, com.seqtype));
+         }
+         if(com.seqtype==1) ch = codon[0]*16 + codon[1]*4 + codon[2];
+         if(ch<0 || ch>n-1)
+            printf("error when reading site %d\n", lst+1);
+         /* only index com.pi[] with a code that is known to be in range */
+         else if(com.seqtype==1 && com.pi[ch]==0)
+            printf("you seem to have a stop codon in the root sequence\n");
+
+         z[lst++] = (char)ch;
+         if(lst==com.ls) break;
+      }
+      fclose(fseq);
+   }
+   else {
+      /* cumulative frequencies, then inverse-CDF sampling for each site */
+      for(j=0; j<n; j++)  p[j] = com.pi[j];
+      for(j=1; j<n; j++)  p[j] += p[j-1];
+      if(fabs(p[n-1]-1) > small)
+         { printf("\nsum pi = %.6f != 1!\n", p[n-1]); exit(-1); }
+      for(h=0; h<com.ls; h++) {
+         for(j=0,r=rndu();j<n-1;j++)
+            if(r<p[j]) break;
+         z[h] = (char)j;
+      }
+   }
+}
+
+
+
+void Evolve1 (int inode)
+{
+/* evolve sequence com.z[tree.root] along the tree to generate com.z[],
+ using nodes[].branch, nodes[].omega, & com.model
+ Needs com.z[0,1,...,nnode-1], while com.z[0] -- com.z[ns-1] constitute
+ the data.
+ For codon sequences, com.siterates[] has w's for NSsites and NSbranchsite models.
+
+ For every son of inode the parent sequence is copied to the son and each
+ site is then changed by sampling from the row of the transition probability
+ matrix PMat that corresponds to the current state.  The function calls
+ itself on every son that has sons of its own.
+*/
+ int is, h,i,j, ison, from, n=com.ncode, longseq=100000;
+ double t, rw;
+
+ for (is=0; is<nodes[inode].nson; is++) {
+ ison=nodes[inode].sons[is];
+ /* the son starts as a copy of its parent */
+ memcpy(com.z[ison],com.z[inode],com.ls*sizeof(unsigned char));
+ t=nodes[ison].branch;
+
+ if(com.seqtype==1 && com.model && com.NSsites) { /* branch-site models */
+ /* omega for each site depends on both the branch and the site class */
+ Qfactor = com.QfactorBS[ison];
+ for(h=0; h<com.ls; h++)
+ com.siterates[h] = com.omegaBS[ison*com.ncatG+com.siteID[h]];
+ }
+
+ for(h=0; h<com.ls; h++) {
+ /* decide whether to recalculate PMat[]: needed at the first site and
+ whenever the rate (or omega) differs from that of the previous site. */
+ if (h==0 || (com.siterates && com.siterates[h]!=com.siterates[h-1])) {
+ rw = (com.siterates?com.siterates[h]:1);
+
+ switch(com.seqtype) {
+ case (BASEseq):
+ if(com.model<=TN93)
+ PMatTN93(PMat, t*Qfactor*rw*Qrates[0],
+ t*Qfactor*rw*Qrates[1], t*Qfactor*rw, com.pi);
+ else if(com.model==REV)
+ PMatUVRoot(PMat, t*rw, com.ncode, U,V,Root);
+ break;
+
+ case (CODONseq): /* Watch out for NSsites model */
+ if(com.model || com.NSsites) { /* no need to update UVRoot if M0 */
+ if(com.model && com.NSsites==0) /* branch */
+ rw = nodes[ison].omega; /* should be equal to com.rK[nodes[].label] */
+
+ /* here rw is used as omega; PMat serves as work space for Q */
+ EigenQcodon(0, com.kappa, rw, com.pi, Root, U, V, PMat);
+ }
+ PMatUVRoot(PMat, t, com.ncode, U, V, Root);
+ break;
+
+ case (AAseq):
+ PMatUVRoot(PMat, t*rw, com.ncode, U, V, Root);
+ break;
+ }
+ /* turn each row of PMat into cumulative probabilities for sampling */
+ for(i=0; i<n; i++)
+ for(j=1;j<n;j++)
+ PMat[i*n+j] += PMat[i*n+j-1];
+ }
+ /* draw the new state j given the current state from; rw is reused to
+ hold the uniform random number */
+ for(j=0,from=com.z[ison][h],rw=rndu(); j<n-1; j++)
+ if(rw < PMat[from*n+j]) break;
+ com.z[ison][h] = j;
+ }
+
+ /* progress message for long sequences only */
+ if(com.ls>longseq) printf("\r nodes %2d -> %2d, evolving . . ", inode+1, ison+1);
+
+ if(nodes[ison].nson) Evolve1(ison);
+ } /* for (is) */
+
+ if(inode==tree.root && com.ls>longseq) printf("\r%s", strc(50,' '));
+}
+
+
+
+void Simulate (char *ctlf)
+{
+/* simulate nr data sets of nucleotide, codon, or AA sequences.
+   ls: number of nucleotides, codons, or AAs in each sequence.
+   All 64 codons are used for codon sequences.
+   When com.alpha or com.ncatG>1, sites are randomized after sequences are
+   generated.
+   space[com.ls] is used to hold site marks.
+   format: 0: paml sites; 1: paml patterns; 2: paup nex; 3: paup JC69 format
+
+   Options are read from the control file ctlf in this order: output format,
+   random number seed, ns ls nr, tree length and tree, model parameters
+   (depending on com.seqtype, com.model and com.NSsites), and frequencies.
+   The sequences go into seqf[format], ancestral sequences into ancestral.txt
+   and site classes or site rates into siterates.txt.
+   The program exits when all replicates are done.
+*/
+   char *ancf="ancestral.txt", *siteIDf="siterates.txt";
+   FILE *fin, *fseq, *ftree=NULL, *fanc=NULL, *fsiteID=NULL;
+   char *paupstart="paupstart",*paupblock="paupblock",*paupend="paupend";
+   char line[32000];
+   int lline=32000, i,j,k, ir,n,nr, fixtree=1, sspace=10000, rooted=1;
+   int h=0,format=0, b[3]={0}, nrate=1, counts[NCATG];
+   int *siteorder=NULL;
+   char *tmpseq=NULL, *pc;
+   double birth=0, death=0, sample=1, mut=1, tlength, *space, *blengthBS;
+   double T,C,A,G,Y,R, Falias[NCATG];
+   int Lalias[NCATG];
+
+   noisy = 1;
+   printf("\nReading options from data file %s\n", ctlf);
+   com.ncode = n = (com.seqtype==0 ? 4 : (com.seqtype==1?64:20));
+   fin = (FILE*)gfopen(ctlf,"r");
+   /* each fgets() below discards the rest of the line (comments in the file) */
+   fscanf(fin, "%d", &format);
+   fgets(line, lline, fin);
+   printf("\nSimulated data will go into %s.\n", seqf[format]);
+   if(format==2) printf("%s, %s, & %s will be appended if existent.\n", paupstart,paupblock,paupend);
+
+   fscanf (fin, "%d", &i);
+   fgets(line, lline, fin);
+   SetSeed(i, 1);
+   fscanf (fin, "%d%d%d", &com.ns, &com.ls, &nr);
+   fgets(line, lline, fin);
+   i=(com.ns*2-1)*sizeof(struct TREEN);
+   if((nodes=(struct TREEN*)malloc(i))==NULL) error2("oom");
+
+   if(com.ns>NS) error2("too many seqs?");
+   printf ("\n%d seqs, %d sites, %d replicate(s)\n", com.ns, com.ls, nr);
+   k=(com.ns*com.ls* (com.seqtype==CODONseq?4:1) *nr)/1000+1;
+   printf ("Seq file will be about %dK bytes.\n",k);
+   for(i=0; i<com.ns; i++)  /* default spname */
+      sprintf(com.spname[i],"S%d",i+1);
+
+   if(fixtree) {
+      fscanf(fin, "%lf", &tlength);  fgets(line, lline, fin);
+      if(ReadTreeN(fin, &i, &j, 1, 1))  /* might overwrite spname */
+         error2("err tree..");
+
+      if(i==0) error2("use : to specify branch lengths in tree");
+      for(i=0,T=0; i<tree.nnode; i++)
+         if(i!=tree.root) T += nodes[i].branch;
+      /* a positive tlength rescales the branches so that they sum to tlength */
+      if(tlength>0) {
+         for(i=0; i<tree.nnode; i++)
+            if(i!=tree.root) nodes[i].branch *= tlength/T;
+      }
+      printf("tree length = %.3f\n", (tlength>0?tlength:T));
+      if(com.ns<100) {
+         printf("\nModel tree & branch lengths:\n");
+         OutTreeN(F0,1,1); FPN(F0);
+         OutTreeN(F0,0,1); FPN(F0);
+      }
+      if(com.seqtype==CODONseq && com.model && !com.NSsites) { /* branch model */
+         FOR(i,tree.nnode) nodes[i].omega=nodes[i].label;
+         FPN(F0); OutTreeN(F0, 1, PrBranch|PrLabel); FPN(F0);
+      }
+   }
+   else {  /* random trees, broken or need testing? */
+      printf ("\nbirth rate, death rate, sampling fraction, mutation rate (tree height)?\n");
+      fscanf (fin, "%lf%lf%lf%lf", &birth, &death, &sample, &mut);
+      fgets(line, lline, fin);
+      printf("%9.4f %9.4f %9.4f %9.4f\n", birth, death, sample, mut);
+   }
+
+   if(com.seqtype==BASEseq) {
+      fscanf(fin,"%d", &com.model);
+      fgets(line, lline, fin);
+      if(com.model<0 || com.model>REV) error2("model err");
+      if(com.model==T92) error2("T92: please use HKY85 with T=A and C=G.");
+
+      printf("\nModel: %s\n", basemodels[com.model]);
+      if(com.model==REV) nrate=5;
+      else if(com.model==TN93) nrate=2;
+      FOR(i,nrate) fscanf(fin,"%lf",&Qrates[i]);
+      fgets(line, lline, fin);
+      if(nrate<=2) FOR(i,nrate) printf("kappa %9.5f\n",Qrates[i]);
+      FPN(F0);
+      if(nrate==5) {
+         printf("a & b & c & d & e: ");
+         FOR(i,nrate) printf("%9.5f",Qrates[i]);
+         FPN(F0);
+      }
+      if((com.model==JC69 || com.model==F81)&&Qrates[0]!=1)
+         error2("kappa should be 1 for this model");
+   }
+   else if(com.seqtype==CODONseq) {
+      for(i=0; i<64; i++)
+         getcodon(CODONs[i], i);
+      if(com.model==0 && com.NSsites) {  /* site model */
+         fscanf(fin,"%d", &com.ncatG);  fgets(line, lline, fin);
+         if(com.ncatG>NCATG) error2("ncatG>NCATG");
+         FOR(i,com.ncatG) fscanf(fin,"%lf",&com.freqK[i]);
+         fgets(line, lline, fin);
+         FOR(i,com.ncatG) fscanf(fin,"%lf",&com.rK[i]);
+         fgets(line, lline, fin);
+         printf("\n\ndN/dS (w) for site classes (K=%d)", com.ncatG);
+         printf("\nf: "); FOR(i,com.ncatG) printf("%9.5f",com.freqK[i]);
+         printf("\nw: "); FOR(i,com.ncatG) printf("%9.5f",com.rK[i]);
+         FPN(F0);
+      }
+      else if(com.model && com.NSsites) {  /* branchsite model */
+         fscanf(fin,"%d",&com.ncatG);  fgets(line, lline, fin);
+         if(com.ncatG>min2(NCATG,127)) error2("ncatG too large");
+         FOR(i,com.ncatG) fscanf(fin,"%lf",&com.freqK[i]);
+         fgets(line,lline,fin);
+         printf("\n%d site classes.\nFreqs: ", com.ncatG);
+         FOR(i,com.ncatG) printf("%9.5f",com.freqK[i]);
+
+         /* one block holds omegaBS[nnode*ncatG], QfactorBS[nnode] and blengthBS[nnode] */
+         if((com.omegaBS=(double*)malloc((com.ncatG+2)*tree.nnode*sizeof(double)))==NULL)
+            error2("oom");
+         com.QfactorBS = com.omegaBS + com.ncatG*tree.nnode;
+         blengthBS = com.QfactorBS + tree.nnode;
+
+         /* save the branch lengths, as the trees read next carry omega labels only */
+         for(i=0; i<tree.nnode; i++)
+            blengthBS[i] = nodes[i].branch;
+         for(k=0; k<com.ncatG; k++) {
+            ReadTreeN(fin, &i, &j, 0, 1);
+            if(i) error2("do not include branch lengths except in the first tree.");
+            if(!j) error2("Use # to specify omega's for branches");
+            for(i=0; i<tree.nnode; i++) com.omegaBS[i*com.ncatG+k]=nodes[i].label;
+         }
+         for(i=0; i<tree.nnode; i++)
+            { nodes[i].branch=blengthBS[i];  nodes[i].label=nodes[i].omega=0; }
+         for(i=0; i<tree.nnode; i++) {  /* print out omega as node labels. */
+            nodes[i].nodeStr=pc=(char*)malloc(20*com.ncatG*sizeof(char));
+            sprintf(pc, "'[%.2f", com.omegaBS[i*com.ncatG+0]);
+            for(k=1,pc+=strlen(pc); k<com.ncatG; k++,pc+=strlen(pc))
+               sprintf(pc, ", %.2f", com.omegaBS[i*com.ncatG+k]);
+            sprintf(pc, "]'");
+         }
+         FPN(F0); OutTreeN(F0,1,PrBranch|PrLabel); FPN(F0);
+      }
+      else if(com.model==0) {  /* M0 */
+         fscanf(fin,"%lf",&com.omega);
+         fgets(line, lline, fin);
+         printf("omega = %9.5f\n",com.omega);
+         for(i=0; i<tree.nbranch; i++)
+            nodes[tree.branches[i][1]].omega = com.omega;
+      }
+
+      fscanf(fin, "%lf", &com.kappa);  fgets(line, lline, fin);
+      printf("kappa = %9.5f\n",com.kappa);
+   }
+
+   if(com.seqtype==BASEseq || com.seqtype==AAseq) {
+      fscanf(fin,"%lf%d", &com.alpha, &com.ncatG);
+      fgets(line, lline, fin);
+      if(com.alpha)
+         printf("Gamma rates, alpha =%.4f (K=%d)\n", com.alpha, com.ncatG);
+      else {
+         com.ncatG=0;
+         puts("Rates are constant over sites.");
+      }
+   }
+   if(com.alpha || com.ncatG) { /* this is used for codon NSsites as well. */
+      k = com.ls;
+      if(com.seqtype==1 && com.model && com.NSsites) k *= tree.nnode;
+      if((com.siterates=(double*)malloc(k*sizeof(double)))==NULL) error2("oom1");
+      if((siteorder=(int*)malloc(com.ls*sizeof(int)))==NULL) error2("oom2");
+   }
+
+   if(com.seqtype==AAseq) { /* get aa substitution model and rate matrix */
+      fscanf(fin,"%d",&com.model);
+      printf("\nmodel: %s",aamodels[com.model]);
+      if(com.model>=2)  { fscanf(fin,"%s",com.daafile); GetDaa(NULL,com.daa); }
+      fgets(line, lline, fin);
+   }
+
+   /* get freqs com.pi[] */
+   if((com.seqtype==BASEseq && com.model>K80) ||
+       com.seqtype==CODONseq ||
+      (com.seqtype==AAseq && (com.model==1 || com.model==3)))
+      for(k=0; k<com.ncode; k++) fscanf(fin,"%lf", &com.pi[k]);
+   else if(com.model==0 || (com.seqtype==BASEseq && com.model<=K80))
+      fillxc(com.pi, 1./com.ncode, com.ncode);
+
+   printf("sum pi = 1 = %.6f:", sum(com.pi,com.ncode));
+   matout2(F0, com.pi, com.ncode/4, 4, 9, 6);
+   if(com.seqtype==CODONseq) {
+      fscanf(fin, "%d", &com.icode);  fgets(line, lline, fin);
+      printf("genetic code = %d\n", com.icode);
+      for(k=0; k<com.ncode; k++)
+         if(GeneticCode[com.icode][k] == -1 && com.pi[k])
+            error2("stop codons should have frequency 0?");
+   }
+
+   if(com.seqtype==BASEseq) {
+      if(com.model<REV) {
+         T=com.pi[0]; C=com.pi[1]; A=com.pi[2]; G=com.pi[3]; Y=T+C; R=A+G;
+         if (com.model==F84) {
+            Qrates[1]=1+Qrates[0]/R;   /* kappa2 */
+            Qrates[0]=1+Qrates[0]/Y;   /* kappa1 */
+         }
+         else if (com.model<=HKY85) Qrates[1]=Qrates[0];
+         Qfactor = 1/(2*T*C*Qrates[0] + 2*A*G*Qrates[1] + 2*Y*R);
+      }
+      else
+         if(com.model==REV) EigenQbase(Qrates, com.pi, Root,U,V,PMat);
+   }
+
+   /* get Qfactor for NSsites & NSbranchsite models */
+   if(com.seqtype==CODONseq && com.NSsites) {
+      if(!com.model) {  /* site models */
+         for(k=0,Qfactor=0; k<com.ncatG; k++) {
+            freqK_NS=com.freqK[k];
+            EigenQcodon(1, com.kappa,com.rK[k],com.pi, NULL,NULL,NULL, PMat);
+         }
+         Qfactor=1/Qfactor;
+         printf("Qfactor for NSsites model = %9.5f\n", Qfactor);
+      }
+      else {  /* branch-site models */
+         for(i=0; i<tree.nnode; i++) {
+            if(i==tree.root) { com.QfactorBS[i]=-1; continue; }
+            for(k=0,Qfactor=0; k<com.ncatG; k++) {
+               freqK_NS=com.freqK[k];
+               EigenQcodon(1, com.kappa,com.omegaBS[i*com.ncatG+k],com.pi, NULL,NULL,NULL, PMat);
+            }
+            com.QfactorBS[i]=1/Qfactor;  Qfactor=0;
+            printf("node %2d: Qfactor = %9.5f\n", i+1, com.QfactorBS[i]);
+         }
+      }
+   }
+   if(com.seqtype==CODONseq && com.ncatG<=1 && com.model==0)
+      EigenQcodon(0, com.kappa,com.omega, com.pi, Root, U, V, PMat);
+   else if(com.seqtype==AAseq)
+      EigenQaa(com.pi, Root, U, V,PMat);
+
+   puts("\nAll parameters are read. Ready to simulate\n");
+   /* check every sequence buffer, not just the last one */
+   for(j=0; j<com.ns*2-1; j++) {
+      com.z[j] = (unsigned char*)malloc(com.ls*sizeof(unsigned char));
+      if (com.z[j]==NULL) error2("oom for seqs");
+   }
+   sspace = max2(sspace, 8000000);
+   /* the site rates (com.ls doubles) are copied into space when sites are shuffled */
+   sspace = max2(sspace, com.ls*(int)sizeof(double));
+   space = (double*)malloc(sspace);
+   if (space==NULL) {
+      printf("oom for space, %d bytes needed.", sspace);
+      exit(-1);
+   }
+   if(com.alpha || com.ncatG) tmpseq=(char*)space;
+
+   fseq = gfopen(seqf[format], "w");
+   if(format==2 || format==3) appendfile(fseq, paupstart);
+
+   fanc = (FILE*)gfopen(ancf, "w");
+   if(fixtree) {
+      fputs("\nAncestral sequences generated during simulation ",fanc);
+      fprintf(fanc, "(check against %s)\n", seqf[format]);
+      OutTreeN(fanc,0,0); FPN(fanc); OutTreeB(fanc); FPN(fanc);
+   }
+   if(com.alpha || com.NSsites) {
+      fsiteID=(FILE*)gfopen(siteIDf,"w");
+      if(com.seqtype==1) fprintf(fsiteID, "\nSite class IDs\n");
+      else               fprintf(fsiteID, "\nRates for sites\n");
+      if(com.seqtype==CODONseq && com.NSsites) {
+         if(!com.model) matout(fsiteID,com.rK, 1,com.ncatG);
+         if((com.siteID=(char*)malloc(com.ls*sizeof(char)))==NULL)
+            error2("oom siteID");
+      }
+   }
+
+   for (ir=0; ir<nr; ir++) {
+      if (!fixtree) {    /* right now tree is fixed */
+         RandomLHistory (rooted, space);
+         if (rooted && com.ns<10) j = GetIofLHistory ();
+         BranchLengthBD (1, birth, death, sample, mut);
+         if(com.ns<20) {
+            printf ("\ntree used: ");
+            OutTreeN(F0,1,1);
+            FPN(F0);
+         }
+      }
+      MakeSeq(com.z[tree.root], com.ls);
+
+      if (com.alpha)
+         Rates4Sites(com.siterates, com.alpha, com.ncatG, com.ls, 0,space);
+      else if(com.seqtype==1 && com.NSsites) { /* for NSsites */
+         /* the table for the alias algorithm is the same, but ncatG is small. */
+         MultiNomialAliasSetTable(com.ncatG, com.freqK, Falias, Lalias, space);
+         MultiNomialAlias(com.ls, com.ncatG, Falias, Lalias, counts);
+
+         /* sites are laid out class by class here and shuffled after evolving */
+         for (i=0,h=0; i<com.ncatG; i++)
+            for (j=0; j<counts[i]; j++) {
+               com.siteID[h]=(char)i;
+               com.siterates[h++]=com.rK[i]; /* overwritten later for branchsite */
+            }
+      }
+
+      Evolve1(tree.root);
+
+      /* randomize sites for site-class model */
+      if(com.siterates && com.ncatG>1) {
+         if(format==1 && ir==0)
+            puts("\nrequested site pattern counts as output for site-class model.\n");
+         randorder(siteorder, com.ls, (int*)space);
+         for(j=0; j<tree.nnode; j++) {
+            memcpy(tmpseq,com.z[j],com.ls*sizeof(char));
+            for(h=0; h<com.ls; h++) com.z[j][h]=tmpseq[siteorder[h]];
+         }
+         if(com.alpha || com.ncatG>1) {
+            memcpy(space,com.siterates,com.ls*sizeof(double));
+            for(h=0; h<com.ls; h++) com.siterates[h]=space[siteorder[h]];
+         }
+         if(com.siteID) {
+            memcpy((char*)space,com.siteID,com.ls*sizeof(char));
+            for(h=0; h<com.ls; h++) com.siteID[h]=*((char*)space+siteorder[h]);
+         }
+      }
+
+      /* print sequences*/
+      if(format==1 || format==3) {
+         for(i=0; i<com.ns; i++) for(h=0; h<com.ls; h++) com.z[i][h] ++;  /* coded as 1, 2, ... */
+         PatternWeightSimple();
+         for(i=0; i<com.ns; i++) for(h=0; h<com.npatt; h++) com.z[i][h] --;  /* coded as 0, 1, ... */
+         if(format==3)
+            PatternWeightJC69like();
+      }
+      if(format==2 || format==3) fprintf(fseq,"\n\n[Replicate # %d]\n", ir+1);
+      printSeqs(fseq, NULL, NULL, format); /* printsma not usable as it codes into 0,1,...,60. */
+
+      if((format==2 || format==3) && !fixtree) {
+         fprintf(fseq,"\nbegin tree;\n tree true_tree = [&U] ");
+         OutTreeN(fseq,1,1); fputs(";\n",fseq);
+         fprintf(fseq,"end;\n\n");
+      }
+      if(format==2 || format==3) appendfile(fseq, paupblock);
+
+      /* print ancestral seqs, rates for sites. */
+      if(format!=1 && format!=3) {  /* don't print ancestors if site patterns are printed. */
+         j = (com.seqtype==CODONseq?3*com.ls:com.ls);
+         fprintf(fanc,"[replicate %d]\n",ir+1);
+
+         if(!fixtree) {
+            if(format<2)
+               { OutTreeN(fanc,1,1); FPN(fanc); FPN(fanc); }
+         }
+         else {
+            fprintf(fanc,"%6d %6d\n",tree.nnode-com.ns,j);
+            for(j=com.ns; j<tree.nnode; j++,FPN(fanc)) {
+               fprintf(fanc,"node%-26d ", j+1);
+               print1seq(fanc, com.z[j], com.ls, NULL);
+            }
+            FPN(fanc);
+
+            if(fsiteID) {
+               if(com.seqtype==CODONseq && com.NSsites && com.model==0) { /* site model */
+                  /* list the sites whose class has omega > 1 */
+                  k=0;
+                  if(com.rK[com.ncatG-1]>1)
+                     FOR(h,com.ls) if(com.rK[com.siteID[h]]>1) k++;
+                  fprintf(fsiteID, "\n[replicate %d: %2d]\n",ir+1, k);
+                  if(k) for(h=0,k=0; h<com.ls; h++) {
+                     if(com.rK[com.siteID[h]]>1) {
+                        fprintf(fsiteID,"%4d ",h+1);
+                        if(++k%15==0) FPN(fsiteID);
+                     }
+                  }
+                  FPN(fsiteID);
+               }
+               else if(com.seqtype==CODONseq && com.NSsites && com.model) { /* branchsite */
+                  fprintf(fsiteID, "\n[replicate %d]\n",ir+1);
+                  for(h=0; h<com.ls; h++) {
+                     fprintf(fsiteID," %4d ", com.siteID[h]+1);
+                     if(h==com.ls-1 || (h+1)%15==0) FPN(fsiteID);
+                  }
+               }
+               else {  /* gamma rates */
+                  fprintf(fsiteID,"\n[replicate %d]\n",ir+1);
+                  for(h=0; h<com.ls; h++) {
+                     fprintf(fsiteID,"%7.4f ",com.siterates[h]);
+                     if(h==com.ls-1 || (h+1)%10==0) FPN(fsiteID);
+                  }
+               }
+            }
+         }
+      }
+
+      printf ("\rdid data set %d %s", ir+1, (com.ls>100000||nr<100? "\n" : ""));
+   }   /* for (ir) */
+   if(format==2 || format==3) appendfile(fseq, paupend);
+
+   fclose(fseq);
+   fclose(fanc);   /* opened unconditionally above, so close it unconditionally */
+   if(com.alpha || com.NSsites) fclose(fsiteID);
+   for(j=0; j<com.ns*2-1; j++) free(com.z[j]);
+   free(space);
+   if(com.model && com.NSsites) /* branch-site model */
+      for(i=0; i<tree.nnode; i++) free(nodes[i].nodeStr);
+   free(nodes);
+   if(com.alpha || com.ncatG) {
+      free(com.siterates);  com.siterates=NULL;
+      free(siteorder);
+      if(com.siteID) free(com.siteID);
+      com.siteID=NULL;
+   }
+   if(com.seqtype==1 && com.model && com.NSsites) free(com.omegaBS);
+   com.omegaBS = NULL;
+
+   exit (0);
+}
+
+
+int GetSpnamesFromMB (FILE *fmb, char line[], int lline)
+{
+/* This reads species names from MrBayes output file fmb, like the following.
+
+      Taxon  1 -> 1_Arabidopsis_thaliana
+      Taxon  2 -> 2_Taxus_baccata
+
+   line[] (of size lline) is used as the read buffer.  Names are stored in
+   com.spname[] under the taxon number given in the file, and com.ns is set
+   to the last taxon number read.  Reading stops at the first line after the
+   taxon block that is not a taxon line.  The file is rewound before and
+   after reading.
+   Returns 0 on success, or -1 if the end of the file is reached first.
+
+   NOTE(review): the copy loop is bounded by lline, not by the capacity of
+   com.spname[], which is not visible here -- confirm that names cannot
+   overrun it.
+*/
+   int j, ispecies;
+   char *p=NULL, *mbstr1="Taxon ", *mbstr2="->";
+
+   puts("Reading species names from mb output file.\n");
+   rewind(fmb);
+   for(ispecies=0; ; ) {
+      if(fgets(line, lline, fmb)==NULL) return(-1);
+      if(strstr(line, mbstr1) && strstr(line, mbstr2)) {
+         p=strstr(line, mbstr1)+5;
+         /* the number is used as an index into com.spname[], so validate it */
+         if(sscanf(p, "%d", &ispecies)!=1 || ispecies<1 || ispecies>NS)
+            error2("bad taxon number in mb output file");
+         /* skip "->" and the character after it, without passing the end of line */
+         p=strstr(line, mbstr2)+2;
+         if(*p) p++;
+         if(com.spname[ispecies-1][0])
+            error2("species name already read?");
+
+         for(j=0; isgraph((unsigned char)*p)&&j<lline; ) com.spname[ispecies-1][j++] = *p++;
+         com.spname[ispecies-1][j]=0;
+
+         printf("\tTaxon %2d: %s\n", ispecies, com.spname[ispecies-1]);
+      }
+      else if (ispecies)
+         break;
+   }
+   com.ns=ispecies;
+   rewind(fmb);
+
+   return(0);
+}
+
+char *GrepLine (FILE*fin, char*query, char* line, int lline)
+{
+/* Searches fin from its beginning for the first line that contains query[].
+   Lines are read into line[] with fgets(), lline being the buffer size.
+   Returns line[] (holding the matching line) on success, or NULL if fgets()
+   reports end of file or an error before any line matches.
+*/
+   char *hit=NULL;
+
+   rewind(fin);
+   while(fgets(line, lline, fin)!=NULL) {
+      if(strstr(line, query)!=NULL) { hit=line; break; }
+   }
+   return(hit);
+}
+
+
+void CladeMrBayesProbabilities (char treefile[])
+{
+/* This reads a tree from treefile and then scans a set of MrBayes output files
+ (mbfiles) to retrieve posterior probabilities for every clade in that tree.
+ It first scans the first mb output file to get the species names.
+ The probabilities are printed for each clade and are also written, separated
+ by "/", into nodes[].nodeStr before the tree is printed again at the end.
+ The list of mb output files is hard-coded below, and the function ends by
+ calling exit(0), so it does not return to the caller.
+
+ Sample mb output:
+ 6 -- ...........................************* 8001 1.000 0.005 (0.000)
+ 7 -- ....................******************** 8001 1.000 0.006 (0.000)
+
+ Note 4 Jan 2014: This uses parti2B[], and is broken after i rewrote
+ Tree2Partition().
+ NOTE(review): parti2B[] is never assigned anywhere in this function, so the
+ branch and node numbers derived from it below are read from uninitialized
+ storage.
+*/
+ int lline=100000, i,j,k, nib, inode, parti2B[NS];
+ char line[100000], *partition, *p;
+ char symbol[2]=".*", cladestr[NS+1]={0};
+ FILE *ftree, *fmb[20];
+ double *Pclade, t;
+/*
+ int nmbfiles=15;
+ char *mbfiles[]={"mb-1e-5.out", "mb-2e-5.out", "mb-3e-5.out", "mb-4e-5.out",
+"mb-5e-5.out", "mb-6e-5.out", "mb-7e-5.out", "mb-8e-5.out",
+"mb-9e-5.out", "mb-1e-4.out", "mb-2e-4.out", "mb-3e-4.out",
+"mb-5e-4.out", "mb-1e-3.out", "mb-1e-2.out"};
+*/
+ int nmbfiles=2;
+ char *mbfiles[]={"mb-1e-4.out", "mb-1e-1.out"};
+
+ /* open the tree file and every mb output file (fmb[] has room for 20) */
+ printf("tree file is %s\nmb output files:\n", treefile);
+ ftree=gfopen(treefile,"r");
+ for(k=0; k<nmbfiles; k++)
+ fmb[k]=gfopen(mbfiles[k],"r");
+ for(k=0; k<nmbfiles; k++) printf("\t%s\n", mbfiles[k]);
+
+ GetSpnamesFromMB(fmb[0], line, lline); /* read species names from mb output */
+
+ /* the tree file starts with two integers; a nonzero first one must equal com.ns.
+    NOTE(review): the fscanf return value is not checked. */
+ fscanf (ftree, "%d%d", &i, &k);
+ if(i && i!=com.ns) error2("do you mean to specify ns in the tree file?");
+ i=(com.ns*2-1)*sizeof(struct TREEN);
+ if((nodes=(struct TREEN*)malloc(i))==NULL) error2("oom");
+ ReadTreeN (ftree, &i, &j, 0, 1);
+
+ FPN(F0); OutTreeN(F0, 0, 0); FPN(F0); FPN(F0);
+ nib=tree.nbranch-com.ns;
+ /* 100-char label buffers are allocated only for nodes numbered above com.ns.
+    NOTE(review): the malloc result is not checked. */
+ for(i=0;i<tree.nnode;i++) {
+ nodes[i].nodeStr = NULL;
+ if(i>com.ns) nodes[i].nodeStr=(char*)malloc(100*sizeof(char));
+ }
+
+ /* partition[]: nib rows of com.ns entries; Pclade[]: nib rows of nmbfiles values, zeroed */
+ partition=(char*)malloc(nib*com.ns*sizeof(char));
+ if (partition==NULL) error2("oom");
+ if((Pclade=(double*)malloc(nib*nmbfiles*sizeof(double)))==NULL)
+ error2("oom");
+ for(i=0;i<nib*nmbfiles; i++) Pclade[i]=0;
+
+ Tree2Partition(partition);
+
+ for(i=0; i<nib; i++) {
+ inode=tree.branches[parti2B[i]][1];
+ /* build the '.'/'*' pattern for row i, flipped if necessary so that the
+    first species is always '.'; this indexes symbol[] with the partition
+    entries, so it assumes they are 0 or 1. */
+ if(partition[i*com.ns+0])
+ for(j=0; j<com.ns; j++) cladestr[j]=symbol[1-partition[i*com.ns+j]];
+ else
+ for(j=0; j<com.ns; j++) cladestr[j]=symbol[partition[i*com.ns+j]];
+ printf("#%2d branch %2d node %2d %s", i+1, parti2B[i], inode, cladestr);
+
+ /* look the pattern up in each mb file; the two numbers that follow it are
+    read into t (not used afterwards) and Pclade (8001 and 1.000 in the
+    sample above).  A file without the pattern leaves Pclade at 0.
+    NOTE(review): the "\0" inside the format string is redundant. */
+ for(k=0; k<nmbfiles; k++) {
+ if(GrepLine(fmb[k], cladestr, line, lline)) {
+ p=strstr(line,cladestr);
+ sscanf(p+com.ns, "%lf%lf\0", &t, &Pclade[i*nmbfiles+k]);
+ }
+ }
+ for(k=0; k<nmbfiles; k++) printf("%6.2f", Pclade[i*nmbfiles+k]);
+ FPN(F0);
+ /* write the percentages into the node label, advancing 4 characters per
+    file ("%3.0f" plus "/"); this assumes each percentage fits in 3 characters. */
+ for(k=0,p=nodes[inode].nodeStr; k<nmbfiles; k++) {
+ sprintf(p, "%3.0f%s", Pclade[i*nmbfiles+k]*100,(k<nmbfiles-1?"/":""));
+ p+=4;
+ }
+ }
+ FPN(F0); OutTreeN(F0,1,PrLabel); FPN(F0);
+
+ /* release everything and terminate the program */
+ for(i=0; i<tree.nnode; i++) free(nodes[i].nodeStr);
+ free(nodes); free(partition); free(Pclade);
+ fclose(ftree);
+ for(k=0; k<nmbfiles; k++) fclose(fmb[k]);
+ exit(0);
+}
diff --git a/src/mcmctree.c b/src/mcmctree.c
index 26cc017..4e47437 100644
--- a/src/mcmctree.c
+++ b/src/mcmctree.c
@@ -21,7 +21,7 @@
#include "paml.h"
-#define NS 800
+#define NS 400
#define NBRANCH (NS*2-2)
#define NNODE (NS*2-1)
#define MAXNSONS 3
@@ -176,6 +176,8 @@ double PMat[16], Cijk[64], Root[4];
double _rateSite=1, OldAge=999;
int debug=0, LASTROUND=0, BayesEB, testlnL=0, NPMat=0; /* no use for this */
+double BFbeta=0;
+
/* for sptree.nodes[].fossil: lower, upper, bounds, gamma, inverse-gamma */
enum {LOWER_F=1, UPPER_F, BOUND_F, GAMMA_F, SKEWN_F, SKEWT_F, S2N_F} FOSSIL_FLAGS;
char *fossils[]={" ", "L", "U", "B", "G", "SN", "ST", "S2N"};
@@ -231,6 +233,7 @@ int main (int argc, char *argv[])
fprintf(fout, "MCMCTREE (%s) %s\n", pamlVerStr, com.seqf);
ReadTreeSeqs(fout);
+
if(data.pfossilerror && (data.pfossilerror[2]<0 || data.pfossilerror[2]>sptree.nfossil))
error2("nMinCorrect for fossil errors is out of range.");
@@ -305,14 +308,14 @@ int GetMem (void)
Memory arrangement if(com.conPSiteClass=1):
ncode*npatt for each node, by node, by iclass, by locus
*/
- int g = data.ngene, g1=g+(data.rgeneprior==0);
+ int g = data.ngene, g1 = g + (data.rgeneprior == 0);
int locus, j, k, s = sptree.nspecies, s1, sG = 1, sfhK = 0;
double *conP, *rates;
/* get mem for conP (internal nodes) */
if(mcmc.usedata==1) {
if(!com.fix_alpha && mcmc.saveconP) {
- com.conPSiteClass=1; sG=com.ncatG;
+ com.conPSiteClass = 1; sG = com.ncatG;
}
data.conP_offset[0] = 0;
for(locus=0,com.sconP=0; locus<g; locus++) {
@@ -703,7 +706,7 @@ double lnpD_locus (int locus)
else if(mcmc.usedata==2)
lnL = lnpD_locus_Approx(locus);
- return (lnL);
+ return (lnL*BFbeta);
}
double lnpData (double lnpDi[])
@@ -1026,9 +1029,9 @@ int GenerateBlengthGH (char infile[])
int GetOptions (char *ctlf)
{
int transform0=ARCSIN_B; /* default transform: SQRT_B, LOG_B, ARCSIN_B */
- int iopt, i, j, nopt=29, lline=4096;
- char line[4096], *pline, *peq, opt[32], *comment="*#";
- char *optstr[] = {"seed", "seqfile","treefile", "outfile", "mcmcfile",
+ int iopt, i, j, nopt=30, lline=4096;
+ char line[4096], *pline, *peq, opt[33], *comment="*#";
+ char *optstr[] = {"seed", "seqfile","treefile", "outfile", "mcmcfile", "BayesFactorBeta",
"seqtype", "aaRatefile", "icode", "noisy", "usedata", "ndata", "model", "clock",
"TipDate", "RootAge", "fossilerror", "alpha", "ncatG", "cleandata",
"BDparas", "kappa_gamma", "alpha_gamma",
@@ -1037,6 +1040,7 @@ int GetOptions (char *ctlf)
double t=1, *eps=mcmc.finetune;
FILE *fctl=gfopen (ctlf, "r");
+ data.rgeneprior = 1; /* default rate prior is gamma-Dirichlet. */
data.transform = transform0;
if (fctl) {
if (noisy) printf ("\nReading options from %s..\n", ctlf);
@@ -1061,25 +1065,26 @@ int GetOptions (char *ctlf)
case ( 2): sscanf(pline+1, "%s", com.treef); break;
case ( 3): sscanf(pline+1, "%s", com.outf); break;
case ( 4): sscanf(pline+1, "%s", com.mcmcf); break;
- case ( 5): com.seqtype=(int)t; break;
- case ( 6): sscanf(pline+2,"%s", com.daafile); break;
- case ( 7): com.icode=(int)t; break;
- case ( 8): noisy=(int)t; break;
- case ( 9):
+ case ( 5): sscanf(pline + 1, "%lf", &BFbeta); break; /* beta for marginal likelihood; break so it does not fall through and overwrite com.seqtype */
+ case ( 6): com.seqtype=(int)t; break;
+ case ( 7): sscanf(pline+2,"%s", com.daafile); break;
+ case ( 8): com.icode=(int)t; break;
+ case ( 9): noisy=(int)t; break;
+ case (10):
j=sscanf(pline+1, "%d %s%d", &mcmc.usedata, com.inBVf, &data.transform);
if(mcmc.usedata==2)
if(strchr(com.inBVf, '*')) { strcpy(com.inBVf, "in.BV"); data.transform=transform0; }
else if(j==2) data.transform=transform0;
break;
- case (10): com.ndata=(int)t; break;
- case (11): com.model=(int)t; break;
- case (12): com.clock=(int)t; break;
- case (13):
+ case (11): com.ndata=(int)t; break;
+ case (12): com.model=(int)t; break;
+ case (13): com.clock=(int)t; break;
+ case (14):
sscanf(pline+2, "%lf%lf", &com.TipDate, &com.TipDate_TimeUnit);
if(com.TipDate && com.TipDate_TimeUnit==0) error2("should set com.TipDate_TimeUnit");
data.transform = SQRT_B; /* SQRT_B, LOG_B, ARCSIN_B */
break;
- case (14):
+ case (15):
sptree.RootAge[2] = sptree.RootAge[3] = 0.025; /* default tail probs */
if((strchr(line, '>') || strchr(line, '<')) && (strstr(line, "U(") || strstr(line, "B(")))
error2("don't mix < U B on the RootAge line");
@@ -1094,35 +1099,35 @@ int GetOptions (char *ctlf)
else if((pline=strstr(line, "B(")))
sscanf(pline+2, "%lf,%lf,%lf,%lf", &sptree.RootAge[0], &sptree.RootAge[1], &sptree.RootAge[2], &sptree.RootAge[3]);
break;
- case (15):
+ case (16):
data.pfossilerror[0] = 0.0;
data.pfossilerror[2] = 1; /* default: minimum 2 good fossils */
sscanf(pline+1, "%lf%lf%lf", data.pfossilerror, data.pfossilerror+1, data.pfossilerror+2);
break;
- case (16): com.alpha=t; break;
- case (17): com.ncatG=(int)t; break;
- case (18): com.cleandata=(int)t; break;
- case (19):
+ case (17): com.alpha=t; break;
+ case (18): com.ncatG=(int)t; break;
+ case (19): com.cleandata=(int)t; break;
+ case (20):
sscanf(pline+1,"%lf%lf%lf%lf", &data.BDS[0],&data.BDS[1],&data.BDS[2],&data.BDS[3]);
break;
- case (20):
- sscanf(pline+1,"%lf%lf", data.kappagamma, data.kappagamma+1); break;
case (21):
- sscanf(pline+1,"%lf%lf", data.alphagamma, data.alphagamma+1); break;
+ sscanf(pline+1,"%lf%lf", data.kappagamma, data.kappagamma+1); break;
case (22):
+ sscanf(pline+1,"%lf%lf", data.alphagamma, data.alphagamma+1); break;
+ case (23):
sscanf(pline+1,"%lf%lf%lf%d", &data.rgenepara[0], &data.rgenepara[1], &data.rgenepara[2], &data.rgeneprior);
- if(data.rgenepara[2]<=0) data.rgenepara[2]=1;
- if(data.rgeneprior<0) data.rgeneprior=0;
+ if(data.rgenepara[2]<=0) data.rgenepara[2] = 1;
+ if(data.rgeneprior<0) data.rgeneprior=0;
break;
- case (23):
+ case (24):
sscanf(pline+1,"%lf%lf%lf", data.sigma2para, data.sigma2para+1, data.sigma2para+2);
if(data.sigma2para[2]<=0) data.sigma2para[2]=1;
break;
- case (24): mcmc.print=(int)t; break;
- case (25): mcmc.burnin=(int)t; break;
- case (26): mcmc.sampfreq=(int)t; break;
- case (27): mcmc.nsample=(int)t; break;
- case (28):
+ case (25): mcmc.print=(int)t; break;
+ case (26): mcmc.burnin=(int)t; break;
+ case (27): mcmc.sampfreq=(int)t; break;
+ case (28): mcmc.nsample=(int)t; break;
+ case (29):
puts("finetune is deprecated now.");
break;
sscanf(pline + 1, "%d:%lf%lf%lf%lf%lf%lf", &j, eps, eps + 1, eps + 2, eps + 3, eps + 4, eps + 5);
@@ -1151,6 +1156,12 @@ int GetOptions (char *ctlf)
if(com.alpha==0) { com.fix_alpha=1; com.nalpha=0; }
if(com.clock<1 || com.clock>3) error2("clock should be 1, 2, 3?");
if (mcmc.burnin <= 0) puts("burnin=0: no automatic step adjustment?");
+
+ if (BFbeta && mcmc.usedata ==0)
+ error2("marginal like for prior with usedata =0?");
+ else if (BFbeta==0)
+ BFbeta = 1;
+
return(0);
}
@@ -1346,6 +1357,7 @@ double Infinitesites(FILE *fout)
char *FidedDf[2]={"FixedDsClock1.txt", "FixedDsClock23.txt"};
FILE *fin=gfopen(FidedDf[com.clock>1],"r"), *fmcmc=gfopen(com.mcmcf,"w");
+ if(BFbeta != 1) error2("BFbeta should not be used for Infinitesites?");
com.model=0; com.alpha=0;
mcmc.usedata = 0;
if(data.rgeneprior==0) puts("\aInfiniteSites, not working for cond i.i.d. locus rate prior?");
@@ -2880,7 +2892,7 @@ int UpdateTimes (double *lnL, double finetune[], char accept[])
}
-#if (1) /* this is not used now. */
+#if (0) /* this is not used now. */
int UpdateTimesClock23(double *lnL, double finetune[], char accept[])
{
@@ -3770,7 +3782,7 @@ int MCMC (FILE* fout)
{
FILE *fmcmc = NULL;
int nxpr[2]={6, 2}, i, j, k, ir, g=data.ngene;
- double lnL=0, nround=0, *x, *mx, postEFossil[MaxNFossils]={0};
+ double lnL=0, mlnL=0, nround=0, *x, *mx, postEFossil[MaxNFossils]={0};
double au=data.rgenepara[0], bu=data.rgenepara[1], a=data.rgenepara[2];
char timestr[36];
@@ -3854,6 +3866,7 @@ int MCMC (FILE* fout)
nround = 0;
zero(mcmc.Pjump, mcmc.nfinetune);
zero(mx, com.np);
+ mlnL = 0;
testlnL = 1;
if(fabs(lnL-lnpData(data.lnpDi)) > 0.001) {
printf("\n%12.6f = %12.6f? Resetting lnL\n", lnL, lnpData(data.lnpDi));
@@ -3885,6 +3898,7 @@ int MCMC (FILE* fout)
mcmc.Pjump[j] = (mcmc.Pjump[j]*(nround-1) + mcmc.accept[j])/nround;
if(mcmc.print) collectx(fmcmc, x);
for(j=0; j<com.np; j++) mx[j] = (mx[j]*(nround-1) + x[j])/nround;
+ mlnL = (mlnL*(nround-1)+lnL/BFbeta)/nround;
if(data.pfossilerror[0])
getPfossilerr(postEFossil, nround);
@@ -3892,7 +3906,7 @@ int MCMC (FILE* fout)
if(mcmc.print && ir>=0 && (ir==0 || (ir+1)%mcmc.sampfreq==0)) {
fprintf(fmcmc,"%d", ir+1);
for(j=0;j<com.np; j++) fprintf(fmcmc,"\t%.7f",x[j]);
- if(mcmc.usedata) fprintf(fmcmc,"\t%.3f",lnL);
+ if(mcmc.usedata) fprintf(fmcmc,"\t%.3f", lnL/BFbeta);
FPN(fmcmc);
}
if((ir+1)%max2(mcmc.sampfreq, mcmc.sampfreq*mcmc.nsample/100)==0) {
@@ -3904,7 +3918,7 @@ int MCMC (FILE* fout)
FOR(j,nxpr[0]) printf(" %5.3f", mx[j]);
if(com.np>nxpr[0]+nxpr[1] && nxpr[1]) printf(" -");
FOR(j,nxpr[1]) printf(" %5.3f", mx[com.np-nxpr[1]+j]);
- if(mcmc.usedata) printf(" %4.1f", lnL);
+ if(mcmc.usedata) printf(" %4.1f", mlnL);
}
if(mcmc.sampfreq*mcmc.nsample>20 && (ir+1)%(mcmc.sampfreq*mcmc.nsample/20)==0) {
@@ -3923,6 +3937,7 @@ int MCMC (FILE* fout)
if(mcmc.print) fclose(fmcmc);
+ if(BFbeta!=1) printf("\nBFbeta = %8.6f E_b(lnf(X)) = %9.4f\n", BFbeta, mlnL);
printf("\nTime used: %s", printtime(timestr));
fprintf(fout,"\nTime used: %s", printtime(timestr));
diff --git a/src/paml.h b/src/paml.h
index ce94274..d80a329 100644
--- a/src/paml.h
+++ b/src/paml.h
@@ -51,6 +51,7 @@ double reflect(double x, double a, double b);
#define rndexp(mean) (-(mean)*log(rndu()))
double rnduM0V1 (void);
double rndNormal(void);
+int rndBinomial(int n, double p);
double rndBox(void);
double rndAirplane(void);
double rndStrawhat(void);
@@ -82,8 +83,8 @@ double QuantileChi2 (double prob, double v);
double PDFGamma(double x, double alpha, double beta);
#define CDFGamma(x,alpha,beta) IncompleteGamma((beta)*(x),alpha,LnGamma(alpha))
double logPriorRatioGamma(double xnew, double xold, double a, double b);
-double PDF_InverseGamma(double x, double alpha, double beta);
-#define CDF_InverseGamma(x,alpha,beta) (1-CDFGamma(1/(x),alpha,beta))
+double PDFinvGamma(double x, double alpha, double beta);
+#define CDFinvGamma(x,alpha,beta) (1-CDFGamma(1/(x),alpha,beta))
#define CDFChi2(x,v) CDFGamma(x,(v)/2.0,0.5)
double PDFBeta(double x, double p, double q);
double CDFBeta(double x, double p, double q, double lnbeta);
@@ -118,7 +119,7 @@ double probBinomial (int n, int k, double p);
double probBetaBinomial (int n, int k, double p, double q);
double factorial (int n);
double Binomial(double n, int k, double *scale);
-
+int BinomialK(double alpha, int n, double C[], double S[]);
int GaussLegendreRule(const double **x, const double **w, int order);
int GaussLaguerreRule(const double **x, const double **w, int order);
double NIntegrateGaussLegendre(double(*fun)(double x), double a, double b, int order);
@@ -396,6 +397,6 @@ enum {PrBranch=1, PrNodeNum=2, PrLabel=4, PrNodeStr=8, PrAge=16, PrOmega=32} Out
#define FullSeqNames 0 /* 1: numbers at the beginning of sequence name are part of name */
-#define pamlVerStr "paml version 4.9, March 2015"
+#define pamlVerStr "paml version 4.9d, February 2017"
#endif
diff --git a/src/pamp.c b/src/pamp.c
index 53830fa..de1445f 100644
--- a/src/pamp.c
+++ b/src/pamp.c
@@ -1,645 +1,645 @@
-/* PAMP.c, Copyright, Ziheng Yang, April 1995.
- Specify the sequence type in the file pamp.ctl. Results go into mp.
-
- gcc -o pamp pamp.c tools.o
- pamp <ControlFileName>
-*/
-
-#include "paml.h"
-
-#define NS 2000
-#define NBRANCH (NS*2-2)
-#define NNODE (NS*2-1)
-#define MAXNSONS 10
-#define NGENE 2000
-#define LSPNAME 30
-#define NCODE 20
-#define NCATG 16
-
-double DistanceREV (double Ft[], int n, double alpha, double Root[], double U[],
- double V[], double pi[], double space[], int *cond);
-int PMatBranch (double Ptb[], int n, double branch[],
- double Root[], double U[], double V[], double space[]);
-int PatternLS (FILE *fout, double Ft[],double alpha, double space[], int *cond);
-int testx (double x[], int np);
-int GetOptions (char *ctlf);
-int AlphaMP (FILE* fout);
-int PatternMP (FILE *fout, double Ft[]);
-int PathwayMP1 (FILE *fout, int *maxchange, int NSiteChange[],
- double Ft[], double space[], int job);
-double lfunAlpha_Sullivan (double x);
-double lfunAlpha_YK96 (double x);
-
-struct CommonInfo {
- unsigned char *z[NS];
- char *spname[NS], seqf[256],outf[256],treef[256];
- int seqtype, ns, ls, ngene, posG[NGENE+1],lgene[NGENE],*pose,npatt, readpattern;
- int np, ntime, ncode,fix_kappa,fix_rgene,fix_alpha, clock, model, ncatG, cleandata;
- int print, nhomo;
- double *fpatt, *conP;
- /* not used */
- double lmax,pi[NCODE], kappa,alpha,rou, rgene[NGENE],piG[NGENE][NCODE];
-} com;
-struct TREEB {
- int nbranch, nnode, root, branches[NBRANCH][2];
- double lnL;
-} tree;
-struct TREEN {
- int father, nson, sons[MAXNSONS], ibranch;
- double branch, age, label, *conP;
- char *nodeStr, fossil;
-} *nodes;
-
-
-#define NCATCHANGE 100
-extern int noisy, *ancestor;
-extern double *SeqDistance;
-int maxchange, NSiteChange[NCATCHANGE];
-double MuChange;
-int LASTROUND=0; /* no use for this */
-
-#define LSDISTANCE
-#define REALSEQUENCE
-#define NODESTRUCTURE
-#define RECONSTRUCTION
-#define PAMP
-#include "treesub.c"
-
-int main (int argc, char *argv[])
-{
- FILE *ftree, *fout, *fseq;
- char ctlf[32]="pamp.ctl";
- char *Seqstr[]={"nucleotide", "", "amino-acid", "Binary"};
- int itree, ntree, i, j, s3;
- double *space, *Ft;
-
- com.nhomo=1; com.print=1;
- noisy=2; com.ncatG=8; com.clock=0; com.cleandata=1;
- starttimer();
- GetOptions(ctlf);
- if(argc>1) { strcpy(ctlf, argv[1]); printf("\nctlfile set to %s.\n",ctlf);}
-
- printf("PAMP in %s\n", pamlVerStr);
- if ((fseq=fopen(com.seqf, "r"))==NULL) error2 ("seqfile err.");
- if ((fout=fopen (com.outf, "w"))==NULL) error2("outfile creation err.");
- if((fseq=fopen (com.seqf,"r"))==NULL) error2("No sequence file!");
- ReadSeq (NULL, fseq, com.cleandata, 0);
- SetMapAmbiguity();
- i=(com.ns*2-1)*sizeof(struct TREEN);
- if((nodes=(struct TREEN*)malloc(i))==NULL) error2("oom");
-
- fprintf (fout,"PAMP %15s, %s sequences\n", com.seqf, Seqstr[com.seqtype]);
- if (com.nhomo) fprintf (fout, "nonhomogeneous model\n");
-
- space = (double*)malloc(1000000*sizeof(double)); /* not safe */
- SeqDistance=(double*)malloc(com.ns*(com.ns-1)/2*sizeof(double));
- ancestor=(int*)malloc(com.ns*(com.ns-1)/2*sizeof(int));
- if (SeqDistance==NULL||ancestor==NULL) error2("oom");
-
- i = com.ns*(com.ns-1)/2;
- s3 = sizeof(double)*((com.ns*2-2)*(com.ns*2-2 + 4 + i) + i);
- s3 = max2(s3, com.ncode*com.ncode*(2*com.ns-2+1)*(int)sizeof(double));
-
- Ft = (double*) malloc(s3);
- if (space==NULL || Ft==NULL) error2 ("oom space");
-
- InitializeBaseAA (fout);
- if (com.ngene>1) error2 ("option G not allowed yet");
-
-/*
- PatternLS (fout, Ft, 0., space, &i);
- printf ("\nPairwise estimation of rate matrix done..\n");
- fflush(fout);
-*/
- ftree=gfopen (com.treef,"r");
- fscanf (ftree, "%d%d", &i, &ntree);
- if (i!=com.ns) error2 ("ns in the tree file");
-
- for(itree=0; itree<ntree; itree++) {
-
- printf ("\nTREE # %2d\n", itree+1);
- fprintf (fout,"\nTREE # %2d\n", itree+1);
-
- if (ReadTreeN (ftree, &i,&j, 0, 1)) error2 ("err tree..");
- OutTreeN (F0, 0, 0); FPN (F0);
- OutTreeN (fout, 0, 0); FPN (fout);
-
- for (i=0,maxchange=0; i<NCATCHANGE; i++) NSiteChange[i]=0;
-
- PathwayMP1 (fout, &maxchange, NSiteChange, Ft, space, 0);
- printf ("\nHartigan reconstruction done..\n");
-
- fprintf (fout, "\n\n(1) Branch lengths and substitution pattern\n");
- PatternMP (fout, Ft);
- printf ("pattern done..\n"); fflush(fout);
-
- fprintf (fout, "\n\n(2) Gamma parameter\n");
- AlphaMP (fout);
- printf ("gamma done..\n"); fflush(fout);
-
- fprintf (fout, "\n\n(3) Parsimony reconstructions\n");
- PathwayMP1 (fout, &maxchange, NSiteChange, Ft, space, 1);
- printf ("Yang reconstruction done..\n"); fflush(fout);
- }
- free(nodes);
- return (0);
-}
-
-int GetOptions (char *ctlf)
-{
- int iopt, nopt=6, i, lline=4096, t;
- char line[4096], *pline, opt[32], *comment="*#";
- char *optstr[] = {"seqfile","outfile","treefile", "seqtype", "ncatG", "nhomo"};
- FILE *fctl=gfopen (ctlf, "r");
-
- if (fctl) {
- for (;;) {
- if (fgets (line, lline, fctl) == NULL) break;
- for (i=0,t=0; i<lline&&line[i]; i++)
- if (isalnum(line[i])) { t=1; break; }
- else if (strchr(comment,line[i])) break;
- if (t==0) continue;
- sscanf (line, "%s%*s%d", opt, &t);
- if ((pline=strstr(line, "="))==NULL) error2 ("option file.");
-
- for (iopt=0; iopt<nopt; iopt++) {
- if (strncmp(opt, optstr[iopt], 8)==0) {
- if (noisy>2)
- printf ("\n%3d %15s | %-20s %6d", iopt+1,optstr[iopt],opt,t);
- switch (iopt) {
- case ( 0): sscanf(pline+2, "%s", com.seqf); break;
- case ( 1): sscanf(pline+2, "%s", com.outf); break;
- case ( 2): sscanf(pline+2, "%s", com.treef); break;
- case (3): com.seqtype=t; break;
- case (4): com.ncatG=t; break;
- case (5): com.nhomo=t; break;
- }
- break;
- }
- }
- if (iopt==nopt)
- { printf ("\nopt %s in %s\n", opt, ctlf); exit (-1); }
- }
- fclose (fctl);
- }
- else
- if (noisy) printf ("\nno ctl file..");
-
- if (com.seqtype==0) com.ncode=4;
- else if (com.seqtype==2) com.ncode=20;
- else if (com.seqtype==3) com.ncode=2;
- else error2("seqtype");
- if (com.ncatG>NCATG) error2 ("raise NCATG?");
- return (0);
-}
-
-
-int AlphaMP (FILE* fout)
-{
- int k, ntotal;
- double x, xb[2], lnL, var;
-
- xb[0]=1e-3; xb[1]=99; /* alpha */
-
- fprintf (fout, "\n# changes .. # sites");
- for (k=0,ntotal=0,MuChange=var=0; k<maxchange+1; k++) {
- fprintf (fout, "\n%6d%10d", k, NSiteChange[k]);
- ntotal+=NSiteChange[k]; MuChange+=k*NSiteChange[k];
- var+=k*k*NSiteChange[k];
- }
- MuChange/=ntotal;
- var=(var-MuChange*MuChange*ntotal)/(ntotal-1.);
- x=MuChange*MuChange/(var-MuChange);
- fprintf (fout, "\n\n# sites%6d, total changes%6d\nmean-var%9.4f%9.4f",
- ntotal, (int)(ntotal*MuChange+.5), MuChange, var);
- fprintf (fout, "\nalpha (method of moments)%9.4f", x);
- if (x<=0) x=9;
-
- LineSearch(lfunAlpha_Sullivan, &lnL, &x, xb, 0.02, 1e-8);
- fprintf (fout, "\nalpha (Sullivan et al. 1995)%9.4f\n", x);
-
- MuChange/=tree.nbranch;
- LineSearch(lfunAlpha_YK96, &lnL, &x, xb, 0.02, 1e-8);
- fprintf (fout, "alpha (Yang & Kumar 1995, ncatG= %d)%9.4f\n", com.ncatG,x);
- return (0);
-}
-
-double lfunAlpha_Sullivan (double x)
-{
- int k;
- double lnL=0, a=x, t;
-
- FOR (k, maxchange+1) {
- if (NSiteChange[k]==0) continue;
- t=-a*log(1+MuChange/a);
- if (k)
- t+=LnGamma(k+a)-LnGamma(k+1.) - LnGamma(a)
- + k*log(MuChange/a/(1+MuChange/a));
- lnL += NSiteChange[k]*t;
- }
- return(-lnL);
-}
-
-double lfunAlpha_YK96 (double x)
-{
- int k, ir, b=tree.nbranch, n=com.ncode;
- double lnL=0, prob, a=x, t=MuChange, p;
- double freqK[NCATG], rK[NCATG];
-
- DiscreteGamma (freqK, rK, a, a, com.ncatG, 0);
- FOR (k, maxchange+1) {
- if (NSiteChange[k]==0) continue;
- for (ir=0,prob=0; ir<com.ncatG; ir++) {
- p=1./n+(n-1.)/n*exp(-n/(n-1.)*rK[ir]*t);
- prob+=freqK[ir]*pow(p,(double)(b-k))*pow((1-p)/(n-1.),(double)k);
- }
- lnL += NSiteChange[k]*log(prob);
- }
- return (-lnL);
-}
-
-
-int OutQ (FILE *fout, int n, double Q[], double pi[], double Root[],
- double U[], double V[], double space[])
-{
- char aa3[4]="";
- int i,j;
- double *T1=space, t;
-
- fprintf(fout,"\nrate matrix Q: Qij*dt = prob(i->j; dt)\n");
- if (n<=4) {
-/* matout (fout, pi, 1, n); */
- matout (fout, Q, n, n);
- if (n==4) {
- fprintf (fout, "Order: T, C, A, G");
- t=pi[0]*Q[0*4+1]+pi[1]*Q[1*4+0]+pi[2]*Q[2*4+3]+pi[3]*Q[3*4+2];
- fprintf (fout, "\nAverage Ts/Tv =%9.4f\n", t/(1-t));
- }
- }
- else if (n==20) {
- for (i=0; i<n; i++,FPN(fout))
- FOR (j,n) fprintf (fout, "%6.0f", Q[i*n+j]*100);
-/*
- FOR (i,n) {
- fprintf (fout,"\n%-4s", getAAstr(aa3,i));
- FOR (j,i) fprintf (fout, "%4.0f", Q[i*n+j]/pi[j]*100);
- fprintf (fout, "%4.0f", -Q[i*n+i]*100);
- }
- fputs("\n ",fout); FOR(i,naa) fprintf(fout,"%5s",getAAstr(aa3,i));
-*/
- fprintf (fout, "\n\nPAM matrix, P(0.01)\n");
- FOR (i,n) FOR (j,n) T1[i*n+j]=U[i*n+j]*exp(0.01*Root[j]);
- matby (T1, V, Q, n, n, n);
- FOR (i,n*n) if (Q[i]<0) Q[i]=0;
- FOR (i,n) {
- fprintf (fout,"\n%-4s", getAAstr(aa3,i));
- FOR(j,n) fprintf(fout, "%6.0f", Q[i*n+j]*10000);
- }
- fputs("\n ",fout); FOR(i,n) fprintf(fout,"%5s",getAAstr(aa3,i));
- }
- return (0);
-}
-
-int PMatBranch (double Ptb[], int n, double branch[],
- double Root[], double U[], double V[], double space[])
-{
-/* homogeneised transition prob matrix, with one Q assumed for the whole tree
-*/
- int i, j, k;
- double *T1=space, *P;
-
- FOR (k, tree.nbranch) {
- P=Ptb+k*n*n;
- FOR (i,n) FOR (j,n) T1[i*n+j]=U[i*n+j]*exp(Root[j]*branch[k]);
- matby (T1, V, P, n, n, n);
- FOR (i,n*n) if (P[i]<0) P[i]=0;
-/*
- printf ("\nbranch %d, P(%.5f)", k+1, branch[k]);
- matout (F0, P, n, n);
- testTransP (P, n);
-*/
- }
- return (0);
-}
-
-
-int PatternMP (FILE *fout, double Ft[])
-{
-/* Ft[]: input counts for the F(t) matrix for each branch, output P(t)
-*/
- int n=com.ncode, i,j,k;
- double *Q, *pi, *Root, *U, *V, *branch, *space, *T1, t;
-
- if((Q=(double*)malloc((n*n*6+tree.nbranch)*sizeof(double)))==NULL)
- error2("PathwayMP: oom");
- pi=Q+n*n; Root=pi+n; U=Root+n; V=U+n*n; branch=V+n*n;
- space=T1=branch+tree.nbranch;
-
- for (k=0; k<tree.nbranch; k++) { /* branch lengths */
- xtoy(Ft+k*n*n, Q, n*n);
- branch[k]=nodes[tree.branches[k][1]].branch=
- DistanceREV(Q, n, 0, Root, U, V, pi, space, &j);
- }
- OutTreeB (fout); FPN (fout);
- FOR (i, tree.nbranch) fprintf(fout,"%9.5f", branch[i]);
- fprintf (fout,"\ntree length: %9.5f\n", sum(branch,tree.nbranch));
-
- /* pattern Q from average F(t) */
- fprintf(fout,"\nF(t)");
- xtoy (Ft+tree.nbranch*n*n, Q, n*n);
- matout2 (fout, Q, n, n, 12, 2);
- DistanceREV (Q, n, 0, Root, U, V, pi, space, &j);
- if (noisy>=3&&j==-1) { puts("F(t) modified in DistanceREV"); }
-
- OutQ (fout, n, Q, pi, Root, U, V, T1);
- if (com.nhomo==0)
- PMatBranch (Ft, n, branch, Root, U, V, space);
- else {
- for (k=0; k<tree.nbranch; k++) {
- for (i=0; i<n; i++) {
- t=sum(Ft+k*n*n+i*n, n);
- if (t>1e-5) abyx (1/t, Ft+k*n*n+i*n, n);
- else Ft[k*n*n+i*n+i]=1;
- }
- }
- }
- free(Q);
- return (0);
-}
-
-
-int PathwayMP1 (FILE *fout, int *maxchange, int NSiteChange[],
- double Ft[], double space[], int job)
-{
-/* Hartigan, JA. 1973. Minimum mutation fits to a given tree.
- Biometrics, 29:53-65.
- Yang, Z. 1996.
- job=0: 1st pass: calculates maxchange, NSiteChange[], and Ft[]
- job=1: 2nd pass: reconstructs ancestral character states (->fout)
-*/
- char *pch=(com.seqtype==0?BASEs:(com.seqtype==2?AAs:BINs));
- char *zz[NNODE],nodeb[NNODE],bestPath[NNODE-NS],Equivoc[NS-1];
- int n=com.ncode, nid=tree.nbranch-com.ns+1, it,i1,i2, i,j,k, h, hp,npath;
- int *Ftt=NULL, nchange, nchange0, visit[NS-1]={0};
- double sumpr, bestpr, pr, *pnode=NULL, *pnsite;
-
-
- fputs("\nList of most parsimonious reconstructions (MPRs) at each site: #MPRs (#changes)\n",fout);
- fputs("and then the most likely reconstruction out of the MPRs and its probability\n",fout);
- if((pnsite=(double*)malloc((com.ns-1)*n*sizeof(double)))==NULL)
- error2("PathwayMP1: oom");
-
- PATHWay=(char*)malloc(nid*(n+3)*sizeof(char));
- NCharaCur=PATHWay+nid; ICharaCur=NCharaCur+nid; CharaCur=ICharaCur+nid;
- if (job==0) {
- zero(Ft,n*n*(tree.nbranch+1));
- if((Ftt=(int*)malloc(n*n*tree.nbranch*sizeof(int)))==NULL) error2("oom");
- }
- else {
- pnode=(double*)malloc((nid*com.npatt+1)*(sizeof(double)+sizeof(char)));
- FOR (j,nid) zz[com.ns+j]=(char*)(pnode+nid*com.npatt)+j*com.npatt;
- FOR (j,com.ns) zz[j]=com.z[j];
- if (pnode==NULL) error2 ("oom");
- }
- for (j=0,visit[i=0]=tree.root-com.ns; j<tree.nbranch; j++)
- if (tree.branches[j][1]>=com.ns)
- visit[++i]=tree.branches[j][1]-com.ns;
-
- for (h=0; h<com.ls; h++) {
- hp=com.pose[h];
- if (job==1) {
- fprintf (fout, "\n%4d ", h+1);
- FOR (j, com.ns) fprintf (fout, "%c", pch[com.z[j][hp]]);
- fprintf (fout, ": ");
- FOR (j,nid*n) pnsite[j]=0;
- }
- FOR (j,com.ns) nodeb[j]=com.z[j][hp];
- if (job==0) FOR (j,n*n*tree.nbranch) Ftt[j]=0;
-
- InteriorStatesMP (1, hp, &nchange, NCharaCur, CharaCur, space);
- ICharaCur[j=tree.root-com.ns]=0; PATHWay[j]=CharaCur[j*n+0];
- FOR (j,nid) Equivoc[j]=(NCharaCur[j]>1);
-
- if (nchange>*maxchange) *maxchange=nchange;
- if (nchange>NCATCHANGE-1) error2 ("raise NCATCHANGE");
-
- NSiteChange[nchange]++;
- /* NSiteChange[nchange]+=(int)com.fpatt[hp]; */
-
- DownStates (tree.root);
- for (npath=0,sumpr=bestpr=0; ;) {
- for (j=0,k=visit[nid-1]; j<NCharaCur[k]; j++) {
- PATHWay[k]=CharaCur[k*n+j]; npath++;
- FOR (i,nid) nodeb[i+com.ns]=PATHWay[i];
- if (job==1) {
- FOR (i,nid) fprintf(fout,"%c",pch[PATHWay[i]]); fputc(' ',fout);
- pr=com.pi[(int)nodeb[tree.root]];
- for (i=0; i<tree.nbranch; i++) {
- i1=nodeb[tree.branches[i][0]];
- i2=nodeb[tree.branches[i][1]];
- pr*=Ft[i*n*n+i1*n+i2];
- }
- sumpr+=pr;
- FOR (i,nid) pnsite[i*n+nodeb[i+com.ns]]+=pr;
- if (pr>bestpr)
- { bestpr=pr; FOR(i,nid) bestPath[i]=PATHWay[i];}
- }
- else {
- for (i=0,nchange0=0; i<tree.nbranch; i++) {
- i1=nodeb[tree.branches[i][0]];
- i2=nodeb[tree.branches[i][1]];
- if(i1!=i2) nchange0++;
- Ftt[i*n*n+i1*n+i2]++;
- }
- if (nchange0!=nchange) {
- printf("\a\nerr:PathwayMP %d != %d", nchange, nchange0);
- fprintf(fout,".%d. ", nchange0); /* ??? */
- }
- }
- }
- for (j=nid-2; j>=0; j--) {
- if(Equivoc[k=visit[j]] == 0) continue;
- if (ICharaCur[k]+1<NCharaCur[k]) {
- PATHWay[k] = CharaCur[k*n + (++ICharaCur[k])];
- DownStates (k+com.ns);
- break;
- }
- else { /* if (next equivocal node is not ancestor) update node k */
- for (i=j-1; i>=0; i--) if (Equivoc[(int)visit[i]]) break;
- if (i>=0) {
- for (it=k+com.ns,i=visit[i]+com.ns; ; it=nodes[it].father)
- if (it==tree.root || nodes[it].father==i) break;
- if (it==tree.root)
- DownStatesOneNode (k+com.ns, nodes[k+com.ns].father);
- }
- }
- }
- if (j<0) break;
- } /* for (npath) */
-/*
- printf ("\rsite pattern %4d/%4d: %6d%6d", hp+1,com.npatt,npath,nchange);
-*/
- if (job==0)
- FOR (j,n*n*tree.nbranch) Ft[j]+=(double)Ftt[j]/npath*com.fpatt[hp];
- else {
- FOR (i,nid) zz[com.ns+i][hp]=bestPath[i];
- FOR (i,nid) pnode[i*com.npatt+hp]=pnsite[i*n+bestPath[i]]/sumpr;
- fprintf (fout, " |%4d (%d) | ", npath, nchange);
- if (npath>1) {
- FOR (i,nid) fprintf (fout, "%c", pch[bestPath[i]]);
- fprintf (fout, " (%.3f)", bestpr/sumpr);
-
- }
- }
- } /* for (h) */
- free(PATHWay);
- if (job==0) {
- free(Ftt);
- FOR (i,tree.nbranch) FOR (j,n*n) Ft[tree.nbranch*n*n+j]+=Ft[i*n*n+j];
- }
- else {
- fprintf (fout,"\n\nApprox. relative accuracy at each node, by site\n");
- FOR (h, com.ls) {
- hp=com.pose[h];
- fprintf (fout,"\n%4d ", h+1);
- FOR (j, com.ns) fprintf (fout, "%c", pch[com.z[j][hp]]);
- fprintf (fout, ": ");
- FOR (i,nid) if (pnode[i*com.npatt+hp]<.99999) break;
- if (i<nid) FOR (j, nid)
- fprintf(fout,"%c (%5.3f) ", pch[zz[j][hp]],pnode[j*com.npatt+hp]);
- }
- /* Site2Pattern (fout); */
- fprintf (fout,"\n\nlist of extant and reconstructed sequences\n\n");
- for(j=0;j<tree.nnode;j++,FPN(fout)) {
- if(j<com.ns) fprintf(fout,"%-20s", com.spname[j]);
- else fprintf(fout,"node #%-14d", j+1);
- print1seq (fout, zz[j], (com.readpattern?com.npatt:com.ls), com.pose);
- }
- free(pnode);
- }
- free(pnsite);
- return (0);
-}
-
-double DistanceREV (double Ft[], int n,double alpha,double Root[],double U[],
- double V[], double pi[], double space[], int *cond)
-{
-/* input: Ft, n, alpha
- output: Q(in Ft), t, Root, U, V, and cond
- space[n*n*2]
-*/
- int i,j, InApplicable;
- double *Q=Ft, *T1=space, *T2=space+n*n, t, pi_sqrt[20], small=0.1/com.ls;
-
- for (i=0,t=0; i<n; i++) FOR (j,n) if (i-j) t+=Q[i*n+j];
- if (t<small) { *cond=1; zero(Q,n*n); return (0); }
-
- for(i=0;i<n;i++) for (j=0;j<i;j++)
- Q[i*n+j]=Q[j*n+i]=(Q[i*n+j]+Q[j*n+i])/2;
-
- abyx(1./sum(Q,n*n), Q, n*n);
- for(i=0;i<n;i++) {
- pi[i]=sum(Q+i*n, n);
- if(pi[i]>small)
- abyx(1/pi[i], Q+i*n, n);
- }
-
- eigenQREV(Q, pi, n, Root, U, V, pi_sqrt);
- for(i=0,InApplicable=0; i<n; i++) {
- if (Root[i]<=0) {
- InApplicable=1;
- Root[i]=-300; /* adhockery */
- }
- else
- Root[i]=(alpha<=0?log(Root[i]):gammap(Root[i],alpha));
- }
- FOR (i,n) FOR (j,n) T1[i*n+j]=U[i*n+j]*Root[j];
- matby (T1, V, Q, n, n, n);
- for (i=0,t=0; i<n; i++) t-=pi[i]*Q[i*n+i];
-
- if(noisy>=9 && InApplicable) printf("Root(P)<0. adhockery invoked\n");
- if(t<=0) error2("err: DistanceREV");
-
- FOR (i,n) Root[i]/=t;
- FOR (i, n) FOR (j,n) { Q[i*n+j]/=t; if (i-j) Q[i*n+j]=max2(0,Q[i*n+j]); }
-
- return (t);
-}
-
-
-int PatternLS (FILE *fout, double Ft[], double alpha,double space[],int *cond)
-{
-/* space[n*n*2]
-*/
- int n=com.ncode, i,j,k,h, it;
- double *Q=Ft,*Qt=Q+n*n,*Qm=Qt+n*n;
- double *pi,*Root,*U, *V, *T1=space, *branch, t;
- FILE *fdist=gfopen("Distance", "w");
-
- if((pi=(double*)malloc((n*n*3+tree.nbranch)*sizeof(double)))==NULL)
- error2("PatternLS: oom");
- Root=pi+n; U=Root+n; V=U+n*n; branch=V+n*n;
-
- *cond=0;
- for (i=0,zero(Qt,n*n),zero(Qm,n*n); i<com.ns; i++) {
- for (j=0; j<i; j++) {
- for (h=0,zero(Q,n*n); h<com.npatt; h++) {
- Q[(com.z[i][h])*n+com.z[j][h]] += com.fpatt[h]/2;
- Q[(com.z[j][h])*n+com.z[i][h]] += com.fpatt[h]/2;
- }
- FOR (k,n*n) Qt[k]+=Q[k]/(com.ns*(com.ns-1)/2);
- it=i*(i-1)/2+j;
- SeqDistance[it]=DistanceREV (Q, n, alpha, Root,U,V, pi, space, &k);
-
- if (k==-1) {
- *cond=-1; printf("\n%d&%d: F(t) modified in DistanceREV",i+1,j+1);
- }
-
- fprintf(fdist,"%9.5f",SeqDistance[it]);
-/*
-FOR (k,n)
-if (Q[k*n+k]>0) { printf ("%d %d %.5f\n", i+1, j+1, Q[k*n+k]); }
-*/
- FOR (k,n*n) Qm[k]+=Q[k]/(com.ns*(com.ns-1)/2);
- }
- FPN(fdist);
- }
- fclose (fdist);
- DistanceREV (Qt, n, alpha, Root, U, V, pi, space, &k);
- if (k==-1) { puts ("F(t) modified in DistanceREV"); }
-
- fprintf (fout, "\n\nQ: from average F over pairwise comparisons");
- OutQ(fout, n, Qt, pi, Root, U, V, T1);
- fprintf (fout, "\n\nQ: average of Qs over pairwise comparisons\n");
- fprintf (fout, "(disregard this if very different from the previous Q)");
- OutQ (fout, n, Qm, pi, Root, U, V, T1);
-
- if (tree.nbranch) {
- fillxc (branch, 0.1, tree.nbranch);
- LSDistance (&t, branch, testx);
- OutTreeB (fout); FPN (fout);
- FOR (i,tree.nbranch) fprintf(fout,"%9.5f", branch[i]);
- PMatBranch (Ft, com.ncode, branch, Root, U, V, space);
- }
- free(pi);
- return (0);
-}
-
-int testx (double x[], int np)
-{
- int i;
- double tb[]={1e-5, 99};
- FOR(i,np) if(x[i]<tb[0] ||x[i]>tb[1]) return(-1);
- return(0);
-}
-
-int SetBranch (double x[])
-{
- int i, status=0;
- double small=1e-5;
-
- FOR (i,tree.nnode)
- if (i!=tree.root && (nodes[i].branch=x[nodes[i].ibranch])<-small)
- status=-1;
- return (status);
-}
+/* PAMP.c, Copyright, Ziheng Yang, April 1995.
+ Specify the sequence type in the file pamp.ctl. Results go into mp.
+
+ gcc -o pamp pamp.c tools.o
+ pamp <ControlFileName>
+*/
+
+#include "paml.h"
+
+#define NS 2000
+#define NBRANCH (NS*2-2)
+#define NNODE (NS*2-1)
+#define MAXNSONS 10
+#define NGENE 2000
+#define LSPNAME 30
+#define NCODE 20
+#define NCATG 16
+
+double DistanceREV (double Ft[], int n, double alpha, double Root[], double U[],
+ double V[], double pi[], double space[], int *cond);
+int PMatBranch (double Ptb[], int n, double branch[],
+ double Root[], double U[], double V[], double space[]);
+int PatternLS (FILE *fout, double Ft[],double alpha, double space[], int *cond);
+int testx (double x[], int np);
+int GetOptions (char *ctlf);
+int AlphaMP (FILE* fout);
+int PatternMP (FILE *fout, double Ft[]);
+int PathwayMP1 (FILE *fout, int *maxchange, int NSiteChange[],
+ double Ft[], double space[], int job);
+double lfunAlpha_Sullivan (double x);
+double lfunAlpha_YK96 (double x);
+
+struct CommonInfo {
+ unsigned char *z[NS];
+ char *spname[NS], seqf[256],outf[256],treef[256];
+ int seqtype, ns, ls, ngene, posG[NGENE+1],lgene[NGENE],*pose,npatt, readpattern;
+ int np, ntime, ncode,fix_kappa,fix_rgene,fix_alpha, clock, model, ncatG, cleandata;
+ int print, nhomo;
+ double *fpatt, *conP;
+ /* not used */
+ double lmax,pi[NCODE], kappa,alpha,rou, rgene[NGENE],piG[NGENE][NCODE];
+} com;
+struct TREEB {
+ int nbranch, nnode, root, branches[NBRANCH][2];
+ double lnL;
+} tree;
+struct TREEN {
+ int father, nson, sons[MAXNSONS], ibranch;
+ double branch, age, label, *conP;
+ char *nodeStr, fossil;
+} *nodes;
+
+
+#define NCATCHANGE 100
+extern int noisy, *ancestor;
+extern double *SeqDistance;
+int maxchange, NSiteChange[NCATCHANGE];
+double MuChange;
+int LASTROUND=0; /* no use for this */
+
+#define LSDISTANCE
+#define REALSEQUENCE
+#define NODESTRUCTURE
+#define RECONSTRUCTION
+#define PAMP
+#include "treesub.c"
+
+int main (int argc, char *argv[])
+{
+ FILE *ftree, *fout, *fseq;
+ char ctlf[32]="pamp.ctl";
+ char *Seqstr[]={"nucleotide", "", "amino-acid", "Binary"};
+ int itree, ntree, i, j, s3;
+ double *space, *Ft;
+
+ com.nhomo=1; com.print=1;
+ noisy=2; com.ncatG=8; com.clock=0; com.cleandata=1;
+ starttimer();
+ GetOptions(ctlf);
+ if(argc>1) { strcpy(ctlf, argv[1]); printf("\nctlfile set to %s.\n",ctlf);}
+
+ printf("PAMP in %s\n", pamlVerStr);
+ if ((fseq=fopen(com.seqf, "r"))==NULL) error2 ("seqfile err.");
+ if ((fout=fopen (com.outf, "w"))==NULL) error2("outfile creation err.");
+ if((fseq=fopen (com.seqf,"r"))==NULL) error2("No sequence file!");
+ ReadSeq (NULL, fseq, com.cleandata, 0);
+ SetMapAmbiguity();
+ i=(com.ns*2-1)*sizeof(struct TREEN);
+ if((nodes=(struct TREEN*)malloc(i))==NULL) error2("oom");
+
+ fprintf (fout,"PAMP %15s, %s sequences\n", com.seqf, Seqstr[com.seqtype]);
+ if (com.nhomo) fprintf (fout, "nonhomogeneous model\n");
+
+ space = (double*)malloc(1000000*sizeof(double)); /* not safe */
+ SeqDistance=(double*)malloc(com.ns*(com.ns-1)/2*sizeof(double));
+ ancestor=(int*)malloc(com.ns*(com.ns-1)/2*sizeof(int));
+ if (SeqDistance==NULL||ancestor==NULL) error2("oom");
+
+ i = com.ns*(com.ns-1)/2;
+ s3 = sizeof(double)*((com.ns*2-2)*(com.ns*2-2 + 4 + i) + i);
+ s3 = max2(s3, com.ncode*com.ncode*(2*com.ns-2+1)*(int)sizeof(double));
+
+ Ft = (double*) malloc(s3);
+ if (space==NULL || Ft==NULL) error2 ("oom space");
+
+ InitializeBaseAA (fout);
+ if (com.ngene>1) error2 ("option G not allowed yet");
+
+/*
+ PatternLS (fout, Ft, 0., space, &i);
+ printf ("\nPairwise estimation of rate matrix done..\n");
+ fflush(fout);
+*/
+ ftree=gfopen (com.treef,"r");
+ fscanf (ftree, "%d%d", &i, &ntree);
+ if (i!=com.ns) error2 ("ns in the tree file");
+
+ for(itree=0; itree<ntree; itree++) {
+
+ printf ("\nTREE # %2d\n", itree+1);
+ fprintf (fout,"\nTREE # %2d\n", itree+1);
+
+ if (ReadTreeN (ftree, &i,&j, 0, 1)) error2 ("err tree..");
+ OutTreeN (F0, 0, 0); FPN (F0);
+ OutTreeN (fout, 0, 0); FPN (fout);
+
+ for (i=0,maxchange=0; i<NCATCHANGE; i++) NSiteChange[i]=0;
+
+ PathwayMP1 (fout, &maxchange, NSiteChange, Ft, space, 0);
+ printf ("\nHartigan reconstruction done..\n");
+
+ fprintf (fout, "\n\n(1) Branch lengths and substitution pattern\n");
+ PatternMP (fout, Ft);
+ printf ("pattern done..\n"); fflush(fout);
+
+ fprintf (fout, "\n\n(2) Gamma parameter\n");
+ AlphaMP (fout);
+ printf ("gamma done..\n"); fflush(fout);
+
+ fprintf (fout, "\n\n(3) Parsimony reconstructions\n");
+ PathwayMP1 (fout, &maxchange, NSiteChange, Ft, space, 1);
+ printf ("Yang reconstruction done..\n"); fflush(fout);
+ }
+ free(nodes);
+ return (0);
+}
+
+int GetOptions (char *ctlf)
+{
+ int iopt, nopt=6, i, lline=4096, t;
+ char line[4096], *pline, opt[32], *comment="*#";
+ char *optstr[] = {"seqfile","outfile","treefile", "seqtype", "ncatG", "nhomo"};
+ FILE *fctl=gfopen (ctlf, "r");
+
+ if (fctl) {
+ for (;;) {
+ if (fgets (line, lline, fctl) == NULL) break;
+ for (i=0,t=0; i<lline&&line[i]; i++)
+ if (isalnum(line[i])) { t=1; break; }
+ else if (strchr(comment,line[i])) break;
+ if (t==0) continue;
+ sscanf (line, "%s%*s%d", opt, &t);
+ if ((pline=strstr(line, "="))==NULL) error2 ("option file.");
+
+ for (iopt=0; iopt<nopt; iopt++) {
+ if (strncmp(opt, optstr[iopt], 8)==0) {
+ if (noisy>2)
+ printf ("\n%3d %15s | %-20s %6d", iopt+1,optstr[iopt],opt,t);
+ switch (iopt) {
+ case ( 0): sscanf(pline+2, "%s", com.seqf); break;
+ case ( 1): sscanf(pline+2, "%s", com.outf); break;
+ case ( 2): sscanf(pline+2, "%s", com.treef); break;
+ case (3): com.seqtype=t; break;
+ case (4): com.ncatG=t; break;
+ case (5): com.nhomo=t; break;
+ }
+ break;
+ }
+ }
+ if (iopt==nopt)
+ { printf ("\nopt %s in %s\n", opt, ctlf); exit (-1); }
+ }
+ fclose (fctl);
+ }
+ else
+ if (noisy) printf ("\nno ctl file..");
+
+ if (com.seqtype==0) com.ncode=4;
+ else if (com.seqtype==2) com.ncode=20;
+ else if (com.seqtype==3) com.ncode=2;
+ else error2("seqtype");
+ if (com.ncatG>NCATG) error2 ("raise NCATG?");
+ return (0);
+}
+
+
+int AlphaMP (FILE* fout)
+{
+ int k, ntotal;
+ double x, xb[2], lnL, var;
+
+ xb[0]=1e-3; xb[1]=99; /* alpha */
+
+ fprintf (fout, "\n# changes .. # sites");
+ for (k=0,ntotal=0,MuChange=var=0; k<maxchange+1; k++) {
+ fprintf (fout, "\n%6d%10d", k, NSiteChange[k]);
+ ntotal+=NSiteChange[k]; MuChange+=k*NSiteChange[k];
+ var+=k*k*NSiteChange[k];
+ }
+ MuChange/=ntotal;
+ var=(var-MuChange*MuChange*ntotal)/(ntotal-1.);
+ x=MuChange*MuChange/(var-MuChange);
+ fprintf (fout, "\n\n# sites%6d, total changes%6d\nmean-var%9.4f%9.4f",
+ ntotal, (int)(ntotal*MuChange+.5), MuChange, var);
+ fprintf (fout, "\nalpha (method of moments)%9.4f", x);
+ if (x<=0) x=9;
+
+ LineSearch(lfunAlpha_Sullivan, &lnL, &x, xb, 0.02, 1e-8);
+ fprintf (fout, "\nalpha (Sullivan et al. 1995)%9.4f\n", x);
+
+ MuChange/=tree.nbranch;
+ LineSearch(lfunAlpha_YK96, &lnL, &x, xb, 0.02, 1e-8);
+ fprintf (fout, "alpha (Yang & Kumar 1995, ncatG= %d)%9.4f\n", com.ncatG,x);
+ return (0);
+}
+
+double lfunAlpha_Sullivan (double x)
+{
+ int k;
+ double lnL=0, a=x, t;
+
+ FOR (k, maxchange+1) {
+ if (NSiteChange[k]==0) continue;
+ t=-a*log(1+MuChange/a);
+ if (k)
+ t+=LnGamma(k+a)-LnGamma(k+1.) - LnGamma(a)
+ + k*log(MuChange/a/(1+MuChange/a));
+ lnL += NSiteChange[k]*t;
+ }
+ return(-lnL);
+}
+
+double lfunAlpha_YK96 (double x)
+{
+ int k, ir, b=tree.nbranch, n=com.ncode;
+ double lnL=0, prob, a=x, t=MuChange, p;
+ double freqK[NCATG], rK[NCATG];
+
+ DiscreteGamma (freqK, rK, a, a, com.ncatG, 0);
+ FOR (k, maxchange+1) {
+ if (NSiteChange[k]==0) continue;
+ for (ir=0,prob=0; ir<com.ncatG; ir++) {
+ p=1./n+(n-1.)/n*exp(-n/(n-1.)*rK[ir]*t);
+ prob+=freqK[ir]*pow(p,(double)(b-k))*pow((1-p)/(n-1.),(double)k);
+ }
+ lnL += NSiteChange[k]*log(prob);
+ }
+ return (-lnL);
+}
+
+
+int OutQ (FILE *fout, int n, double Q[], double pi[], double Root[],
+ double U[], double V[], double space[])
+{
+ char aa3[4]="";
+ int i,j;
+ double *T1=space, t;
+
+ fprintf(fout,"\nrate matrix Q: Qij*dt = prob(i->j; dt)\n");
+ if (n<=4) {
+/* matout (fout, pi, 1, n); */
+ matout (fout, Q, n, n);
+ if (n==4) {
+ fprintf (fout, "Order: T, C, A, G");
+ t=pi[0]*Q[0*4+1]+pi[1]*Q[1*4+0]+pi[2]*Q[2*4+3]+pi[3]*Q[3*4+2];
+ fprintf (fout, "\nAverage Ts/Tv =%9.4f\n", t/(1-t));
+ }
+ }
+ else if (n==20) {
+ for (i=0; i<n; i++,FPN(fout))
+ FOR (j,n) fprintf (fout, "%6.0f", Q[i*n+j]*100);
+/*
+ FOR (i,n) {
+ fprintf (fout,"\n%-4s", getAAstr(aa3,i));
+ FOR (j,i) fprintf (fout, "%4.0f", Q[i*n+j]/pi[j]*100);
+ fprintf (fout, "%4.0f", -Q[i*n+i]*100);
+ }
+ fputs("\n ",fout); FOR(i,naa) fprintf(fout,"%5s",getAAstr(aa3,i));
+*/
+ fprintf (fout, "\n\nPAM matrix, P(0.01)\n");
+ FOR (i,n) FOR (j,n) T1[i*n+j]=U[i*n+j]*exp(0.01*Root[j]);
+ matby (T1, V, Q, n, n, n);
+ FOR (i,n*n) if (Q[i]<0) Q[i]=0;
+ FOR (i,n) {
+ fprintf (fout,"\n%-4s", getAAstr(aa3,i));
+ FOR(j,n) fprintf(fout, "%6.0f", Q[i*n+j]*10000);
+ }
+ fputs("\n ",fout); FOR(i,n) fprintf(fout,"%5s",getAAstr(aa3,i));
+ }
+ return (0);
+}
+
+int PMatBranch (double Ptb[], int n, double branch[],
+ double Root[], double U[], double V[], double space[])
+{
+/* homogeneised transition prob matrix, with one Q assumed for the whole tree
+*/
+ int i, j, k;
+ double *T1=space, *P;
+
+ FOR (k, tree.nbranch) {
+ P=Ptb+k*n*n;
+ FOR (i,n) FOR (j,n) T1[i*n+j]=U[i*n+j]*exp(Root[j]*branch[k]);
+ matby (T1, V, P, n, n, n);
+ FOR (i,n*n) if (P[i]<0) P[i]=0;
+/*
+ printf ("\nbranch %d, P(%.5f)", k+1, branch[k]);
+ matout (F0, P, n, n);
+ testTransP (P, n);
+*/
+ }
+ return (0);
+}
+
+
+int PatternMP (FILE *fout, double Ft[])
+{
+/* Ft[]: input counts for the F(t) matrix for each branch, output P(t)
+*/
+ int n=com.ncode, i,j,k;
+ double *Q, *pi, *Root, *U, *V, *branch, *space, *T1, t;
+
+ if((Q=(double*)malloc((n*n*6+tree.nbranch)*sizeof(double)))==NULL)
+ error2("PathwayMP: oom");
+ pi=Q+n*n; Root=pi+n; U=Root+n; V=U+n*n; branch=V+n*n;
+ space=T1=branch+tree.nbranch;
+
+ for (k=0; k<tree.nbranch; k++) { /* branch lengths */
+ xtoy(Ft+k*n*n, Q, n*n);
+ branch[k]=nodes[tree.branches[k][1]].branch=
+ DistanceREV(Q, n, 0, Root, U, V, pi, space, &j);
+ }
+ OutTreeB (fout); FPN (fout);
+ FOR (i, tree.nbranch) fprintf(fout,"%9.5f", branch[i]);
+ fprintf (fout,"\ntree length: %9.5f\n", sum(branch,tree.nbranch));
+
+ /* pattern Q from average F(t) */
+ fprintf(fout,"\nF(t)");
+ xtoy (Ft+tree.nbranch*n*n, Q, n*n);
+ matout2 (fout, Q, n, n, 12, 2);
+ DistanceREV (Q, n, 0, Root, U, V, pi, space, &j);
+ if (noisy>=3&&j==-1) { puts("F(t) modified in DistanceREV"); }
+
+ OutQ (fout, n, Q, pi, Root, U, V, T1);
+ if (com.nhomo==0)
+ PMatBranch (Ft, n, branch, Root, U, V, space);
+ else {
+ for (k=0; k<tree.nbranch; k++) {
+ for (i=0; i<n; i++) {
+ t=sum(Ft+k*n*n+i*n, n);
+ if (t>1e-5) abyx (1/t, Ft+k*n*n+i*n, n);
+ else Ft[k*n*n+i*n+i]=1;
+ }
+ }
+ }
+ free(Q);
+ return (0);
+}
+
+
+int PathwayMP1 (FILE *fout, int *maxchange, int NSiteChange[],
+ double Ft[], double space[], int job)
+{
+/* Hartigan, JA. 1973. Minimum mutation fits to a given tree.
+ Biometrics, 29:53-65.
+ Yang, Z. 1996.
+ job=0: 1st pass: calculates maxchange, NSiteChange[], and Ft[]
+ job=1: 2nd pass: reconstructs ancestral character states (->fout)
+*/
+ char *pch=(com.seqtype==0?BASEs:(com.seqtype==2?AAs:BINs));
+ char *zz[NNODE],nodeb[NNODE],bestPath[NNODE-NS],Equivoc[NS-1];
+ int n=com.ncode, nid=tree.nbranch-com.ns+1, it,i1,i2, i,j,k, h, hp,npath;
+ int *Ftt=NULL, nchange, nchange0, visit[NS-1]={0};
+ double sumpr, bestpr, pr, *pnode=NULL, *pnsite;
+
+
+ fputs("\nList of most parsimonious reconstructions (MPRs) at each site: #MPRs (#changes)\n",fout);
+ fputs("and then the most likely reconstruction out of the MPRs and its probability\n",fout);
+ if((pnsite=(double*)malloc((com.ns-1)*n*sizeof(double)))==NULL)
+ error2("PathwayMP1: oom");
+
+ PATHWay=(char*)malloc(nid*(n+3)*sizeof(char));
+ NCharaCur=PATHWay+nid; ICharaCur=NCharaCur+nid; CharaCur=ICharaCur+nid;
+ if (job==0) {
+ zero(Ft,n*n*(tree.nbranch+1));
+ if((Ftt=(int*)malloc(n*n*tree.nbranch*sizeof(int)))==NULL) error2("oom");
+ }
+ else {
+ pnode=(double*)malloc((nid*com.npatt+1)*(sizeof(double)+sizeof(char)));
+ FOR (j,nid) zz[com.ns+j]=(char*)(pnode+nid*com.npatt)+j*com.npatt;
+ FOR (j,com.ns) zz[j]=com.z[j];
+ if (pnode==NULL) error2 ("oom");
+ }
+ for (j=0,visit[i=0]=tree.root-com.ns; j<tree.nbranch; j++)
+ if (tree.branches[j][1]>=com.ns)
+ visit[++i]=tree.branches[j][1]-com.ns;
+
+ for (h=0; h<com.ls; h++) {
+ hp=com.pose[h];
+ if (job==1) {
+ fprintf (fout, "\n%4d ", h+1);
+ FOR (j, com.ns) fprintf (fout, "%c", pch[com.z[j][hp]]);
+ fprintf (fout, ": ");
+ FOR (j,nid*n) pnsite[j]=0;
+ }
+ FOR (j,com.ns) nodeb[j]=com.z[j][hp];
+ if (job==0) FOR (j,n*n*tree.nbranch) Ftt[j]=0;
+
+ InteriorStatesMP (1, hp, &nchange, NCharaCur, CharaCur, space);
+ ICharaCur[j=tree.root-com.ns]=0; PATHWay[j]=CharaCur[j*n+0];
+ FOR (j,nid) Equivoc[j]=(NCharaCur[j]>1);
+
+ if (nchange>*maxchange) *maxchange=nchange;
+ if (nchange>NCATCHANGE-1) error2 ("raise NCATCHANGE");
+
+ NSiteChange[nchange]++;
+ /* NSiteChange[nchange]+=(int)com.fpatt[hp]; */
+
+ DownStates (tree.root);
+ for (npath=0,sumpr=bestpr=0; ;) {
+ for (j=0,k=visit[nid-1]; j<NCharaCur[k]; j++) {
+ PATHWay[k]=CharaCur[k*n+j]; npath++;
+ FOR (i,nid) nodeb[i+com.ns]=PATHWay[i];
+ if (job==1) {
+ FOR (i,nid) fprintf(fout,"%c",pch[PATHWay[i]]); fputc(' ',fout);
+ pr=com.pi[(int)nodeb[tree.root]];
+ for (i=0; i<tree.nbranch; i++) {
+ i1=nodeb[tree.branches[i][0]];
+ i2=nodeb[tree.branches[i][1]];
+ pr*=Ft[i*n*n+i1*n+i2];
+ }
+ sumpr+=pr;
+ FOR (i,nid) pnsite[i*n+nodeb[i+com.ns]]+=pr;
+ if (pr>bestpr)
+ { bestpr=pr; FOR(i,nid) bestPath[i]=PATHWay[i];}
+ }
+ else {
+ for (i=0,nchange0=0; i<tree.nbranch; i++) {
+ i1=nodeb[tree.branches[i][0]];
+ i2=nodeb[tree.branches[i][1]];
+ if(i1!=i2) nchange0++;
+ Ftt[i*n*n+i1*n+i2]++;
+ }
+ if (nchange0!=nchange) {
+ printf("\a\nerr:PathwayMP %d != %d", nchange, nchange0);
+ fprintf(fout,".%d. ", nchange0); /* ??? */
+ }
+ }
+ }
+ for (j=nid-2; j>=0; j--) {
+ if(Equivoc[k=visit[j]] == 0) continue;
+ if (ICharaCur[k]+1<NCharaCur[k]) {
+ PATHWay[k] = CharaCur[k*n + (++ICharaCur[k])];
+ DownStates (k+com.ns);
+ break;
+ }
+ else { /* if (next equivocal node is not ancestor) update node k */
+ for (i=j-1; i>=0; i--) if (Equivoc[(int)visit[i]]) break;
+ if (i>=0) {
+ for (it=k+com.ns,i=visit[i]+com.ns; ; it=nodes[it].father)
+ if (it==tree.root || nodes[it].father==i) break;
+ if (it==tree.root)
+ DownStatesOneNode (k+com.ns, nodes[k+com.ns].father);
+ }
+ }
+ }
+ if (j<0) break;
+ } /* for (npath) */
+/*
+ printf ("\rsite pattern %4d/%4d: %6d%6d", hp+1,com.npatt,npath,nchange);
+*/
+ if (job==0)
+ FOR (j,n*n*tree.nbranch) Ft[j]+=(double)Ftt[j]/npath*com.fpatt[hp];
+ else {
+ FOR (i,nid) zz[com.ns+i][hp]=bestPath[i];
+ FOR (i,nid) pnode[i*com.npatt+hp]=pnsite[i*n+bestPath[i]]/sumpr;
+ fprintf (fout, " |%4d (%d) | ", npath, nchange);
+ if (npath>1) {
+ FOR (i,nid) fprintf (fout, "%c", pch[bestPath[i]]);
+ fprintf (fout, " (%.3f)", bestpr/sumpr);
+
+ }
+ }
+ } /* for (h) */
+ free(PATHWay);
+ if (job==0) {
+ free(Ftt);
+ FOR (i,tree.nbranch) FOR (j,n*n) Ft[tree.nbranch*n*n+j]+=Ft[i*n*n+j];
+ }
+ else {
+ fprintf (fout,"\n\nApprox. relative accuracy at each node, by site\n");
+ FOR (h, com.ls) {
+ hp=com.pose[h];
+ fprintf (fout,"\n%4d ", h+1);
+ FOR (j, com.ns) fprintf (fout, "%c", pch[com.z[j][hp]]);
+ fprintf (fout, ": ");
+ FOR (i,nid) if (pnode[i*com.npatt+hp]<.99999) break;
+ if (i<nid) FOR (j, nid)
+ fprintf(fout,"%c (%5.3f) ", pch[zz[j][hp]],pnode[j*com.npatt+hp]);
+ }
+ /* Site2Pattern (fout); */
+ fprintf (fout,"\n\nlist of extant and reconstructed sequences\n\n");
+ for(j=0;j<tree.nnode;j++,FPN(fout)) {
+ if(j<com.ns) fprintf(fout,"%-20s", com.spname[j]);
+ else fprintf(fout,"node #%-14d", j+1);
+ print1seq (fout, zz[j], (com.readpattern?com.npatt:com.ls), com.pose);
+ }
+ free(pnode);
+ }
+ free(pnsite);
+ return (0);
+}
+
+double DistanceREV (double Ft[], int n,double alpha,double Root[],double U[],
+ double V[], double pi[], double space[], int *cond)
+{
+/* input: Ft, n, alpha
+ output: Q(in Ft), t, Root, U, V, and cond
+ space[n*n*2]
+*/
+ int i,j, InApplicable;
+ double *Q=Ft, *T1=space, *T2=space+n*n, t, pi_sqrt[20], small=0.1/com.ls;
+
+ for (i=0,t=0; i<n; i++) FOR (j,n) if (i-j) t+=Q[i*n+j];
+ if (t<small) { *cond=1; zero(Q,n*n); return (0); }
+
+ for(i=0;i<n;i++) for (j=0;j<i;j++)
+ Q[i*n+j]=Q[j*n+i]=(Q[i*n+j]+Q[j*n+i])/2;
+
+ abyx(1./sum(Q,n*n), Q, n*n);
+ for(i=0;i<n;i++) {
+ pi[i]=sum(Q+i*n, n);
+ if(pi[i]>small)
+ abyx(1/pi[i], Q+i*n, n);
+ }
+
+ eigenQREV(Q, pi, n, Root, U, V, pi_sqrt);
+ for(i=0,InApplicable=0; i<n; i++) {
+ if (Root[i]<=0) {
+ InApplicable=1;
+ Root[i]=-300; /* adhockery */
+ }
+ else
+ Root[i]=(alpha<=0?log(Root[i]):gammap(Root[i],alpha));
+ }
+ FOR (i,n) FOR (j,n) T1[i*n+j]=U[i*n+j]*Root[j];
+ matby (T1, V, Q, n, n, n);
+ for (i=0,t=0; i<n; i++) t-=pi[i]*Q[i*n+i];
+
+ if(noisy>=9 && InApplicable) printf("Root(P)<0. adhockery invoked\n");
+ if(t<=0) error2("err: DistanceREV");
+
+ FOR (i,n) Root[i]/=t;
+ FOR (i, n) FOR (j,n) { Q[i*n+j]/=t; if (i-j) Q[i*n+j]=max2(0,Q[i*n+j]); }
+
+ return (t);
+}
+
+
+int PatternLS (FILE *fout, double Ft[], double alpha,double space[],int *cond)
+{
+/* space[n*n*2]
+*/
+ int n=com.ncode, i,j,k,h, it;
+ double *Q=Ft,*Qt=Q+n*n,*Qm=Qt+n*n;
+ double *pi,*Root,*U, *V, *T1=space, *branch, t;
+ FILE *fdist=gfopen("Distance", "w");
+
+ if((pi=(double*)malloc((n*n*3+tree.nbranch)*sizeof(double)))==NULL)
+ error2("PatternLS: oom");
+ Root=pi+n; U=Root+n; V=U+n*n; branch=V+n*n;
+
+ *cond=0;
+ for (i=0,zero(Qt,n*n),zero(Qm,n*n); i<com.ns; i++) {
+ for (j=0; j<i; j++) {
+ for (h=0,zero(Q,n*n); h<com.npatt; h++) {
+ Q[(com.z[i][h])*n+com.z[j][h]] += com.fpatt[h]/2;
+ Q[(com.z[j][h])*n+com.z[i][h]] += com.fpatt[h]/2;
+ }
+ FOR (k,n*n) Qt[k]+=Q[k]/(com.ns*(com.ns-1)/2);
+ it=i*(i-1)/2+j;
+ SeqDistance[it]=DistanceREV (Q, n, alpha, Root,U,V, pi, space, &k);
+
+ if (k==-1) {
+ *cond=-1; printf("\n%d&%d: F(t) modified in DistanceREV",i+1,j+1);
+ }
+
+ fprintf(fdist,"%9.5f",SeqDistance[it]);
+/*
+FOR (k,n)
+if (Q[k*n+k]>0) { printf ("%d %d %.5f\n", i+1, j+1, Q[k*n+k]); }
+*/
+ FOR (k,n*n) Qm[k]+=Q[k]/(com.ns*(com.ns-1)/2);
+ }
+ FPN(fdist);
+ }
+ fclose (fdist);
+ DistanceREV (Qt, n, alpha, Root, U, V, pi, space, &k);
+ if (k==-1) { puts ("F(t) modified in DistanceREV"); }
+
+ fprintf (fout, "\n\nQ: from average F over pairwise comparisons");
+ OutQ(fout, n, Qt, pi, Root, U, V, T1);
+ fprintf (fout, "\n\nQ: average of Qs over pairwise comparisons\n");
+ fprintf (fout, "(disregard this if very different from the previous Q)");
+ OutQ (fout, n, Qm, pi, Root, U, V, T1);
+
+ if (tree.nbranch) {
+ fillxc (branch, 0.1, tree.nbranch);
+ LSDistance (&t, branch, testx);
+ OutTreeB (fout); FPN (fout);
+ FOR (i,tree.nbranch) fprintf(fout,"%9.5f", branch[i]);
+ PMatBranch (Ft, com.ncode, branch, Root, U, V, space);
+ }
+ free(pi);
+ return (0);
+}
+
+int testx (double x[], int np)
+{
+ int i;
+ double tb[]={1e-5, 99};
+ FOR(i,np) if(x[i]<tb[0] ||x[i]>tb[1]) return(-1);
+ return(0);
+}
+
+int SetBranch (double x[])
+{
+ int i, status=0;
+ double small=1e-5;
+
+ FOR (i,tree.nnode)
+ if (i!=tree.root && (nodes[i].branch=x[nodes[i].ibranch])<-small)
+ status=-1;
+ return (status);
+}
diff --git a/src/tools.c b/src/tools.c
index 8d30165..36f3bff 100644
--- a/src/tools.c
+++ b/src/tools.c
@@ -14,9 +14,7 @@
char BASEs[]="TCAGUYRMKSWHBVD-N?";
char *EquateBASE[]={"T","C","A","G", "T", "TC","AG","CA","TG","CG","TA",
"TCA","TCG","CAG","TAG", "TCAG","TCAG","TCAG"};
-char BASEs5[]="TCAGEUYRMKSWHBVD-N?";
-char *EquateBASE5[]={"T","C","A","G", "E", "T", "TC","AG","CA","TG","CG","TA",
- "TCA","TCG","CAG","TAG", "TCAG","TCAG","TCAG"};
+
char CODONs[256][4], AAs[] = "ARNDCQEGHILKMFPSTWYV-*?X";
char nChara[256], CharaMap[256][64];
char AA3Str[]= {"AlaArgAsnAspCysGlnGluGlyHisIleLeuLysMetPheProSerThrTrpTyrVal***"};
@@ -133,7 +131,7 @@ int CodeChara (char b, int seqtype)
/* This codes nucleotides or amino acids into 0, 1, 2, ...
*/
int i, n=(seqtype<=1?4:(seqtype==2?20:2));
- char *pch=(seqtype<=1 ? BASEs : (seqtype==2 ? AAs: (seqtype==5 ? BASEs5 : BINs)));
+ char *pch=(seqtype<=1 ? BASEs : (seqtype==2 ? AAs: BINs));
if (seqtype<=1)
switch (b) {
@@ -174,7 +172,7 @@ int transform (char *z, int ls, int direction, int seqtype)
*/
int il, status=0;
char *p;
- char *pch=(seqtype<=1 ? BASEs : (seqtype==2 ? AAs: (seqtype==5 ? BASEs5 : BINs)));
+ char *pch=(seqtype<=1 ? BASEs : (seqtype==2 ? AAs: BINs));
if (direction)
for (il=0,p=z; il<ls; il++,p++) {
@@ -757,16 +755,16 @@ int NucListall(char b, int *nb, int ib[4])
*/
int j, k;
- k = strchr(BASEs,(int)b) - BASEs;
+ k = (int)(strchr(BASEs,(int)b) - BASEs);
if(k<0)
{ printf("NucListall: strange character %c\n",b); return(-1);}
if(k<4) {
*nb = 1; ib[0] = k;
}
else {
- *nb = strlen(EquateBASE[k]);
+ *nb = (int)strlen(EquateBASE[k]);
for(j=0; j< *nb; j++)
- ib[j] = strchr(BASEs,EquateBASE[k][j]) - BASEs;
+ ib[j] = (int)(strchr(BASEs,EquateBASE[k][j]) - BASEs);
}
return(0);
}
@@ -842,7 +840,7 @@ int printcu (FILE *fout, double fcodon[], int icode)
if (fcodon) { zero(faa,21); zero(fb3x4,12); }
else wc=0;
for(i=0; i<4; i++) strcpy(ss3[i],"\0\0\0");
- noodle = strc(4*(10+2+wc)-2,word[1]);
+ noodle = strc(4*(10+2+wc)-2, word[1]);
fprintf(fout, "\n%s\n", noodle);
for(i=0; i<4; i++,FPN(fout)) {
for(j=0; j<4; j++) {
@@ -1028,7 +1026,7 @@ int printsma (FILE*fout, char*spname[], unsigned char*z[], int ns, int l, int ll
*/
int igroup, ngroup, lt, h,hp, i, b,b0=-1,igap, lspname=30, lseqlen=7;
char indel='-', ambi='?', equal='.';
- char *pch=(seqtype<=1 ? BASEs : (seqtype==2 ? AAs: (seqtype==5 ? BASEs5 : BINs)));
+ char *pch=(seqtype<=1 ? BASEs : (seqtype==2 ? AAs: BINs));
char codon[4]=" ";
if(l==0) return(1);
@@ -1082,7 +1080,7 @@ void starttimer (void)
time_start=time(NULL);
}
-char* printtime (char timestr[])
+char *printtime (char timestr[])
{
/* print time elapsed since last call to starttimer()
*/
@@ -1093,8 +1091,8 @@ char* printtime (char timestr[])
h = (int)t/3600;
m = (int)(t%3600)/60;
s = (int)(t-(t/60)*60);
- if(h) sprintf(timestr,"%d:%02d:%02d", h,m,s);
- else sprintf(timestr,"%2d:%02d", m,s);
+ if(h) sprintf(timestr,"%d:%02d:%02d", h,m,s);
+ else sprintf(timestr, "%2d:%02d", m, s);
return(timestr);
}
@@ -1238,7 +1236,7 @@ int binarysearch (const void *key, const void *base, size_t n, size_t size, int(
Each element has size size. If a match is found, the function returns the index for the
element found. Otherwise it returns the loc where key should be inserted. This does not deal with ties.
*/
- int l=0, u=n-1, m=u, z;
+ int l=0, u=(int)n-1, m=u, z;
*found = 0;
while (l <= u) {
@@ -1316,8 +1314,7 @@ void bigexp(double lnx, double *a, double *b)
*a = pow(10, z-(*b));
}
-static unsigned int z_rndu=1237;
-static int w_rndu=1237;
+unsigned int z_rndu=1237, w_rndu=1237;
void SetSeed (int seed, int PrintSeed)
{
@@ -1533,6 +1530,18 @@ double rndNormal (void)
}
+int rndBinomial(int n, double p)
+{
+/* This may be too slow when n is large.
+*/
+ int i, x=0;
+
+ for (i=0; i<n; i++)
+ if(rndu() < p) x ++;
+ return (x);
+}
+
+
double rndBactrian (void)
{
/* This returns a variate from the 1:1 mixture of two normals N(-m, 1-m^2) and N(m, 1-m^2),
@@ -1789,7 +1798,6 @@ int rndpoisson (double m)
return ((int) em);
}
-
double rndgamma (double a)
{
/* This returns a random variable from gamma(a, 1).
@@ -1797,38 +1805,36 @@ double rndgamma (double a)
ACM Transactions on Mathematical Software, 26 (3): 363-372.
This is not entirely safe and is noted to produce zero when a is small (0.001).
*/
- double a0=a, c, d, u, v, x;
+ double a0 = a, c, d, u, v, x, small=1E-300;
- if(a<1) a ++;
+ if (a < 1) a++;
- d = a - 1.0/3.0;
- c = (1.0/3.0) / sqrt(d);
+ d = a - 1.0 / 3.0;
+ c = (1.0 / 3.0) / sqrt(d);
- for ( ; ; ) {
+ for (; ; ) {
do {
- x = rndNormal();
+ x = rndNormal( );
v = 1.0 + c * x;
- }
- while (v <= 0);
-
+ } while (v <= 0);
+
v *= v * v;
- u = rndu();
+ u = rndu( );
- if (u < 1 - 0.0331 * x * x * x * x)
+ if (u < 1 - 0.0331 * x * x * x * x)
break;
if (log(u) < 0.5 * x * x + d * (1 - v + log(v)))
break;
}
v *= d;
- if(a0 < 1)
- v *= pow(rndu(), 1/a0);
- if(v==0)
- printf("\a\nrndgamma returning 0.\n");
+ if (a0 < 1) /* this may cause underflow if a is small, like 0.01 */
+ v *= pow(rndu( ), 1 / a0);
+ if (v == 0) /* underflow */
+ v = small;
return v;
}
-
double rndbeta (double p, double q)
{
/* this generates a random beta(p,q) variate
@@ -2289,7 +2295,7 @@ double logPriorRatioGamma(double xnew, double x, double a, double b)
}
-double PDF_InverseGamma (double x, double alpha, double beta)
+double PDFinvGamma (double x, double alpha, double beta)
{
/* inverse-gamma density:
mean=beta/(alpha-1); var=beta^2/[(alpha-1)^2*(alpha-2)]
@@ -2838,23 +2844,23 @@ double probBinomial (int n, int k, double p)
}
-double probBetaBinomial (int n, int k, double p, double q)
+double probBetaBinomial(int n, int k, double p, double q)
{
/* This calculates beta-binomial probability of k succeses out of n trials,
- The binomial probability parameter has distribution beta(p, q)
+ The binomial probability parameter has distribution beta(a, b)
- prob(x) = C1(-a,k) * C2(-b,n-k)/C3(-a-b,n)
+ prob(x) = C1(-a, k) * C2(-b, n-k) / C3(-a-b, n)
*/
- double a=p,b=q, C1,C2,C3,scale1,scale2,scale3;
+ double a = p, b = q, C1, C2, C3, scale1, scale2, scale3;
- if(a<=0 || b<=0) return(0);
+ if (a <= 0 || b <= 0) return(0);
C1 = Binomial(-a, k, &scale1);
- C2 = Binomial(-b, n-k, &scale2);
- C3 = Binomial(-a-b, n, &scale3);
- C1 *= C2/C3;
- if(C1<0)
+ C2 = Binomial(-b, n - k, &scale2);
+ C3 = Binomial(-a - b, n, &scale3);
+ C1 *= C2 / C3;
+ if (C1<0)
error2("error in probBetaBinomial");
- return C1*exp(scale1+scale2-scale3);
+ return C1*exp(scale1 + scale2 - scale3);
}
@@ -4355,11 +4361,11 @@ int ScatterPlot (int n, int nseries, int yLorR[], double x[], double y[],
printf ("\ny[1]: (%10.2e, %10.2e)\n", ymin[0], ymax[0]);
if (ny==2) printf ("y[2]: (%10.2e, %10.2e) \n", ymin[1], ymax[1]);
- chart=(char*)malloc((nrow+1)*ncolr*sizeof(char));
+ chart = (char*)malloc((nrow+1)*ncolr*sizeof(char));
for (i=0; i<nrow+1; i++) {
for (j=1; j<ncol; j++) chart[i*ncolr+j]=' ';
- if (i%5==0) chart[i*ncolr+0]=chart[i*ncolr+j++]='+';
- else chart[i*ncolr+0]=chart[i*ncolr+j++]='|';
+ if (i%5==0) chart[i*ncolr+0] = chart[i*ncolr+j++] = '+';
+ else chart[i*ncolr+0] = chart[i*ncolr+j++] = '|';
chart[i*ncolr+j]='\0';
if (i==0||i==nrow)
FOR(j,ncol+1) chart[i*ncolr+j]=(char)(j%10==0?'+':'-');
@@ -4467,26 +4473,59 @@ double Binomial (double n, int k, double *scale)
/* calculates (n choose k), where n is any real number, and k is integer.
If(*scale!=0) the result should be c+exp(*scale).
*/
- double c=1,i,large=1e99;
+ double c = 1, i, large = 1e99;
*scale=0;
if((int)k!=k)
error2("k is not a whole number in Binomial.");
- if(n<0 && k%2==1)
- c = -1;
- if(k==0) return(1);
- if(n>0 && (k<0 || k>n)) return (0);
-
- if(n>0 && (int)n==n) k=min2(k,(int)n-k);
- for (i=1; i<=k; i++) {
- c *= (n-k+i)/i;
- if(c>large) {
- *scale += log(c); c=1;
+ if (k == 0) return(1);
+ if (n>0 && (k<0 || k>n)) return (0);
+
+ if(n>0 && (int)n==n) k = min2(k, (int)n - k);
+ for (i = 1; i <= k; i++) {
+ c *= (n - k + i) / i;
+ if (c > large) {
+ *scale += log(c); c = 1;
}
}
return(c);
}
+int BinomialK (double alpha, int n, double C[], double S[])
+{
+/* This calculates (alpha, i), for i = 0, ..., n. The result are in C[i] * exp(S[i]).
+*/
+ int i, nround = n, alphaint = (int)alpha;
+ double c = 1, large = 1E200;
+
+ if (alpha>0 && fabs(alpha - alphaint) < 1e-100) { /* usual combinations */
+ nround = min2(n, alphaint / 2);
+ }
+ C[0] = 1; S[0] = 0;
+ for (i = 1; i <= nround; i++) {
+ c *= (alpha - i + 1) / i;
+ S[i] = S[i - 1];
+ if (c > large) {
+ S[i] += log(c); c = 1;
+ }
+ C[i] = c;
+ }
+ for (; i <= min2(n, alphaint); i++) { /* if alpha is int and n > alpha/2 */
+ C[i] = C[alphaint - i]; S[i] = S[alphaint - i];
+ }
+ for (; i <= n; i++) { /* if alpha is int and n > alpha */
+ C[i] = 0; S[i] = 0;
+ }
+ /*
+ matout2(F0, C, n / 10, 10, 9, 1);
+ matout2(F0, S, n / 10, 10, 9, 1);
+ for (i = 0; i <= n; i++) C[i] *= exp(S[i]);
+ matout2(F0, C, n / 10, 10, 9, 1);
+ */
+ return(0);
+}
+
+
/****************************
Vectors and matrices
*****************************/
@@ -4555,7 +4594,8 @@ int matout (FILE *fout, double x[], int n, int m)
{
int i,j;
for (i=0,FPN(fout); i<n; i++,FPN(fout))
- FOR(j,m) fprintf(fout," %11.6f", x[i*m+j]);
+ for(j=0; j<m; j++)
+ fprintf(fout," %11.6f", x[i*m+j]);
return (0);
}
@@ -5704,25 +5744,25 @@ int gradient (int n, double x[], double f0, double g[],
{
/* f0 = fun(x) is always given.
*/
- int i,j;
- double *x0=space, *x1=space+n, eh0=Small_Diff, eh; /* 1e-7 */
+ int i, j;
+ double *x0 = space, *x1 = space + n, eh0 = Small_Diff, eh; /* 1e-7 */
if (Central) {
- for(i=0; i<n; i++) {
- for(j=0; j<n; j++)
+ for (i = 0; i<n; i++) {
+ for (j = 0; j<n; j++)
x0[j] = x1[j] = x[j];
- eh = eh0*(fabs(x[i])+1);
+ eh = pow(eh0*(fabs(x[i]) + 1), 0.67);
x0[i] -= eh; x1[i] += eh;
- g[i] = ((*fun)(x1,n) - (*fun)(x0,n))/(eh*2.0);
+ g[i] = ((*fun)(x1, n) - (*fun)(x0, n)) / (eh*2.0);
}
}
else {
- for(i=0; i<n; i++) {
- for(j=0; j<n; j++)
- x1[j]=x[j];
- eh=eh0*(fabs(x[i])+1);
- x1[i]+=eh;
- g[i] = ((*fun)(x1,n)-f0)/eh;
+ for (i = 0; i<n; i++) {
+ for (j = 0; j<n; j++)
+ x1[j] = x[j];
+ eh = eh0*(fabs(x[i]) + 1);
+ x1[i] += eh;
+ g[i] = ((*fun)(x1, n) - f0) / eh;
}
}
return(0);
@@ -5815,7 +5855,7 @@ int nls2 (FILE *fout, double *sx, double * x0, int nx,
(*fun) (x0, y, n, ny);
for (i=0, s0=0; i<ny; i++) s0 += y[i]*y[i];
- FOR (ii, maxround) {
+ for(ii=0; ii<maxround; ii++) {
increase=0;
if (jacobi) (*jacobi) (x0, J, n, ny);
else jacobi_gradient (x0, J, fun, space_J, n, ny);
@@ -5826,7 +5866,7 @@ int nls2 (FILE *fout, double *sx, double * x0, int nx,
v = sqrt (t) / (double) (ny*n); /* v = 0.0; */
}
- FOR (i,n) {
+ for (i = 0; i<n; i++) {
for (j=0,t=0; j<ny; j++) t += J[j*n+i] * y[j];
g[i] = 2*t;
C[i*(n+1)+n] = -t;
@@ -5841,11 +5881,11 @@ int nls2 (FILE *fout, double *sx, double * x0, int nx,
v *= bigger;
continue;
}
- FOR (i,n) p[i] = C[i*(n+1)+n];
+ for (i = 0; i<n; i++) p[i] = C[i*(n+1)+n];
t = bound (n, x0, p, x, testx);
if (t>1) t=1;
- FOR (i,n) x[i] = x0[i] + t * p[i];
+ for (i=0; i<n; i++) x[i] = x0[i] + t * p[i];
(*fun) (x, y, n, ny);
for (i=0,s=0; i<ny; i++) s += y[i]*y[i];
@@ -5868,8 +5908,7 @@ int nls2 (FILE *fout, double *sx, double * x0, int nx,
-double bound (int nx, double x0[], double p[], double x[],
- int(*testx)(double x[], int nx))
+double bound (int nx, double x0[], double p[], double x[], int(*testx)(double x[], int nx))
{
/* find largest t so that x[]=x0[]+t*p[] is still acceptable.
for bounded minimization, p is possibly changed in this function
@@ -5879,7 +5918,7 @@ double bound (int nx, double x0[], double p[], double x[],
double factor=20, by=1, small=1e-8; /* small=(SIZEp>1?1e-7:1e-8) */
xtoy (x0, x, nx);
- FOR (i,nx) {
+ for (i = 0; i<nx; i++) {
x[i]=x0[i]+small*p[i];
if ((*testx) (x, nx)) { p[i]=0.0; nd++; }
x[i]=x0[i];
@@ -5887,7 +5926,7 @@ double bound (int nx, double x0[], double p[], double x[],
if (nd==nx) { if (noisy) puts ("bound:no move.."); return (0); }
for (by=0.75; ; ) {
- FOR (i,nx) x[i]=x0[i]+factor*p[i];
+ for (i = 0; i<nx; i++) x[i]=x0[i]+factor*p[i];
if ((*testx)(x,nx)==0) break;
factor *= by;
}
@@ -5897,7 +5936,7 @@ double bound (int nx, double x0[], double p[], double x[],
-double LineSearch (double(*fun)(double x),double *f,double *x0,double xb[2],double step, double e)
+double LineSearch(double(*fun)(double x), double *f, double *x0, double xb[2], double step, double e)
{
/* linear search using quadratic interpolation
@@ -6286,21 +6325,21 @@ int gradientB (int n, double x[], double f0, double g[],
/* f0=fun(x) is always provided.
xmark=0: central; 1: upper; -1: down
*/
- int i,j;
- double *x0=space, *x1=space+n, eh0=Small_Diff, eh; /* eh0=1e-6 || 1e-7 */
-
- for(i=0; i<n; i++) {
- eh = eh0*(fabs(x[i])+1);
- if (xmark[i]==0 && (AlwaysCenter || SIZEp<1)) { /* central */
- for(j=0; j<n; j++) x0[j] = x1[j] = x[j];
- x0[i] -= eh; x1[i] += eh;
- g[i] = ((*fun)(x1,n) - (*fun)(x0,n))/(eh*2.0);
+ int i, j;
+ double *x0 = space, *x1 = space + n, eh0 = Small_Diff, eh; /* eh0=1e-6 || 1e-7 */
+
+ for (i = 0; i<n; i++) {
+ eh = eh0*(fabs(x[i]) + 1);
+ if (xmark[i] == 0 && (AlwaysCenter || SIZEp<1)) { /* central */
+ for (j = 0; j<n; j++) x0[j] = x1[j] = x[j];
+ eh = pow(eh, .67); x0[i] -= eh; x1[i] += eh;
+ g[i] = ((*fun)(x1, n) - (*fun)(x0, n)) / (eh*2.0);
}
- else { /* forward or backward */
- for(j=0; j<n; j++) x1[j] = x[j];
+ else { /* forward or backward */
+ for (j = 0; j<n; j++) x1[j] = x[j];
if (xmark[i]) eh *= -xmark[i];
x1[i] += eh;
- g[i] = ((*fun)(x1,n) - f0)/eh;
+ g[i] = ((*fun)(x1, n) - f0) / eh;
}
}
return(0);
diff --git a/src/treesub.c b/src/treesub.c
index 901aaa8..294f2c1 100644
--- a/src/treesub.c
+++ b/src/treesub.c
@@ -3,7 +3,7 @@
such as baseml, basemlg, codeml, and pamp.
*/
-extern char BASEs[], *EquateBASE[], BASEs5[], *EquateBASE5[], AAs[], BINs[], CODONs[][4], nChara[], CharaMap[][64];
+extern char BASEs[], *EquateBASE[], AAs[], BINs[], CODONs[][4], nChara[], CharaMap[][64];
extern int noisy;
@@ -54,29 +54,29 @@ double SS, NN, Sd, Nd; /* kostas, # of syn. sites,# of non syn. sites,# of syn.
int PatternWeightSimple(void)
{
-/* This is modified from PatternWeight() and collaps sites into patterns,
- for nucleotide, amino acid, or codon sequences.
- This relies on \0 being the end of the string so that sequences should not be
- encoded before this routine is called.
- com.pose[i] has labels for genes as input and maps sites to patterns in return.
- com.fpatt, a vector of doubles, wastes space as site pattern counts are integers.
- Sequences z[ns*ls] are copied into patterns zt[ls*lpatt], and bsearch is used
- twice to avoid excessive copying, to count npatt first & to generate fpatt etc.
+/* This is modified from PatternWeight(), and does not deal with multiple genes in
+ the same alignment (com.ngene, com.lgene[], com.posG[], etc.)
+ Binary search is used to sort site patterns, with patterns represented using 0-ended strings.
+ The routine works with nucleotide, amino acid, or codon sequences.
+ This should work with both encoded and un-encoded sequences.
+ If com.pose is not NULL, this generates the site-to-pattern map in com.pose[].
+ com.fpatt holds site-pattern counts.
+ Sequences z[ns][ls] are copied into patterns zt[ls*lpatt], and bsearch is used
+ twice to avoid excessive copying, first to count npatt and identify the site patterns and
+ second to generate fpatt[], pose[] etc.
*/
int maxnpatt = com.ls, h, l, u, ip, j, k, same;
- /* int n31 = (com.seqtype==CODONseq ? 3 : 1); */
- int n31 = 1;
- int lpatt = com.ns*n31 + 1; /* extra 0 used for easy debugging, can be avoided */
+ int n31 = (com.seqtype==CODONseq ? 3 : 1);
+ int lpatt = com.ns*n31 + 1;
int *p2s; /* point patterns to sites in zt */
- char *zt;
- unsigned char *p;
+ char timestr[36];
+ unsigned char *p, *zt;
double nc = (com.seqtype == 1 ? 64 : com.ncode) + !com.cleandata + 1;
int debug = 0;
- char DS[] = "DS";
/* (A) Collect and sort patterns. Get com.npatt.
- Move sequences com.z[ns][ls] into sites zt[ls*lpatt].
- Use p2s to map patterns to sites in zt to avoid copying.
+ Move sequences com.z[ns][ls] into sites zt[ls*lpatt].
+ Use p2s to map patterns to sites in zt to avoid copying.
*/
if ((com.seqtype == 1 && com.ns<5) || (com.seqtype != 1 && com.ns<7))
@@ -89,13 +89,13 @@ int PatternWeightSimple(void)
for (j = 0; j<com.ns; j++)
for (h = 0; h<com.ls; h++)
for (k = 0; k<n31; k++)
- zt[h*lpatt + j*n31 + k] = com.z[j][h*n31 + k];
+ zt[h*lpatt + j*n31 + k] = (unsigned char)(com.z[j][h*n31 + k] + 1);
com.npatt = l = u = ip = 0;
for (h = 0; h<com.ls; h++) {
if (debug) printf("\nh %3d %s", h, zt + h*lpatt);
/* bsearch in existing patterns. Knuth 1998 Vol3 Ed2 p.410
- ip is the loc for match or insertion. [l,u] is the search interval.
+ ip is the loc for match or insertion. [l,u] is the search interval.
*/
same = 0;
if (h != 0) { /* not 1st pattern? */
@@ -121,16 +121,21 @@ int PatternWeightSimple(void)
}
if (debug) {
- printf(": %3d (%c ilu %3d%3d%3d) ", com.npatt, DS[same], ip, l, u);
+ printf(": %3d (%c ilu %3d%3d%3d) ", com.npatt, (same?'S':'D'), ip, l, u);
for (j = 0; j<com.npatt; j++)
printf(" %s", zt + p2s[j] * lpatt);
}
+ if(noisy>2 && ((h+1)%10000==0 || h+1==com.ls))
+ printf("\r%12d patterns at %8d / %8d sites (%.1f%%), %s",
+ com.npatt, h+1, com.ls, (h+1.)*100/com.ls, printtime(timestr));
} /* for (h) */
-
+ if(noisy>2) printf("\n%d patterns\n", com.npatt);
+
/* (B) count pattern frequencies */
com.fpatt = (double*)realloc(com.fpatt, com.npatt*sizeof(double));
if (com.fpatt == NULL) error2("oom fpatt");
- for (ip = 0; ip<com.npatt; ip++) com.fpatt[ip] = 0;
+ memset(com.fpatt, 0, com.npatt*sizeof(double));
+
for (h = 0; h<com.ls; h++) {
for (same = 0, l = 0, u = com.npatt - 1;;) {
if (u<l) break;
@@ -143,114 +148,188 @@ int PatternWeightSimple(void)
if (!same)
error2("ghost pattern?");
com.fpatt[ip]++;
+ if(com.pose) com.pose[h] = ip;
} /* for (h) */
-
for (j = 0; j<com.ns; j++) {
- for (ip = 0, p = com.z[j]; ip<com.npatt; ip++)
- for (k = 0; k<n31; k++)
- *p++ = zt[p2s[ip] * lpatt + j*n31 + k];
+ com.z[j] = p = (unsigned char*)realloc(com.z[j], com.npatt * sizeof(unsigned char));
+ for (ip = 0; ip<com.npatt; ip++)
+ for (k = 0; k<n31; k++)
+ *p++ = (unsigned char)(zt[p2s[ip] * lpatt + j*n31 + k] - 1);
}
free(p2s); free(zt);
return (0);
}
+int ConvertSiteJC69like(unsigned char *z[], int ns, int h, unsigned char zh[])
+{
+/* This converts a site (or pattern) of nucleotides or amino acids for JC69-like models.
+ Sequence alignments in com.z[] are already encoded into 0, 1, ...
+ If the data have no ambiguities (com.cleandata=1), the routine converts,
+ for example, a site 1120 (CCAT) into 0012 (TTCA) before checking against old
+ patterns already found. If a site contains non-gap ambiguities, it is not
+ converted. For every site, the routine changes ? or N into - first, and then
+ converts the site iff there are no non-gap ambiguities. Thus CC?T will be
+ converted into CC-T first and then into TT-C. A site with CCRT will not be
+ converted. In theory such sites may be compressed as well, but the effort is
+ perhaps not worthwhile.
+*/
+ char b, gap;
+ char *pch = (com.seqtype == 0 ? BASEs : (com.seqtype == 2 ? AAs : BINs));
+ int npatt0 = com.npatt, j, k, same = 0, convert;
+
+ gap = (char)(strchr(pch, (int)'-') - pch);
+
+ if (com.cleandata) { /* clean data, always convert */
+ zh[0] = b = 0;
+ b++;
+ for (j = 1; j<com.ns; j++) {
+ for (k = 0; k<j; k++)
+ if (z[j][h] == z[k][h]) break;
+ zh[j] = (k<j ? zh[k] : b++);
+ }
+ }
+ else { /* convert only if there are no non-gap ambiguity characters */
+ for (j = 0; j<ns; j++)
+ zh[j] = z[j][h];
+
+ /* After this loop, convert = 0 or 1 decides whether to convert. */
+ for (j = 0, convert = 1; j<ns; j++) {
+ if (zh[j] < com.ncode)
+ continue;
+ if (nChara[(int)zh[j]] == com.ncode) {
+ zh[j] = gap;
+ continue;
+ }
+ convert = 0;
+ break;
+ }
+ if (convert) {
+ b = 0;
+ if (zh[0] != gap)
+ zh[0] = b++;
+ for (j = 1; j<ns; j++) {
+ if (zh[j] != gap) {
+ for (k = 0; k<j; k++)
+ if (zh[j] == z[k][h]) break;
+ if (k<j) zh[j] = zh[k];
+ else zh[j] = b++;
+ }
+ }
+ }
+ }
+ for (j = 0; j<ns; j++) zh[j] ++; /* change 0 to 1. */
+ return(0);
+}
+
+
int PatternWeightJC69like (void)
{
- /* This collaps site patterns further for JC69-like models, called after
- PatternWeight(). This is used for JC and poisson amino acid models.
- The routine could be merged into PatternWeight(), which should lead to
- faster computation, but this is not done because right now
- InitializeBaseAA() prints out base or amino acid frequencies after
- PatternWeight() and before this routine.
-
- If the data have no ambiguities (com.cleandata=1), the routine recodes
- the data, for example, changing data at a site 1120 (CCAT) into 0012
- (TTCA) before checking against old patterns already found. If the data
- contain ambiguities, they are not encoded. In that case, for every
- site, the routine changes ? or N into - first. It then checks whether there
- are any other ambibiguities and will recode if and only if there are not
- any other ambiguities. For example, a site with data CC?T will be
- changed into CC-T first and then recoded into TT-C and checked against
- old patterns found. A site with data CCRT will not be recoded. In theory
- such sites may be packed as well, but perhaps the effort is not worthwhile.
- The routine checks data like CCRT against old patterns already found,
-
- If com.pose is not NULL, the routine also updates com.pose. This allows
- the program to work if com.readpattern==1.
- */
- char zh[NS], b, gap;
- char *pch=(com.seqtype==0 ? BASEs : (com.seqtype==2 ? AAs: (com.seqtype==5?BASEs5:BINs)));
- int npatt0=com.npatt, h, ht, j,k, same=0, ig, recode;
-
- if(com.seqtype==1)
- error2("PatternWeightJC69like does not work for codon seqs");
- if(noisy>3) printf("Counting site patterns again, for JC69.\n");
- gap = (char) (strchr(pch, (int)'-') - pch);
- for (h=0,com.npatt=0,ig=-1; h<npatt0; h++) {
- if (ig<com.ngene-1 && h==com.posG[ig+1])
- com.posG[++ig] = com.npatt;
-
- if(com.cleandata) { /* clean data, always recode */
- zh[0] = b = 0;
- b++;
- for (j=1; j<com.ns; j++) {
- for(k=0; k<j; k++)
- if (com.z[j][h]==com.z[k][h]) break;
- zh[j] = (k<j ? zh[k] : b++);
- }
- }
- else { /* recode only if there are no non-gap ambiguity characters */
- for(j=0; j<com.ns; j++)
- zh[j] = com.z[j][h];
-
- /* After this loop, recode = 0 or 1 decides whether to recode. */
- for (j=0,recode=1; j<com.ns; j++) {
- if (zh[j] < com.ncode)
- continue;
- if (nChara[(int)zh[j]] == com.ncode) {
- zh[j] = gap;
- continue;
- }
- recode = 0;
- break;
- }
- if(recode) {
- b = 0;
- if(zh[0] != gap)
- zh[0] = b++;
- for (j=1; j<com.ns; j++) {
- if(zh[j] != gap) {
- for(k=0; k<j; k++)
- if (zh[j] == com.z[k][h]) break;
- if(k<j) zh[j] = zh[k];
- else zh[j] = b++;
- }
- }
- }
- }
-
- for (ht=com.posG[ig],same=0; ht<com.npatt; ht++) {
- for (j=0,same=1; j<com.ns; j++)
- if (zh[j]!=com.z[j][ht]) {
- same = 0; break;
- }
- if (same) break;
- }
- if (same)
- com.fpatt[ht] += com.fpatt[h];
- else {
- for(j=0; j<com.ns; j++) com.z[j][com.npatt] = zh[j];
- com.fpatt[com.npatt++] = com.fpatt[h];
- }
- if(com.pose)
- for(k=0; k<com.ls; k++)
- if(com.pose[k]==h) com.pose[k] = ht;
- } /* for (h) */
- com.posG[com.ngene] = com.npatt;
- if(noisy>3) printf ("new no. site patterns:%7d\n", com.npatt);
-
- return (0);
+/* This collapses site patterns further for JC69-like models, called after
+ PatternWeight(). This is used for JC and poisson amino acid models.
+ The routine could be merged into PatternWeight(), which should lead to
+ faster computation, but this is not done because right now
+ InitializeBaseAA() prints out base or amino acid frequencies after
+ PatternWeight() and before this routine.
+
+ If com.pose is not NULL, the routine also updates com.pose. This allows the program
+ to work if com.readpattern==1.
+ This works for nucleotide and amino acid models, but not codon models.
+ This routine is nearly identical to PatternWeight, which works for un-encoded sequences.
+ fpatt0 stores the old com.fpatt info, while com.fpatt is shrunk.
+ Think about merging them (encode sequences first and compress sites).
+*/
+ int npatt0=com.npatt, lpatt = com.ns + 1, h, l, u, ip, j, k, same;
+ int *p2s; /* point patterns to sites in zt */
+ char timestr[36];
+ unsigned char *p, *zt;
+ double *fpatt0;
+ int debug = 0;
+
+ /* (A) Collect and sort patterns. Get com.npatt.
+ Move sequences com.z[ns][ls] into sites zt[ls*lpatt].
+ Use p2s to map patterns to sites in zt to avoid copying.
+ */
+ if (noisy>2) printf("Counting site patterns again, for JC69.. %s\n", printtime(timestr));
+ if (com.seqtype == 1) error2("PatternWeightJC69like does not work for codon seqs");
+ if (com.ngene>1) error2("PatternWeightJC69like does not work when ngene > 1");
+
+ p2s = (int*)malloc(npatt0 * sizeof(int));
+ zt = (char*)malloc(npatt0*lpatt * sizeof(char));
+ fpatt0 = (double*)malloc(npatt0* sizeof(double));
+ if (p2s == NULL || zt == NULL || fpatt0 == NULL) error2("oom p2s or zt or fpatt0");
+ memset(zt, 0, npatt0*lpatt * sizeof(char));
+ memmove(fpatt0, com.fpatt, npatt0*sizeof(double));
+ for (h = 0; h<npatt0; h++)
+ ConvertSiteJC69like(com.z, com.ns, h, zt + h*lpatt);
+
+ l = u = ip = com.npatt = 0;
+ for (h = 0; h<npatt0; h++) {
+ if (debug) printf("\nh %3d %s", h, zt + h*lpatt);
+
+ /* bsearch in existing patterns. Knuth 1998 Vol3 Ed2 p.410
+ ip is the loc for match or insertion. [l,u] is the search interval.
+ */
+ same = 0;
+ if (h != 0) { /* not 1st pattern? */
+ for (l = 0, u = com.npatt - 1; ; ) {
+ if (u<l) break;
+ ip = (l + u) / 2;
+ k = strcmp(zt + h*lpatt, zt + p2s[ip] * lpatt);
+ if (k<0) u = ip - 1;
+ else if (k>0) l = ip + 1;
+ else { same = 1; break; }
+ }
+ }
+ if (!same) {
+ if (l > ip) ip++; /* last comparison in bsearch had k > 0. */
+ /* Insert new pattern at ip. This is the expensive step. */
+ if (ip<com.npatt)
+ memmove(p2s + ip + 1, p2s + ip, (com.npatt - ip) * sizeof(int));
+ p2s[ip] = h;
+ com.npatt++;
+ }
+ if (debug) {
+ printf(": %3d (%c ilu %3d%3d%3d) ", com.npatt, (same ? 'S' : 'D'), ip, l, u);
+ for (j = 0; j<com.npatt; j++)
+ printf(" %s", zt + p2s[j] * lpatt);
+ }
+ if (noisy>2 && ((h + 1) % 10000 == 0 || h + 1 == npatt0))
+ printf("\rCompressing, %6d patterns at %6d / %6d sites (%.1f%%), %s",
+ com.npatt, h + 1, npatt0, (h + 1.) * 100 / npatt0, printtime(timestr));
+ } /* for (h) */
+ if (noisy>2) printf("\n");
+
+ /* (B) count pattern frequencies and collect pose[] */
+ com.fpatt = (double*)realloc(com.fpatt, com.npatt * sizeof(double));
+ memset(com.fpatt, 0, com.npatt * sizeof(double));
+
+ for (h = 0; h<npatt0; h++) {
+ for (same = 0, l = 0, u = com.npatt - 1; ; ) {
+ if (u<l) break;
+ ip = (l + u) / 2;
+ k = strcmp(zt + h*lpatt, zt + p2s[ip] * lpatt);
+ if (k<0) u = ip - 1;
+ else if (k>0) l = ip + 1;
+ else { same = 1; break; }
+ }
+ if (!same) error2("ghost pattern?");
+ com.fpatt[ip] += fpatt0[h];
+ if(com.pose) com.pose[h] = ip;
+ if (noisy>2 && ((h + 1) % 10000 == 0 || h + 1 == npatt0))
+ printf("\rCollecting patterns, %6d patterns at %6d / %6d sites (%.1f%%), %s",
+ com.npatt, h + 1, npatt0, (h + 1.) * 100 / npatt0, printtime(timestr));
+ } /* for (h) */
+ if (noisy>2) printf("\n");
+
+ for (j = 0; j<com.ns; j++) {
+ com.z[j] = (unsigned char*)realloc(com.z[j], com.npatt * sizeof(unsigned char));
+ for (ip = 0, p = com.z[j]; ip<com.npatt; ip++)
+ *p++ = (unsigned char)(zt[p2s[ip] * lpatt + j] - 1);
+ }
+ free(p2s); free(zt); free(fpatt0);
+
+ return (0);
}
@@ -416,7 +495,7 @@ int ReadSeq (FILE *fout, FILE *fseq, int cleandata, int locus)
int n31=(com.seqtype==CODONseq||com.seqtype==CODON2AAseq?3:1);
int gap=(n31==3?3:10), nchar=(com.seqtype==AAseq?20:4);
int h,b[3]={0};
- char *pch=((com.seqtype<=1||com.seqtype==CODON2AAseq) ? BASEs : (com.seqtype==2 ? AAs: (com.seqtype==5 ? BASEs5 : BINs)));
+ char *pch=((com.seqtype<=1||com.seqtype==CODON2AAseq) ? BASEs : (com.seqtype==2 ? AAs: BINs));
char str[4]=" ";
char *NEXUSend="end;";
double lst;
@@ -430,7 +509,7 @@ int ReadSeq (FILE *fout, FILE *fseq, int cleandata, int locus)
if (noisy>=9 && (com.seqtype<=CODONseq||com.seqtype==CODON2AAseq)) {
puts("\n\nAmbiguity character definition table:\n");
for(i=0; i<(int)strlen(BASEs); i++) {
- nb = strlen(EquateBASE[i]);
+ nb = (int)strlen(EquateBASE[i]);
printf("%c (%d): ", BASEs[i], nb);
for(j=0; j<nb; j++) printf("%c ", EquateBASE[i][j]);
FPN(F0);
@@ -598,9 +677,9 @@ readseq:
}
p = line+(line[0]=='=' || line[0]=='>') ;
while(isspace(*p)) p++;
- if ((ch=strstr(p," ")-p)<lspname && ch>0) lspname=ch;
+ if ((ch=(int)(strstr(p," ")-p)) < lspname && ch>0) lspname=ch;
strncpy (com.spname[j], p, lspname);
- k = strlen(com.spname[j]);
+ k = (int)strlen(com.spname[j]);
p += (k<lspname?k:lspname);
for (; k>0; k--) /* trim spaces */
@@ -667,10 +746,10 @@ readseq:
if (igroup==0) {
lspname = LSPNAME;
while(isspace(*p)) p++;
- if ((ch=strstr(p," ")-p)<lspname && ch>0)
+ if ((ch = (int)(strstr(p," ")-p)) < lspname && ch>0)
lspname = ch;
strncpy (com.spname[j], p, lspname);
- k = strlen(com.spname[j]);
+ k = (int)strlen(com.spname[j]);
p += (k<lspname?k:lspname);
for (; k>0; k--) /* trim spaces */
@@ -996,18 +1075,18 @@ int printPatterns(FILE *fout)
void EncodeSeqs (void)
{
- /* This encodes sequences and set up com.TipMap[][], called after sites are collapsed
- into patterns.
- */
+/* This encodes sequences and sets up com.TipMap[][], called after sites are collapsed
+ into patterns.
+*/
int n=com.ncode, nA, is,h, i, j, k,ic, indel=0, ch, b[3];
- char *pch = ((com.seqtype==0||com.seqtype==1) ? BASEs : (com.seqtype==2 ? AAs : (com.seqtype==5 ? BASEs5: BINs)));
+ char *pch = ((com.seqtype==0||com.seqtype==1) ? BASEs : (com.seqtype==2 ? AAs : BINs));
unsigned char c[4]="", str[4]=" ";
if(com.seqtype != 1) {
for(is=0; is<com.ns; is++) {
for (h=0; h<com.npatt; h++) {
ch = com.z[is][h];
- com.z[is][h] = (char)(k = strchr(pch, ch) - pch);
+ com.z[is][h] = (char)(k = (int)(strchr(pch, ch) - pch));
if(k<0) {
printf("strange character %c in seq %d site %d\n", ch, is+1, h+1);
exit(-1);
@@ -1070,9 +1149,9 @@ void SetMapAmbiguity (void)
/* This sets up CharaMap, the map from the ambiguity characters to resolved characters.
*/
int n=com.ncode, i,j, i0,i1,i2, nb[3], ib[3][4], ic;
- char *pch = (com.seqtype==0 ? BASEs : (com.seqtype==2 ? AAs : (com.seqtype==5 ? BASEs5: BINs)));
- char *pbases = (com.seqtype==0 ? BASEs : (com.seqtype==5 ? BASEs5: NULL));
- char **pEquateBASE = (com.seqtype==0 ? EquateBASE : (com.seqtype==5 ? EquateBASE5 : NULL));
+ char *pch = (com.seqtype==0 ? BASEs : (com.seqtype==2 ? AAs : BINs));
+ char *pbases = (com.seqtype==0 ? BASEs : NULL);
+ char **pEquateBASE = (com.seqtype==0 ? EquateBASE : NULL);
char debug=0;
for(j=0; j<n; j++) { /* basic characters, coded according to the definition in pch. */
@@ -1221,147 +1300,142 @@ void AllPatterns (FILE* fout)
}
-int PatternWeight (void)
+int PatternWeight(void)
{
- /* This collaps sites into patterns, for nucleotide, amino acid, or codon sequences.
- This relies on \0 being the end of the string so that sequences should not be
- encoded before this routine is called.
- com.pose[i] has labels for genes as input and maps sites to patterns in return.
- com.fpatt, a vector of doubles, wastes space as site pattern counts are integers.
- Sequences z[ns*ls] are copied into patterns zt[ls*lpatt], and bsearch is used
- twice to avoid excessive copying, to count npatt first & to generate fpatt etc.
- */
- int maxnpatt=com.ls, h, ip,l,u, j, k, same, ig, *poset;
- // int gap = (com.seqtype==CODONseq ? 3 : 10);
- int n31 = (com.seqtype==CODONseq ? 3 : 1);
- int lpatt=com.ns*n31+1; /* extra 0 used for easy debugging, can be voided */
- int *p2s; /* point patterns to sites in zt */
- char *zt, timestr[36];
- unsigned char *p;
- double nc = (com.seqtype == 1 ? 64 : com.ncode) + !com.cleandata+1;
- int debug=0;
- char DS[]="DS";
-
- /* (A)
- Collect and sort patterns. Get com.npatt, com.lgene, com.posG.
- Move sequences com.z[ns][ls] into sites zt[ls*lpatt].
- Use p2s to map patterns to sites in zt to avoid copying.
- */
- if(noisy) printf("Counting site patterns.. %s\n", printtime(timestr));
-
- if((com.seqtype==1 && com.ns<5) || (com.seqtype!=1 && com.ns<7))
- maxnpatt = (int)(pow(nc, (double)com.ns) + 0.5) * com.ngene;
- if(maxnpatt>com.ls) maxnpatt = com.ls;
- p2s = (int*)malloc(maxnpatt*sizeof(int));
- zt = (char*)malloc(com.ls*lpatt*sizeof(char));
- if(p2s==NULL || zt==NULL) error2("oom p2s or zt");
- memset(zt, 0, com.ls*lpatt*sizeof(char));
- for(j=0; j<com.ns; j++)
- for(h=0; h<com.ls; h++)
- for(k=0; k<n31; k++)
- zt[h*lpatt+j*n31+k] = com.z[j][h*n31+k];
-
- for(j=0; j<com.ns; j++) free(com.z[j]);
-
- for(ig=0; ig<com.ngene; ig++) com.lgene[ig] = 0;
- for(ig=0,com.npatt=0; ig<com.ngene; ig++) {
- com.posG[ig] = l = u = ip = com.npatt;
- for(h=0; h<com.ls; h++) {
- if(com.pose[h] != ig) continue;
- if(debug) printf("\nh %3d %s", h, zt+h*lpatt);
-
- /* bsearch in existing patterns. Knuth 1998 Vol3 Ed2 p.410
- ip is the loc for match or insertion. [l,u] is the search interval.
- */
- same = 0;
- if(com.lgene[ig]++ != 0) { /* not 1st pattern? */
- for(l=com.posG[ig], u=com.npatt-1; ; ) {
- if(u<l) break;
- ip = (l+u)/2;
- k = strcmp(zt+h*lpatt, zt+p2s[ip]*lpatt);
- if(k<0) u = ip - 1;
- else if(k>0) l = ip + 1;
- else { same = 1; break; }
- }
- }
- if(!same) {
- if(com.npatt>maxnpatt)
- error2("npatt > maxnpatt");
- if(l > ip) ip++; /* last comparison in bsearch had k > 0. */
- /* Insert new pattern at ip. This is the expensive step. */
-
- if(ip<com.npatt)
- memmove(p2s+ip+1, p2s+ip, (com.npatt-ip)*sizeof(int));
-
- /*
- for(j=com.npatt; j>ip; j--)
- p2s[j] = p2s[j-1];
- */
- p2s[ip] = h;
- com.npatt ++;
- }
-
- if(debug) {
- printf(": %3d (%c ilu %3d%3d%3d) ", com.npatt, DS[same], ip, l, u);
- for(j=0; j<com.npatt; j++)
- printf(" %s", zt+p2s[j]*lpatt);
- }
- if(noisy && ((h+1)%10000==0 || h+1==com.ls))
- printf("\r%12d patterns at %8d / %8d sites (%.1f%%), %s",
- com.npatt, h+1, com.ls, (h+1.)*100/com.ls, printtime(timestr));
-
- } /* for (h) */
- } /* for (ig) */
- if(noisy) FPN(F0);
-
- /* (B) count pattern frequencies and collect pose[] */
- com.posG[com.ngene] = com.npatt;
- for(j=0; j<com.ngene; j++)
- if(com.lgene[j]==0)
- error2("some gene labels are missing");
- for(j=1; j<com.ngene; j++)
- com.lgene[j] += com.lgene[j-1];
-
- com.fpatt = (double*)realloc(com.fpatt, com.npatt*sizeof(double));
- poset = (int*)malloc(com.ls*sizeof(int));
- if(com.fpatt==NULL || poset==NULL) error2("oom poset");
- for(ip=0; ip<com.npatt; ip++) com.fpatt[ip] = 0;
-
- for(ig=0; ig<com.ngene; ig++) {
- for(h=0; h<com.ls; h++) {
- if(com.pose[h] != ig) continue;
- for(same=0, l=com.posG[ig], u=com.posG[ig+1]-1; ; ) {
- if(u<l) break;
- ip = (l+u)/2;
- k = strcmp(zt+h*lpatt, zt+p2s[ip]*lpatt);
- if(k<0) u = ip - 1;
- else if(k>0) l = ip + 1;
- else { same = 1; break; }
- }
- if(!same)
- error2("ghost pattern?");
- com.fpatt[ip]++;
- poset[h] = ip;
- } /* for (h) */
- } /* for (ig) */
-
- if(com.seqtype==CODONseq && com.ngene==3 &&com.lgene[0]==com.ls/3) {
- puts("\nCheck option G in data file? (Enter)\n");
- }
-
- for(j=0; j<com.ns; j++) {
- com.z[j] = (unsigned char*)malloc(com.npatt*n31*sizeof(char));
- for(ip=0,p=com.z[j]; ip<com.npatt; ip++)
- for(k=0; k<n31; k++)
- *p++ = zt[p2s[ip]*lpatt + j*n31 + k];
- }
- memcpy(com.pose, poset, com.ls*sizeof(int));
- free(poset); free(p2s); free(zt);
-
- return (0);
+/* This collapses sites into patterns, for nucleotide, amino acid, or codon sequences.
+ This relies on \0 being the end of the string.
+ com.pose[i] has labels for genes as input and maps sites to patterns in return.
+ com.fpatt has site-pattern counts.
+ This deals with multiple genes/partitions, and uses com.ngene, com.lgene[], com.posG[] etc.
+ Sequences z[ns][ls] are copied into patterns zt[ls*lpatt], and bsearch is used
+ twice to avoid excessive copying, the first round to count npatt and identify the site patterns
+ and the second round to generate fpatt[] & com.pose[].
+*/
+ int maxnpatt = com.ls, h, ip, l, u, j, k, same, ig, *poset;
+ // int gap = (com.seqtype==CODONseq ? 3 : 10);
+ int n31 = (com.seqtype == CODONseq ? 3 : 1);
+ int lpatt = com.ns*n31 + 1; /* extra 0 used for easy debugging, can be voided */
+ int *p2s; /* point patterns to sites in zt */
+ char timestr[36];
+ unsigned char *p, *zt;
+ double nc = (com.seqtype == 1 ? 64 : com.ncode) + !com.cleandata + 1;
+ int debug = 0;
+
+ /* (A) Collect and sort patterns. Get com.npatt, com.lgene, com.posG.
+ Move sequences com.z[ns][ls] into sites zt[ls*lpatt].
+ Use p2s to map patterns to sites in zt to avoid copying.
+ */
+ if (noisy) printf("Counting site patterns.. %s\n", printtime(timestr));
+
+ if ((com.seqtype == 1 && com.ns<5) || (com.seqtype != 1 && com.ns<7))
+ maxnpatt = (int)(pow(nc, (double)com.ns) + 0.5) * com.ngene;
+ if (maxnpatt>com.ls) maxnpatt = com.ls;
+ p2s = (int*)malloc(maxnpatt * sizeof(int));
+ zt = (char*)malloc(com.ls*lpatt * sizeof(char));
+ if (p2s == NULL || zt == NULL) error2("oom p2s or zt");
+ memset(zt, 0, com.ls*lpatt * sizeof(char));
+ for (j = 0; j<com.ns; j++)
+ for (h = 0; h<com.ls; h++)
+ for (k = 0; k<n31; k++)
+ zt[h*lpatt + j*n31 + k] = (unsigned char)(com.z[j][h*n31 + k] + 1);
+
+ for (ig = 0; ig<com.ngene; ig++) com.lgene[ig] = 0;
+ for (ig = 0, com.npatt = 0; ig<com.ngene; ig++) {
+ com.posG[ig] = l = u = ip = com.npatt;
+ for (h = 0; h<com.ls; h++) {
+ if (com.pose[h] != ig) continue;
+ if (debug) printf("\nh %3d %s", h, zt + h*lpatt);
+
+ /* bsearch in existing patterns. Knuth 1998 Vol3 Ed2 p.410
+ ip is the loc for match or insertion. [l,u] is the search interval.
+ */
+ same = 0;
+ if (com.lgene[ig]++ != 0) { /* not 1st pattern? */
+ for (l = com.posG[ig], u = com.npatt - 1; ; ) {
+ if (u<l) break;
+ ip = (l + u) / 2;
+ k = strcmp(zt + h*lpatt, zt + p2s[ip] * lpatt);
+ if (k<0) u = ip - 1;
+ else if (k>0) l = ip + 1;
+ else { same = 1; break; }
+ }
+ }
+ if (!same) {
+ if (com.npatt>maxnpatt)
+ error2("npatt > maxnpatt");
+ if (l > ip) ip++; /* last comparison in bsearch had k > 0. */
+ /* Insert new pattern at ip. This is the expensive step. */
+ if (ip<com.npatt)
+ memmove(p2s + ip + 1, p2s + ip, (com.npatt - ip) * sizeof(int));
+ p2s[ip] = h;
+ com.npatt++;
+ }
+
+ if (debug) {
+ printf(": %3d (%c ilu %3d%3d%3d) ", com.npatt, (same ? 'S' : 'D'), ip, l, u);
+ for (j = 0; j<com.npatt; j++)
+ printf(" %s", zt + p2s[j] * lpatt);
+ }
+ if (noisy && ((h + 1) % 10000 == 0 || h + 1 == com.ls))
+ printf("\rCompressing, %6d patterns at %6d / %6d sites (%.1f%%), %s",
+ com.npatt, h + 1, com.ls, (h + 1.) * 100 / com.ls, printtime(timestr));
+
+ } /* for (h) */
+ if (noisy) FPN(F0);
+ } /* for (ig) */
+
+ /* (B) count pattern frequencies and collect pose[] */
+ com.posG[com.ngene] = com.npatt;
+ for (j = 0; j<com.ngene; j++)
+ if (com.lgene[j] == 0)
+ error2("some genes do not have any sites?");
+ for (j = 1; j<com.ngene; j++)
+ com.lgene[j] += com.lgene[j - 1];
+
+ com.fpatt = (double*)realloc(com.fpatt, com.npatt * sizeof(double));
+ poset = (int*)malloc(com.ls * sizeof(int));
+ if (com.fpatt == NULL || poset == NULL) error2("oom poset");
+ memset(com.fpatt, 0, com.npatt * sizeof(double));
+
+ for (ig = 0; ig<com.ngene; ig++) {
+ for (h = 0; h<com.ls; h++) {
+ if (com.pose[h] != ig) continue;
+ for (same = 0, l = com.posG[ig], u = com.posG[ig + 1] - 1; ; ) {
+ if (u<l) break;
+ ip = (l + u) / 2;
+ k = strcmp(zt + h*lpatt, zt + p2s[ip] * lpatt);
+ if (k<0) u = ip - 1;
+ else if (k>0) l = ip + 1;
+ else { same = 1; break; }
+ }
+ if (!same)
+ error2("ghost pattern?");
+ com.fpatt[ip]++;
+ poset[h] = ip;
+ if (noisy && ((h + 1) % 10000 == 0 || h + 1 == com.ls))
+ printf("\rCollecting patterns, %6d patterns at %6d / %6d sites (%.1f%%), %s",
+ com.npatt, h + 1, com.ls, (h + 1.) * 100 / com.ls, printtime(timestr));
+ } /* for (h) */
+ if (noisy) FPN(F0);
+ } /* for (ig) */
+
+ if (com.seqtype == CODONseq && com.ngene == 3 && com.lgene[0] == com.ls / 3)
+ puts("\nCheck option G in data file?\n");
+
+ for (j = 0; j<com.ns; j++) {
+ com.z[j] = (unsigned char*)realloc(com.z[j], com.npatt*n31 * sizeof(unsigned char));
+ for (ip = 0, p = com.z[j]; ip<com.npatt; ip++)
+ for (k = 0; k<n31; k++)
+ *p++ = (unsigned char)(zt[p2s[ip] * lpatt + j*n31 + k] - 1);
+ }
+ memcpy(com.pose, poset, com.ls * sizeof(int));
+ free(poset); free(p2s); free(zt);
+
+ return (0);
}
+
void AddFreqSeqGene(int js,int ig,double pi0[],double pi[]);
@@ -1402,7 +1476,7 @@ int InitializeBaseAA (FILE *fout)
This routine is called by baseml and aaml. codonml uses another
routine InitializeCodon()
*/
- char *pch = (com.seqtype==0 ? BASEs : (com.seqtype==2 ? AAs : (com.seqtype==5 ? BASEs5: BINs)));
+ char *pch = (com.seqtype==0 ? BASEs : (com.seqtype==2 ? AAs : BINs));
char indel[]="-?";
int wname=30, h,js,k, ig, nconstp, n=com.ncode;
int irf, nrf=20;
@@ -1551,7 +1625,7 @@ void AddFreqSeqGene(int js, int ig, double pi0[], double pi[])
using pi0, by resolving ambiguities. The data are coded. com.cleandata==1 or 0.
This is for nucleotide and amino acid sequences only.
*/
- //char *pch = (com.seqtype==0 ? BASEs : (com.seqtype==2 ? AAs : (com.seqtype==5 ? BASEs5: BINs)));
+ //char *pch = (com.seqtype==0 ? BASEs : (com.seqtype==2 ? AAs : BINs));
int k, h, b, n=com.ncode;
double t;
@@ -1598,7 +1672,7 @@ int RemoveIndel(void)
*/
int n=com.ncode, h,k, j,js,lnew,nindel, n31=1;
char b, *miss; /* miss[h]=1 if site (codon) h is missing, 0 otherwise */
- char *pch=((com.seqtype<=1||com.seqtype==CODON2AAseq)?BASEs:(com.seqtype==2?AAs: (com.seqtype==5?BASEs5:BINs)));
+ char *pch=((com.seqtype<=1||com.seqtype==CODON2AAseq)?BASEs:(com.seqtype==2?AAs: BINs));
if(com.seqtype==CODONseq || com.seqtype==CODON2AAseq) {
n31=3; n=4;
@@ -1654,7 +1728,7 @@ int MPInformSites (void)
Not used for a long time. Does not work if com.pose is NULL.
*/
char *imark;
- char *pch=(com.seqtype==0 ? BASEs : (com.seqtype==2 ? AAs: (com.seqtype==5?BASEs5:BINs)));
+ char *pch=(com.seqtype==0 ? BASEs : (com.seqtype==2 ? AAs: BINs));
int h, i, markb[NS], inf, lsinf;
FILE *finf, *fninf;
@@ -1715,7 +1789,7 @@ int print1seq (FILE*fout, unsigned char *z, int ls, int pose[])
This uses com.seqtype.
*/
int h, hp, gap=10;
- char *pch=(com.seqtype==0 ? BASEs : (com.seqtype==2 ? AAs: (com.seqtype==5?BASEs5:BINs)));
+ char *pch=(com.seqtype==0 ? BASEs : (com.seqtype==2 ? AAs: BINs));
// char str[4]="";
// int nb = (com.seqtype==CODONseq?3:1);
@@ -1781,7 +1855,7 @@ void printSeqs (FILE *fout, int *pose, char keep[], int format)
else if (format==1) {
for(h=0,FPN(fout); h<com.npatt; h++) {
/* fprintf(fout," %12.8f", com.fpatt[h]/(double)com.ls); */
- fprintf(fout, " %.0f", com.fpatt[h]);
+ fprintf(fout, " %4.0f", com.fpatt[h]);
if((h+1)%15==0) FPN(fout);
}
}
@@ -1789,6 +1863,7 @@ void printSeqs (FILE *fout, int *pose, char keep[], int format)
fflush(fout);
}
+
#define gammap(x,alpha) (alpha*(1-pow(x,-1.0/alpha)))
/* DistanceREV () used to be here, moved to pamp.
*/
@@ -2805,41 +2880,39 @@ int IsNameNumber(char line[])
-int ReadTreeN (FILE *ftree, int *haslength, int *haslabel, int copyname, int popline)
-{
- /* Read a tree from ftree, using the parenthesis node representation of trees.
- Branch lengths are read in nodes[].branch, and branch (node) labels
- (integers) are preceeded by # and read in nodes[].label. If the clade label
- $ is used, the label is read into CladeLabel[] first and then moved into
- nodes[].label in the routine DownTreeCladeLabel().
+/* Read a tree from ftree, using the parenthesis node representation of trees.
+Branch lengths are read in nodes[].branch, and branch (node) labels
+(integers) are preceded by # and read in nodes[].label. If the clade label
+$ is used, the label is read into CladeLabel[] first and then moved into
+nodes[].label in the routine DownTreeCladeLabel().
- Calibration information for mcmctree may be read into nodes[].branch and nodes[].label,
- as well as nodes[].NodeStr, and is processed inside mcmctree.
- *haslength is set to 1 (branch lengths), 2 (calibration info) or 3 (both).
- However, the bit for calibrations is set only if the symbols > < exist and not for
- calibrations specified using L, U, G, etc, which will be stored in nodes[].NodeStr
- and processed using ProcessFossilInfo() in mcmctree.
- mcmctree should abort if *haslength == 1 or 3 after this routine.
+Calibration information for mcmctree may be read into nodes[].branch and nodes[].label,
+as well as nodes[].NodeStr, and is processed inside mcmctree.
+*haslength is set to 1 (branch lengths), 2 (calibration info) or 3 (both).
+However, the bit for calibrations is set only if the symbols > < exist and not for
+calibrations specified using L, U, G, etc, which will be stored in nodes[].NodeStr
+and processed using ProcessFossilInfo() in mcmctree.
+mcmctree should abort if *haslength == 1 or 3 after this routine.
- This assumes that com.ns is known.
- Species names are considered case-sensitive, with trailing spaces ignored.
+This assumes that com.ns is known. Names are considered case-sensitive, with trailing spaces ignored.
- copyname = 0: species numbers and names are both accepted, but names have
- to match the names in com.spname[], which are from the
- sequence data file. Used by baseml and codeml, for example.
- 1: species names are copied into com.spname[], but species
- numbers are accepted. Used by evolver for simulation,
- in which case no species names were read before.
- 2: the tree must have species names, which are copied into com.spname[].
- Note that com.ns is assumed known. To remove this restrition,
- one has to consider the space for nodes[], CladeLabel, starting
- node number etc.
+copyname = 0: species numbers and names are both accepted, but names have to match the names
+ in com.spname[], which are from the sequence data file.
+ Used by baseml and codeml, for example.
+ 1: species names are copied into com.spname[], but species numbers are accepted.
+ Used by evolver for simulation, in which case no species names were read before.
+ 2: the tree must have species names, which are copied into com.spname[].
+
+Note that com.ns is assumed known. To remove this restriction, one has to consider the space
+for nodes[], CladeLabel, starting node number etc.
- isname = 0: species number; 1: species name;
+isname = 0: species number; 1: species name;
- Ziheng note (18/12/2011): I have changed the code so that sequence number is not used
- anymore. isname = 1 always.
- */
+Ziheng note (18/12/2011): I have changed the code so that sequence number is not used
+anymore. isname = 1 always.
+*/
+int ReadTreeN (FILE *ftree, int *haslength, int *haslabel, int copyname, int popline)
+{
int hascalibration=0, cnode, cfather = -1; /* current node and father */
int inodeb=0; /* node number that will have the next branch length */
int cladeLabels=0, i,j,k, level=0, isname, ch=' ', icurspecies=0;
@@ -2912,7 +2985,7 @@ int ReadTreeN (FILE *ftree, int *haslength, int *haslabel, int copyname, int pop
else if (ch==':'||ch=='>') {
if(ch==':') *haslength=1;
else hascalibration = 1;
- fscanf(ftree,"%lf",&nodes[inodeb].branch);
+ fscanf(ftree, "%lf", &nodes[inodeb].branch);
}
else if (ch==quote[0] || ch==quote[1]) {
for (k=0; ; k++) { /* read notes into line[] */
@@ -2932,11 +3005,15 @@ int ReadTreeN (FILE *ftree, int *haslength, int *haslabel, int copyname, int pop
*haslabel = 1;
sscanf(pch+1, "%lf", &nodes[inodeb].label);
}
- else if((pch = strchr(line,'$'))) {
+ else if(pch = strchr(line,'$')) {
*haslabel=1;
sscanf(pch+1, "%d", &CladeLabel[inodeb]);
}
- else if(pch = strchr(line,'<')) {
+ else if(pch = strchr(line, '>')) {
+ hascalibration = 1;
+ sscanf(pch + 1, "%lf", &nodes[inodeb].branch);
+ }
+ else if(pch = strchr(line, '<')) {
hascalibration = 1;
sscanf(pch+1, "%lf", &nodes[inodeb].label);
}
@@ -3372,11 +3449,11 @@ void PointconPnodes (void)
This routine updates internal nodes com.conP only.
End nodes (com.conP0) are updated in InitConditionalPNode().
*/
- size_t nintern=0, i;
+ int nintern=0, i;
for(i=0; i<tree.nbranch+1; i++)
if(nodes[i].nson>0) /* more thinking */
- nodes[i].conP = com.conP + com.ncode*com.npatt*nintern ++;
+ nodes[i].conP = com.conP + (size_t)com.ncode*com.npatt*nintern ++;
}
@@ -4492,7 +4569,7 @@ int StepwiseAdditionMP (double space[])
_U0=(int*)malloc(com.npatt*_mnnode*sizeof(int));
_step0=(int*)malloc(com.npatt*_mnnode*sizeof(int));
if (noisy>2)
- printf("\n%9ld bytes for MP (U0 & N0)\n", 2*com.npatt*_mnnode*sizeof(int));
+ printf("\n%9zd bytes for MP (U0 & N0)\n", 2*com.npatt*_mnnode*sizeof(int));
if (_U0==NULL || _step0==NULL) error2("oom U0&step0");
FOR (i,ns0) z0[i]=com.z[i];
@@ -5971,7 +6048,7 @@ int AncestralMarginal (FILE *fout, double x[], double fhsiteAnc[], double Sir[])
Deals with node scaling to avoid underflows. See above
(Z. Yang, 2 Sept 2001)
*/
- char *pch=(com.seqtype==0 ? BASEs : (com.seqtype==2 ? AAs: (com.seqtype==5?BASEs5:BINs)));
+ char *pch=(com.seqtype==0 ? BASEs : (com.seqtype==2 ? AAs: BINs));
char *zanc, str[4]="",codon[2][4]={" "," "}, aa[4]="";
char *sitepatt=(com.readpattern?"pattern":"site");
int n=com.ncode, inode, ic=0,b[3],i,j,k1=-1,k2=-1,c1,c2,k3, lsc=com.ls;
@@ -6290,7 +6367,7 @@ int ChangesSites(FILE*frst, int coding, char *zanc)
nonsynonymous changes are counted separately.
Added in Nov 2000.
*/
- char *pch=(com.seqtype==0 ? BASEs : (com.seqtype==2 ? AAs: (com.seqtype==5?BASEs5:BINs)));
+ char *pch=(com.seqtype==0 ? BASEs : (com.seqtype==2 ? AAs: BINs));
char codon[2][4]={" "," "};
int h,hp,inode,k1,k2,d, ls1=(com.readpattern?com.npatt:com.ls);
double S,N,Sd,Nd, S1,N1,Sd1,Nd1, b,btotal=0, p,C;
@@ -6578,7 +6655,7 @@ void PrintAncState1site (char ancState1site[], double prob)
{
int i;
char codon[4]="";
- char *pch=(com.seqtype==0 ? BASEs : (com.seqtype==2 ? AAs: (com.seqtype==5?BASEs5:BINs)));
+ char *pch=(com.seqtype==0 ? BASEs : (com.seqtype==2 ? AAs: BINs));
for(i=0; i<tree.nnode-com.ns; i++) {
if(com.seqtype==1) {
@@ -6626,7 +6703,7 @@ int AncestralJointPPSG2000 (FILE *fout, double x[])
This outputs results by pattern. I tried to print results by site (rather than by pattern),
but gave up as some variables use the same memory (e.g., combIndex) for different site patterns.
*/
- char *pch=(com.seqtype==0 ? BASEs : (com.seqtype==2 ? AAs: (com.seqtype==5?BASEs5:BINs)));
+ char *pch=(com.seqtype==0 ? BASEs : (com.seqtype==2 ? AAs: BINs));
char codon[4]="";
int n=com.ncode, nintern=tree.nnode-com.ns, nson, i,j,k,h,hp,igene;
int maxnson, maxncomb, lst=(com.readpattern?com.npatt:com.ls);
@@ -7458,7 +7535,7 @@ int print1site (FILE*fout, int h)
site in the original data file or the h-th pattern. The data are coded.
naa > 1 if the codon codes for more than one amino acid.
*/
- char *pch=(com.seqtype==0 ? BASEs : (com.seqtype==2 ? AAs: (com.seqtype==5?BASEs5:BINs)));
+ char *pch=(com.seqtype==0 ? BASEs : (com.seqtype==2 ? AAs: BINs));
char compatibleAAs[20]="";
int n=com.ncode, i, b, aa=0;
@@ -8501,7 +8578,7 @@ int ReadTreeSeqs (FILE*fout)
if (haslength & 1)
error2("Tree should have fossil calibrations but not branch lengths!");
#endif
-
+
/* read sequences at each locus, construct gene tree by pruning sptree */
data.ngene = com.ndata;
com.ndata=1;
diff --git a/src/yn00.c b/src/yn00.c
index 52ef594..4ae1bbf 100644
--- a/src/yn00.c
+++ b/src/yn00.c
@@ -1,907 +1,907 @@
-/* yn00.c
- Pairwise estimation of dS and dN by the method of Yang & Nielsen
- (2000 Mol. Biol. Evol. 17:32-43)
-
- Copyright, 1998, Ziheng Yang
-
- cc -o yn00 -fast yn00.c tools.o -lm
- cl -O2 yn00.c tools.o
- yn00 <SequenceFileName>
-
- Codon sequences are encoded as 0,1,...,61, as in codeml.c.
-*/
-#include "paml.h"
-#define NS 1000
-#define LSPNAME 30
-#define NCODE 64
-#define NGENE 2000
-
-int GetOptions (char *ctlf);
-int EncodeSeqCodon(void);
-int Statistics(FILE *fout, double space[]);
-int DistanceMatLWL85 (FILE *fout);
-int DistanceYN00(int is, int js, double*S, double*N, double*dS,double*dN,
- double *SEdS, double *SEdN, double *t,double space[]);
-int GetKappa (void);
-int GetFreqs(int is1, int is2, double f3x4[], double pi[]);
-int CountSites(char z[],double pi[],double*Stot,double*Ntot,
- double fbS[],double fbN[]);
-int GetPMatCodon(double P[],double t, double kappa, double omega, double space[]);
-int CountDiffs(char z1[],char z2[],
- double*Sdts,double*Sdtv,double*Ndts,double*Ndtv,double PMat[]);
-int DistanceF84(double n, double P, double Q, double pi[],
- double*k_HKY, double*t, double*SEt);
-double dsdnREV (int is, int js, double space[]);
-
-int ExpPattFreq(double t,double kappa,double omega,double pi[],double space[]);
-int ConsistencyMC(void);
-int InfiniteData(double t,double kappa,double omega,double f3x4_0[],
- double space[]);
-void SimulateData2s64(FILE* fout, double f3x4_0[], double space[]);
-
-struct common_info {
- unsigned char *z[NS];
- char *spname[NS], seqf[512],outf[512];
- int ns,ls,npatt,codonf,icode,ncode,getSE,*pose,verbose, seqtype, readpattern;
- int cleandata, fcommon,kcommon, weighting, ndata, print;
- double *fpatt, pi[NCODE], f3x4s[NS][12], kappa, omega;
- int ngene,posG[NGENE+1],lgene[NGENE],fix_rgene, model;
- double rgene[NGENE],piG[NGENE][NCODE], alpha;
-} com;
-
-
-int FROM61[64], FROM64[64], FourFold[4][4];
-double PMat[NCODE*NCODE];
-char *codonfreqs[]={"Fequal", "F1x4", "F3x4", "Fcodon"};
-enum {Fequal, F1x4, F3x4, Fcodon} CodonFreqs;
-
-FILE *frst, *frst1, *frub;
-extern char BASEs[], AAs[];
-extern int noisy, GeneticCode[][64];
-int Nsensecodon;
-enum {NODEBUG, KAPPA, SITES, DIFF} DebugFunctions;
-int debug=0;
-
-double omega_NG, dN_NG, dS_NG; /* what are these for? */
-
-
-#define YN00
-#define REALSEQUENCE
-#include "treesub.c"
-
-
-int main(int argc, char *argv[])
-{
- char dsf[512]="2YN.dS", dnf[512]="2YN.dN", tf[512]="2YN.t";
- FILE *fout, *fseq, *fds, *fdn, *ft;
- char ctlf[96]="yn00.ctl", timestr[64];
- int n=com.ncode, is,js, j, idata, wname=20, sspace;
- double t=0.4, dS=0.1,dN=0.1, S,N, SEdS, SEdN, f3x4[12], *space=NULL;
-
- /* ConsistencyMC(); */
-
- printf("YN00 in %s\n", pamlVerStr);
- starttimer();
- if (argc>1) strcpy(ctlf, argv[1]);
- com.seqtype=1; com.cleandata=1; /* works for clean data only? */
- com.ndata=1; com.print=0;
- noisy=1; com.icode=0; com.fcommon=0; com.kcommon=1;
- GetOptions(ctlf);
- setmark_61_64 ();
- fout = fopen (com.outf, "w");
- frst = fopen("rst", "w");
- frst1 = fopen("rst1", "w");
- frub = fopen ("rub", "w");
- if (fout==NULL || frst==NULL) error2("outfile creation err.");
- fds = (FILE*)fopen(dsf, "w");
- fdn = (FILE*)fopen(dnf, "w");
- ft = (FILE*)fopen(tf, "w");
- if(fds==NULL || fdn==NULL || ft==NULL) error2("file open error");
-
- if((fseq=fopen (com.seqf,"r"))==NULL) {
- printf ("\n\nSequence file %s not found!\n", com.seqf);
- exit(-1);
- }
- for (idata=0; idata<com.ndata; idata++) {
- if (com.ndata>1) {
- printf("\nData set %d\n", idata+1);
- fprintf(fout, "\n\nData set %d\n", idata+1);
- fprintf(frst, "\t%d", idata+1);
- }
-
- ReadSeq((com.verbose?fout:NULL), fseq, com.cleandata, 0);
- SetMapAmbiguity();
-
- sspace = max2(200000,64*com.ns*sizeof(double));
- sspace = max2(sspace,64*64*5*sizeof(double));
- if ((space=(double*)realloc(space,sspace))==NULL) error2("oom space");
-
- com.kappa = 4.6;
- com.omega = 1;
- fprintf(fout,"YN00 %15s", com.seqf);
- Statistics(fout, space);
-
- if(noisy) printf("\n\n(A) Nei-Gojobori (1986) method\n");
- fprintf(fout,"\n\n(A) Nei-Gojobori (1986) method\n");
- DistanceMatNG86 (fout, NULL, NULL, NULL, 0);
- fflush(fout);
-
- if(noisy) printf("\n\n(B) Yang & Nielsen (2000) method\n\n");
- fprintf(fout,"\n\n(B) Yang & Nielsen (2000) method\n\n");
- fprintf(fout,"Yang Z, Nielsen R (2000) Estimating synonymous and nonsynonymous substitution rates under realistic evolutionary models. Mol. Biol. Evol. 17:32-43\n");
- if(!com.weighting) fputs("\n(equal weighting of pathways)\n",fout);
-
- if(com.fcommon) GetFreqs(-1, -1, f3x4, com.pi);
- if(com.kcommon) {
- GetKappa();
- printf("kappa = %.2f\n\n",com.kappa);
- /* puts("kappa?"); scanf("%lf", &com.kappa); */
- }
-
- fputs("\nseq. seq. S N t kappa omega dN +- SE dS +- SE\n\n",fout);
- fprintf(fds,"%6d\n", com.ns);
- fprintf(fdn,"%6d\n", com.ns);
- fprintf(ft,"%6d\n", com.ns);
- for(is=0; is<com.ns; is++) {
- fprintf(fds,"%-*s ", wname,com.spname[is]);
- fprintf(fdn,"%-*s ", wname,com.spname[is]);
- fprintf(ft,"%-*s ", wname,com.spname[is]);
- for(js=0; js<is; js++) {
- if(noisy) printf("%3d vs. %3d\n", is+1, js+1);
- fprintf(fout, " %3d %3d ", is+1, js+1);
-
- if(!com.fcommon) GetFreqs(is, js, f3x4, com.pi);
- if(!com.kcommon) GetKappa();
- j = DistanceYN00(is, js, &S, &N, &dS,&dN, &SEdS, &SEdN, &t,space);
-
- fprintf(fout,"%7.1f %7.1f %8.4f %7.4f %7.4f %6.4f +- %6.4f %7.4f +- %6.4f\n",
- S,N,t,com.kappa,com.omega,dN,SEdN,dS,SEdS);
- fprintf(frst," YN: %8.4f%8.4f%8.4f %6.4f +- %6.4f %7.4f +- %6.4f\n",
- t,com.kappa,com.omega,dN,SEdN,dS,SEdS);
-
- fprintf(fds," %7.4f",dS); fprintf(fdn," %7.4f",dN); fprintf(ft," %7.4f",t);
- } /* for (js) */
- FPN(fds); FPN(fdn); FPN(ft);
- fflush(fds); fflush(fdn); fflush(ft);
- } /* for (is) */
- FPN(fds); FPN(fdn); FPN(ft);
-
- if(noisy) printf("\n\n(C) LWL85, LPB93 & LWLm methods\n\n");
- fprintf(fout,"\n\n(C) LWL85, LPB93 & LWLm methods\n\n");
- fprintf(fout,"Li W.-H., C.-I. Wu, Luo (1985) A new method for estimating synonymous and nonsynonymous rates of nucleotide substitutions considering the relative likelihood of nucleotide and codon changes. Mol. Biol. Evol. 2: 150-174.\n");
- fprintf(fout,"Li W-H (1993) Unbiased estimation of the rates of synonymous and nonsynonymous substitution. J. Mol. Evol. 36:96-99\n");
- fprintf(fout,"Pamilo P, Bianchi NO (1993) Evolution of the Zfx and Zfy genes - rates and interdependence between the genes. Mol. Biol. Evol. 10:271-281\n");
- fprintf(fout,"Yang Z (2006) Computational Molecular Evolution. Oxford University Press, Oxford. Eqs. 2.12 & 2.13\n");
-
- DistanceMatLWL85(fout);
-
- fflush(frst);
- if(noisy) printf("\nTime used: %s\n", printtime(timestr));
- }
- return (0);
-}
-
-
-
-int GetOptions (char *ctlf)
-{
- int i, nopt=9, lline=4096;
- char line[4096], *pline, opt[20], comment='*';
- char *optstr[]={"seqfile","outfile", "verbose", "noisy", "icode",
- "weighting","commonkappa", "commonf3x4", "ndata"};
- double t;
- FILE *fctl;
-
- if((fctl=fopen(ctlf,"r"))==NULL) error2("\nctl file open error.\n");
- printf ("\nReading options from %s..\n", ctlf);
- for (;;) {
- if (fgets (line, lline, fctl) == NULL) break;
- for (i=0,t=0,pline=line; i<lline&&line[i]; i++)
- if (isalnum(line[i])) { t=1; break; }
- else if (line[i]==comment) break;
- if (t==0) continue;
- sscanf (line, "%s%*s%lf", opt, &t);
- if ((pline=strstr(line, "="))==NULL) error2("option file.");
-
- for (i=0; i<nopt; i++) {
- if (strncmp(opt, optstr[i], 8)==0) {
- if (noisy>2)
- printf ("\n%3d %15s | %-20s %6.2f", i+1,optstr[i],opt,t);
- switch (i) {
- case (0): sscanf(pline+2, "%s", com.seqf); break;
- case (1): sscanf(pline+2, "%s", com.outf); break;
- case (2): com.verbose=(int)t; break;
- case (3): noisy=(int)t; break;
- case (4): com.icode=(int)t; break;
- case (5): com.weighting=(int)t; break;
- case (6): com.kcommon=(int)t; break;
- case (7): com.fcommon=(int)t; break;
- case (8): com.ndata=(int)t; break;
- }
- break;
- }
- }
- if (i==nopt)
- { printf ("\noption %s in %s\n", opt, ctlf); exit (-1); }
- }
-
- for (i=0,Nsensecodon=0; i<64; i++)
- if (GeneticCode[com.icode][i]!=-1) Nsensecodon++;
- com.ncode = Nsensecodon;
- fclose (fctl);
- FPN(F0);
- return (0);
-}
-
-int DistanceYN00(int is, int js, double*S, double*N, double*dS,double*dN,
- double *SEdS, double *SEdN, double *t,double space[])
-{
-/* calculates dS, dN, w, t by weighting.
- com.kappa & com.pi[] are calculated beforehand are not updated.
-*/
- int j,k,ir,nround=10, status=0;
- double fbS[4],fbN[4],fbSt[4],fbNt[4], St,Nt, Sdts,Sdtv,Ndts,Ndtv, kappaS,kappaN;
- double w0=0,dS0=0,dN0=0, accu=5e-4, minomega=1e-5,maxomega=99;
-
- if(*t==0) *t=.5;
- if(com.omega<=0) com.omega=1;
- for(k=0; k<4; k++) fbS[k] = fbN[k] = 0;
- if(debug) printf("\nCountSites\n");
- if(noisy>3) printf("\n");
- for(k=0,*S=*N=0; k<2; k++) {
- CountSites(com.z[k==0?is:js], com.pi, &St, &Nt, fbSt, fbNt);
- *S += St/2;
- *N += Nt/2;
- for(j=0; j<4; j++) {
- fbS[j] += fbSt[j]/2;
- fbN[j] += fbNt[j]/2;
- }
- if(noisy>3) printf("Seq. %d: S = %9.3f N=%9.3f\n",k+1,St,Nt);
- }
- if(noisy>3) {
- printf("Ave. : S = %9.3f N=%9.3f\n\n",*S,*N);
- printf("Base freqs at syn & nonsyn sites\n%10s%10s%10s%10s\n", "T", "C", "A", "G");
- for(k=0; k<4; k++) printf(" %9.6f", fbS[k]); FPN(F0);
- for(k=0; k<4; k++) printf(" %9.6f", fbN[k]); FPN(F0);
- }
- if(noisy>3)
- printf(" # Sdts Sdtv Ndts Ndtv | t kappa w dN dS | kappaS kappaN\n");
-
- /* initial values? */
- if(com.weighting) {
- if(*t<0.001 || *t>5) *t=0.5;
- if(com.omega<0.01 || com.omega>5) com.omega=.5;
- }
- for (ir=0; ir<(com.weighting?nround:1); ir++) { /* weighting or iteration */
- if(com.weighting)
- GetPMatCodon(PMat,*t,com.kappa,com.omega,space);
- else
- for(j=0; j<com.ncode*com.ncode; j++)
- PMat[j] = 1;
-
- CountDiffs(com.z[is], com.z[js], &Sdts, &Sdtv, &Ndts, &Ndtv, PMat);
-
- if(DistanceF84(*S, Sdts/ *S, Sdtv/ *S, fbS, &kappaS, dS, SEdS)) status=-1;
- if(DistanceF84(*N, Ndts/ *N, Ndtv/ *N, fbN, &kappaN, dN, SEdN)) status=-1;
-
- if(*dS<1e-9) {
- status = -1;
- com.omega = maxomega;
- }
- else
- com.omega= max2(minomega, *dN/ *dS);
- *t = *dS * 3 * *S/(*S + *N) + *dN * 3 * *N/(*S + *N);
- if(noisy>3) {
- printf("%2d %7.2f%7.2f%7.2f%7.2f |", ir+1, Sdts,Sdtv,Ndts,Ndtv);
- printf("%8.4f%8.4f%8.4f%8.4f%8.4f", *t, com.kappa,com.omega,*dN,*dS);
- printf(" | %8.4f%8.4f\n", kappaS,kappaN);
- }
- if(fabs(*dS-dS0)<accu && fabs(*dN-dN0)<accu && fabs(com.omega-w0)<accu)
- break;
- dS0=*dS; dN0=*dN; w0=com.omega;
- } /* for (ir) */
- if(ir==nround) status=-2;
- /* if(status) printf("\n\tstatus: %d\n", status); */
- return(status);
-}
-
-
-
-int Statistics(FILE *fout, double space[])
-{
-/* This calculates base frequencies, using npatt & fpatt[]
-*/
- int h, is,j, c[3], wname=20;
- double f3x4tot[12], *fb3tot=com.pi, *fb3s=space;
-
- if(fout) {
- fprintf(fout, "\n\nns =%4d\tls =%4d", com.ns, com.ls);
- fprintf(fout,"\n\nCodon position x base (3x4) table for each sequence.");
- }
- zero(f3x4tot,12); zero(fb3s,64*com.ns);
- for(is=0; is<com.ns; is++) zero(com.f3x4s[is], 12);
- for (is=0; is<com.ns; is++) {
- for (h=0; h<com.npatt; h++) {
- j = FROM61[com.z[is][h]];
- c[0]=j/16; c[1]=(j%16)/4; c[2]=j%4;
- fb3s[is*64+j] += com.fpatt[h];
- for(j=0; j<3; j++)
- com.f3x4s[is][j*4+c[j]] += com.fpatt[h]/com.ls;
- }
- for(j=0; j<12; j++) f3x4tot[j] += com.f3x4s[is][j]/com.ns;
- if(fout) {
- fprintf(fout,"\n\n%-*s", wname, com.spname[is]);
- for(j=0; j<3; j++) {
- fprintf (fout, "\nposition %2d:", j+1);
- for(h=0; h<4; h++)
- fprintf (fout,"%5c:%7.5f", BASEs[h], com.f3x4s[is][j*4+h]);
- }
- }
- }
- if(fout) {
- fprintf (fout, "\n\nAverage");
- for(j=0; j<3; j++) {
- fprintf (fout, "\nposition %2d:", j+1);
- for(h=0; h<4; h++)
- fprintf (fout,"%5c:%7.5f", BASEs[h], f3x4tot[j*4+h]);
- }
- for(is=0,zero(fb3tot,64);is<com.ns;is++)
- for(j=0; j<64; j++) fb3tot[j] += fb3s[is*64+j];
- fprintf (fout, "\n\nCodon usage for each species\n");
- printcums (fout, com.ns, fb3s, com.icode);
- fprintf (fout, "\nSums\n");
- printcums (fout, 1, fb3tot, com.icode);
- }
-
- return(0);
-}
-
-int GetFreqs(int is1, int is2, double f3x4[], double pi[])
-{
-/* uses com.fcommon and com.f3x4s to calculate f3x4[] and pi[].
- Codon frequencies pi[] are calculated from the f3x4 table.
- The calculation is duplicated when com.fcommon=1.
-*/
- int n=com.ncode, j, k, ic, b[3];
-
- if (com.fcommon)
- for(j=0,zero(f3x4,12);j<com.ns;j++)
- for(k=0; k<12; k++) f3x4[k]+=com.f3x4s[j][k]/com.ns;
- else
- for(k=0; k<12; k++)
- f3x4[k] = (com.f3x4s[is1][k]+com.f3x4s[is2][k])/2;
-
- if (noisy>=9)
- matout(F0, f3x4, 3, 4);
- for(j=0; j<n; j++) {
- ic=FROM61[j]; b[0]=ic/16; b[1]=(ic%16)/4; b[2]=ic%4;
- pi[j] = f3x4[b[0]] * f3x4[4+b[1]] * f3x4[8+b[2]];
- }
- abyx(1/sum(pi,n), pi, n);
-
- return (0);
-}
-
-
-int DistanceMatLWL85 (FILE *fout)
-{
-/* This implements 3 methods: LWL85 (Li, Wu & Luo 1985), LPB (Li 1993,
- Pamilo & Bianchi 1993), and LWL85m (equation 12 in book; check other refs).
- alpha is not used.
-*/
- int i,j,k, h, wname=15;
- char *codon1, *codon2, str[4]=" ";
- double L[3], sdiff[3], vdiff[3], Lt[3], sdifft[3], vdifft[3], A[3],B[3];
- double P[3],Q[3], a,b, dS,dN, pS2, S,N, Sd,Nd;
-
- for(i=0; i<com.ns; i++) {
- for(j=0; j<i; j++) { /* pair i and j */
- for(k=0; k<3; k++) L[k] = sdiff[k] = vdiff[k] = 0;
-
- for (h=0; h<com.npatt; h++) {
- codon1 = CODONs[com.z[i][h]];
- codon2 = CODONs[com.z[j][h]];
- difcodonLWL85(codon1, codon2, Lt, sdifft, vdifft, 0, com.icode);
- for(k=0; k<3; k++) {
- L[k] += Lt[k]*com.fpatt[h];
- sdiff[k] += sdifft[k]*com.fpatt[h];
- vdiff[k] += vdifft[k]*com.fpatt[h];
- }
- }
-
- for(k=0; k<3; k++) {
- P[k] = sdiff[k]/L[k];
- Q[k] = vdiff[k]/L[k];
- a = 1 - 2*P[k] - Q[k];
- b = 1 - 2*Q[k];
- A[k] = -log(a)/2 + log(b)/4;
- B[k] = -log(b)/2;
- }
- if(fout) {
- fprintf(fout, "\n%d (%s) vs. %d (%s)\n\n", i+1, com.spname[i], j+1, com.spname[j]);
- fprintf(fout,"L(i): %9.1f %9.1f %9.1f sum=%9.1f\n", L[0],L[1],L[2],L[0]+L[1]+L[2]);
- fprintf(fout,"Ns(i): %9.4f %9.4f %9.4f sum=%9.4f\n", sdiff[0],sdiff[1],sdiff[2], sdiff[0]+sdiff[1]+sdiff[2]);
- fprintf(fout,"Nv(i): %9.4f %9.4f %9.4f sum=%9.4f\n", vdiff[0],vdiff[1],vdiff[2], vdiff[0]+vdiff[1]+vdiff[2]);
- fprintf(fout,"A(i): %9.4f %9.4f %9.4f\n", A[0],A[1],A[2]);
- fprintf(fout,"B(i): %9.4f %9.4f %9.4f\n", B[0],B[1],B[2]);
-
- Sd = L[1]*A[1] + L[2]*(A[2]+B[2]);
- Nd = L[1]*B[1] + L[0]*(A[0]+B[0]);
- pS2 = 1/3.;
- S = L[1]*pS2 + L[2];
- N = L[1]*(1-pS2) + L[0];
- dS = Sd/S;
- dN = Nd/N;
- fprintf(fout,"LWL85: dS = %7.4f dN = %7.4f w =%7.4f S =%7.1f N =%7.1f\n", dS,dN, dN/dS, S, N);
- pS2 = A[2]/(A[2]+B[2]);
- S = L[1]*pS2 + L[2];
- N = L[1]*(1-pS2) + L[0];
- dS = Sd/S;
- dN = Nd/N;
- fprintf(fout,"LWL85m: dS = %7.4f dN = %7.4f w =%7.4f S =%7.1f N =%7.1f (rho = %.3f)\n", dS,dN, dN/dS, S, N, pS2);
-
- dS = (L[1]*A[1]+L[2]*A[2])/(L[1]+L[2]) + B[2];
- dN = (L[0]*B[0]+L[1]*B[1])/(L[0]+L[1]) + A[0];
- fprintf(fout,"LPB93: dS = %7.4f dN = %7.4f w =%7.4f\n", dS, dN, dN/dS);
- }
- }
- if(noisy) printf(" %3d",i+1);
- }
- if(noisy) FPN(F0);
- if(fout) FPN(fout);
- return (0);
-}
-
-
-
-int GetKappa(void)
-{
-/* This calculates mutational transition/transversion rate ratio kappa
- using 4-fold degenerate sites from pairwise comparisons
- under HKY85, weighting estimates by the numbers of sites
-*/
- int is,js,j,k,h, i1,pos,c[2],aa[2],b[2][3],a,ndeg,by[3]={16,4,1}, status=0;
- double ka[2], F[2][16],S[2],wk[2], t,P,Q,pi[4];
- /* F&S&wk [0]: non-degenerate; [1]:4-fold; S:sites */
- double kdefault=(com.kappa>0?com.kappa:(com.icode==1?10:2));
- char str1[4]=" ",str2[4]=" ", *sitestr[2]={"non-degenerate","4-fold"};
-
- for(is=0,com.kappa=0;is<com.ns;is++) {
- for(js=0; js<is; js++) {
- if(noisy>=9) printf ("\n%4d vs. %3d", is+1, js+1);
- for(k=0; k<2; k++) zero(F[k],16);
- for(h=0; h<com.npatt; h++) {
- c[0] = FROM61[com.z[is][h]];
- c[1] = FROM61[com.z[js][h]];
- for(k=0; k<2; k++) {
- b[k][0] = c[k]/16;
- b[k][1] = (c[k]%16)/4;
- b[k][2] = c[k]%4;
- aa[k] = GeneticCode[com.icode][c[k]];
- }
-
- /* find non-degenerate sites */
- for(pos=0; pos<3; pos++) { /* check all positions */
- for(k=0,ndeg=0;k<2;k++) { /* two codons */
- for(i1=0; i1<4; i1++) {
- if(i1==b[k][pos]) continue;
- a = GeneticCode[com.icode][c[k]+(i1-b[k][pos])*by[pos]];
- if(a==aa[k]) break;
- }
- if(i1==4) ndeg++;
- }
- if(ndeg==2) {
- F[0][b[0][pos]*4+b[1][pos]] += .5*com.fpatt[h];
- F[0][b[1][pos]*4+b[0][pos]] += .5*com.fpatt[h];
- }
-
- }
- /* find 4-fold degenerate sites at 3rd positions */
- for(k=0,ndeg=0;k<2;k++) { /* two codons */
- for(j=0,i1=c[k]-b[k][2]; j<4; j++)
- if(j!=b[k][2] && GeneticCode[com.icode][i1+j]!=aa[k]) break;
- if(aa[0]==aa[1] && j==4) ndeg++;
- }
- if (ndeg<2) continue;
- F[1][b[0][2]*4+b[1][2]] += .5*com.fpatt[h];
- F[1][b[1][2]*4+b[0][2]] += .5*com.fpatt[h];
- } /* for (h) */
- for(k=0; k<2; k++) { /* two kinds of sites */
- /*
- if(noisy>3) printf("\n%s:\n",sitestr[k]);
- */
- S[k] = sum(F[k],16);
- if(S[k]<=0) { wk[k]=0; continue; }
- for(j=0; j<16; j++) F[k][j]/=S[k];
- P = (F[k][0*4+1]+F[k][2*4+3])*2;
- Q = 1-(F[k][0*4+0]+F[k][1*4+1]+F[k][2*4+2]+F[k][3*4+3]) - P;
- for(j=0; j<4; j++)
- pi[j] = sum(F[k]+j*4,4);
- DistanceF84(S[k], P,Q,pi, &ka[k], &t, NULL);
- wk[k] = (ka[k]>0?S[k]:0);
-
- /* matout(F0,F[k],4,4); matout(F0,pi,1,4); */
- /*
- if(noisy>3)
- printf("\nSPQkt:%9.4f%9.5f%9.5f%9.4f%9.4f\n",S[k],P,Q,ka[k],t);
- */
- }
- if(wk[0]+wk[1]==0) {
- status = -1;
- ka[0] = kdefault;
- if(noisy>3) printf("\ngot no kappa! fix it at %.4f\n",ka[0]);
- }
- else
- ka[0] = (ka[0]*wk[0]+ka[1]*wk[1])/(wk[0]+wk[1]);
- com.kappa += ka[0]/(com.ns*(com.ns-1.)/2);
- } /* for(js) */
- } /* for(is) */
-
- return (status);
-}
-
-
-int CountSites(char z[],double pi[],double*Stot,double*Ntot,double fbS[],double fbN[])
-{
-/* This calculates the total numbers of synonymous and nonsynonymous sites
- (Stot & Ntot) in the sequence z[] using com.kappa and pi[].
- It also count the base frequencies at the synonymous and nonsynonymous
- sites. Total number of sites is scaled to be equal to sequence length
- even if some changes are to stop codons. Since pi[] is scaled to sum
- to one, rates to stop codons are not considered.
- The counting goes through the sequence codon by codon, and so is different
- from the counting in codeml, which uses pi[] to count the sites.
-*/
- int h, j,k, c[2],aa[2], b[3], by[3]={16,4,1};
- double r, S,N, kappa=com.kappa;
-
- *Stot = *Ntot = 0;
- for(k=0; k<4; k++)
- fbS[k] = fbN[k] = 0;
- for (h=0; h<com.npatt; h++) {
- c[0] = FROM61[z[h]];
- b[0] = c[0]/16; b[1]=(c[0]%16)/4; b[2]=c[0]%4;
- aa[0] = GeneticCode[com.icode][c[0]];
- if (aa[0]==-1)
- error2("stop codon");
- for (j=0,S=N=0; j<3; j++) {
- for(k=0; k<4; k++) { /* b[j] changes to k */
- if (k==b[j]) continue;
- c[1] = c[0]+(k-b[j])*by[j];
- aa[1] = GeneticCode[com.icode][c[1]];
- if(aa[1] == -1) continue;
- r = pi[FROM64[c[1]]];
- if (k+b[j]==1 || k+b[j]==5) r *= kappa; /* transition */
- if (aa[0]==aa[1]) { S += r; fbS[b[j]] += r*com.fpatt[h]; }
- else { N += r; fbN[b[j]] += r*com.fpatt[h]; }
- }
- }
- *Stot += com.fpatt[h]*S;
- *Ntot += com.fpatt[h]*N;
- }
- r = 3*com.ls/(*Stot+*Ntot); *Stot*=r; *Ntot*=r;
- r = sum(fbS,4); for(k=0; k<4; k++) fbS[k] /= r;
- r = sum(fbN,4); for(k=0; k<4; k++) fbN[k] /= r;
- return (0);
-}
-
-
-int GetPMatCodon(double P[],double t, double kappa, double omega, double space[])
-{
-/* Get PMat=exp(Q*t) for weighting pathways
-*/
- int nterms=100, n=com.ncode, ic1, ic2, i,j,k, aa[2],ndiff,pos=0,from[3],to[3];
- double *Q=P, *U=space+n*n, *V=U+n*n, *Root=V+n*n, mr, spacesqrt[NCODE];
-
- for(i=0; i<n*n; i++) Q[i] = 0;
- for (i=0; i<n; i++) {
- ic1=FROM61[i]; from[0]=ic1/16; from[1]=(ic1/4)%4; from[2]=ic1%4;
- for(j=0; j<i; j++) {
- ic2=FROM61[j]; to[0]=ic2/16; to[1]=(ic2/4)%4; to[2]=ic2%4;
- aa[0] = GeneticCode[com.icode][ic1];
- aa[1] = GeneticCode[com.icode][ic2];
- if (aa[0]==-1 || aa[1]==-1) continue;
- for (k=0,ndiff=0; k<3; k++)
- if(from[k] != to[k]) { ndiff++; pos=k; }
- if (ndiff!=1) continue;
- Q[i*n+j] = 1;
- if ((from[pos]+to[pos]-1)*(from[pos]+to[pos]-5)==0)
- Q[i*n+j] *= kappa;
- if(aa[0] != aa[1]) Q[i*n+j] *= omega;
- Q[j*n+i] = Q[i*n+j];
- }
- }
-
- for(i=0; i<n; i++) for(j=0; j<n; j++)
- Q[i*n+j] *= com.pi[j];
-
- for (i=0,mr=0; i<n; i++) {
- Q[i*n+i] = -sum(Q+i*n,n);
- mr -= com.pi[i]*Q[i*n+i];
- }
-
- eigenQREV(Q, com.pi, n, Root, U, V, spacesqrt);
- for(i=0; i<n; i++) Root[i] /= mr;
- PMatUVRoot(P, t, n, U, V, Root);
- /*
- testTransP(PMat, n);
- fprintf(frub,"\a\nP(%.5f)\n", t);
- for(i=0; i<n; i++,FPN(frub)) for(j=0; j<n; j++)
- fprintf(frub, " %9.5g", PMat[i*n+j]);
- fflush(frub);
- */
- return (0);
-}
-
-
-
-int CountDiffs(char z1[],char z2[], double*Sdts,double*Sdtv,double*Ndts,double*Ndtv,double PMat[])
-{
-/* Count the numbers of synonymous and nonsynonymous differences between
- sequences z1 and z2, weighting pathways with PMat. No weighting if PMat=NULL
- Modified from difcodon()
- dmark[i] (=0,1,2) is the i_th different codon position (i=0,1,ndiff).
- step[j] (=0,1,2) is the codon position to be changed at step j (j=0,1,ndiff).
- b[i][j] (=0,1,2,3) is the nucleotide at position j (0,1,2) in codon i (0,1)
- sts,stv,nts,ntv are syn ts & tv and nonsyn ts & tv at a codon site.
- stspath[k] stvpath[k] ntspath[k] ntvpath[k] are syn ts & tv and
- nonsyn ts & tv differences on path k (k=2,6).
-*/
- char str[4]=" ";
- int n=com.ncode, h,i1,i2,i,k, transi, c[2],ct[2],aa[2], by[3]={16,4,1};
- int dmark[3], step[3], b[2][3], bt1[3], bt2[3];
- int ndiff, npath, nstop, stspath[6],stvpath[6],ntspath[6],ntvpath[6];
- double sts,stv,nts,ntv; /* syn ts & tv, nonsyn ts & tv for 2 codons */
- double ppath[6], sump,p;
-
- *Sdts = *Sdtv = *Ndts = *Ndtv = 0;
- for (h=0; h<com.npatt; h++) {
- c[0] = FROM61[z1[h]];
- c[1] = FROM61[z2[h]];
- if (c[0]==c[1]) continue;
- for(i=0; i<2; i++) {
- b[i][0]=c[i]/16; b[i][1]=(c[i]%16)/4; b[i][2]=c[i]%4;
- aa[i] = GeneticCode[com.icode][c[i]];
- }
- if (aa[0]==-1 || aa[1]==-1)
- error2("stop codon in sequence.");
- ndiff=0; sts=stv=nts=ntv=0;
- for(k=0; k<3; k++) dmark[k] = -1;
- for(k=0; k<3; k++) if(b[0][k] != b[1][k]) dmark[ndiff++] = k;
- npath=1;
- if(ndiff>1) npath = (ndiff==2 ? 2 : 6);
- if (ndiff==1) {
- transi = b[0][dmark[0]]+b[1][dmark[0]];
- transi = (transi==1 || transi==5);
- if (aa[0]==aa[1]) { if (transi) sts++; else stv++; }
- else { if (transi) nts++; else ntv++; }
- }
- else { /* ndiff=2 or 3 */
- if(debug==DIFF) {
- printf("\n\nh=%d %s (%c) .. ", h+1,getcodon(str,c[0]),AAs[aa[0]]);
- printf("%s (%c): ", getcodon(str,c[1]), AAs[aa[1]]);
- }
- nstop=0;
- for(k=0; k<npath; k++) {
- if(debug==DIFF) printf("\npath %d: ", k+1);
-
- for(i1=0; i1<3; i1++) step[i1] = -1;
- if (ndiff==2) {
- step[0] = dmark[k];
- step[1] = dmark[1-k];
- }
- else {
- step[0] = k/2;
- step[1] = k%2;
- if (step[0]<=step[1]) step[1]++;
- step[2] = 3-step[0]-step[1];
- }
- for(i1=0; i1<3; i1++) bt1[i1] = bt2[i1]=b[0][i1];
- stspath[k] = stvpath[k] = ntspath[k] = ntvpath[k] = 0;
- /* mutations along each path */
- for (i1=0,ppath[k]=1; i1<ndiff; i1++) {
- bt2[step[i1]] = b[1][step[i1]];
- for (i2=0,ct[0]=ct[1]=0; i2<3; i2++) {
- ct[0] += bt1[i2]*by[i2];
- ct[1] += bt2[i2]*by[i2];
- }
- ppath[k] *= PMat[ FROM64[ct[0]]*n + FROM64[ct[1]] ];
- for(i2=0; i2<2; i2++) aa[i2] = GeneticCode[com.icode][ct[i2]];
-
- if(debug==DIFF) printf("%s (%c) %.5f: ", getcodon(str,ct[1]),AAs[aa[1]],PMat[ct[0]*n+ct[1]]);
-
- if (aa[1]==-1) {
- nstop++; ppath[k]=0; break;
- }
- transi = b[0][step[i1]]+b[1][step[i1]];
- transi = (transi==1 || transi==5); /* transition? */
-
- if(aa[0]==aa[1]) { if(transi) stspath[k]++; else stvpath[k]++; }
- else { if(transi) ntspath[k]++; else ntvpath[k]++; }
- for(i2=0; i2<3; i2++) bt1[i2] = bt2[i2];
- }
-
- if(debug==DIFF) printf(" p =%.9f", ppath[k]);
-
- } /* for(k,npath) */
- if (npath==nstop) { /* all paths through stop codons */
- puts ("all paths through stop codons..");
- if (ndiff==2) { nts=.5; ntv=1.5; }
- else { nts=.5; ntv=2.5; }
- }
- else {
- sump = sum(ppath,npath);
- if(sump<1e-20) {
- printf("\nsump=0, npath=%4d\nh=%2d ", npath, h+1);
- printf("(%s ", getcodon(str,c[0]));
- printf("%s)", getcodon(str,c[1]));
- for(k=0; k<npath; k++) printf(" %9.6g", ppath[k]); FPN(F0);
- matout(frub, PMat, n, n);
- exit(-1);
-
- /*
- sump=1; FOR(k,npath) if(ppath[k]) ppath[k]=1./(npath-nstop);
- */
- }
- for(k=0; k<npath; k++) {
- p = ppath[k]/sump;
- sts += stspath[k]*p;
- stv += stvpath[k]*p;
- nts += ntspath[k]*p;
- ntv += ntvpath[k]*p;
- }
-
- if(debug==DIFF) {
- for(k=0; k<npath; k++) printf("\n p =%.5f", ppath[k]/sump); FPN(F0);
- printf(" syn ts & tv, nonsyn ts & tv:%9.5f%9.5f%9.5f%9.5f\n",sts,stv,nts,ntv);
- }
- }
-
- if(debug==DIFF) getchar();
-
- } /* if (ndiff) */
- *Sdts += com.fpatt[h]*sts;
- *Sdtv += com.fpatt[h]*stv;
- *Ndts += com.fpatt[h]*nts;
- *Ndtv += com.fpatt[h]*ntv;
- } /* for (h) */
- return (0);
-}
-
-
-int DistanceF84(double n, double P, double Q, double pi[],
- double*k_HKY, double*t, double*SEt)
-{
-/* This calculates kappa and d from P (proportion of transitions) & Q
- (proportion of transversions) & pi under F84.
- When F84 fails, we try to use K80. When K80 fails, we try
- to use JC69. When JC69 fails, we set distance t to maxt.
- Variance formula under F84 is from Tateno et al. (1994), and briefly
- checked against simulated data sets.
-*/
- int failF84=0,failK80=0,failJC69=0;
- double tc,ag, Y,R, a=0,b=0, A=-1,B=-1,C=-1, k_F84;
- double Qsmall=min2(1e-10,0.1/n), maxkappa=999,maxt=99;
-
- *k_HKY=-1;
- Y=pi[0]+pi[1]; R=pi[2]+pi[3]; tc=pi[0]*pi[1]; ag=pi[2]*pi[3];
- if (P+Q>1) { *t=maxt; *k_HKY=1; return(3); }
- if (P<-1e-10 || Q<-1e-10 || fabs(Y+R-1)>1e-8) {
- printf("\nPQYR & pi[]: %9.5f%9.5f%9.5f%9.5f",P,Q,Y,R);
- matout(F0,pi,1,4);
- error2("DistanceF84: input err.");
- }
- if(Q<Qsmall) failF84=failK80=1;
- else if(Y<=0 || R<=0 || (tc<=0 && ag<=0)) failF84=1;
- else {
- A=tc/Y+ag/R; B=tc+ag; C=Y*R;
- a=(2*B+2*(tc*R/Y+ag*Y/R)*(1-Q/(2*C)) - P) / (2*A);
- b=1-Q/(2*C);
- if (a<=0 || b<=0) failF84=1;
- }
- if (!failF84) {
- a=-.5*log(a); b=-.5*log(b);
- if(b<=0) failF84=1;
- else {
- k_F84 = a/b-1;
- *t = 4*b*(tc*(1+ k_F84/Y) + ag*(1+ k_F84/R)+C);
- *k_HKY = (B + (tc/Y+ag/R)* k_F84)/B; /* k_F84=>k_HKY85 */
- if(SEt) {
- a = A*C/(A*C-C*P/2-(A-B)*Q/2);
- b = A*(A-B)/(A*C-C*P/2-(A-B)*Q/2) - (A-B-C)/(C-Q/2);
- *SEt = sqrt((a*a*P+b*b*Q-square(a*P+b*Q))/n);
- }
- }
- }
- if(failF84 && !failK80) { /* try K80 */
- if (noisy>=9) printf("\na=%.5f b=%.5f, use K80\n", a,b);
- a=1-2*P-Q; b=1-2*Q;
- if (a<=0 || b<=0) failK80=1;
- else {
- a=-log(a); b=-log(b);
- if(b<=0) failK80=1;
- else {
- *k_HKY=(.5*a-.25*b)/(.25*b);
- *t = .5*a+.25*b;
- }
- if(SEt) {
- a=1/(1-2*P-Q); b=(a+1/(1-2*Q))/2;
- *SEt = sqrt((a*a*P+b*b*Q-square(a*P+b*Q))/n);
- }
- }
- }
- if(failK80) {
- if((P+=Q)>=.75) { failJC69=1; P=.75*(n-1.)/n; }
- *t = -.75*log(1-P*4/3.);
- if(*t>maxt) *t=maxt;
- if(SEt) {
- *SEt = sqrt(9*P*(1-P)/n) / (3-4*P);
- }
- }
- if(*k_HKY>maxkappa) *k_HKY=maxkappa;
-
- return(failF84 + failK80 + failJC69);
-}
-
-
-
-#if 0
-
-double dsdnREV (int is, int js, double space[])
-{
-/* This calculates ds and dn by recovering the Q*t matrix using the equation
- F(t) = PI * P(t) = PI * exp(Q*t)
- This is found not to work well and is not published.
- space[64*64*5]
- The code here is broken since I changed the coding. Codons are now coded 0, 1, ..., 60.
-*/
- int n=com.ncode, i,j, h;
- double *F=PMat, *Qt=F;
- double *Root=space+n*n,*pi=Root+n, *U=pi+n,*V=U+n*n;
- double *T1=V+n*n,*T2=T1+n*n, t, small=1e-6;
-
- fprintf(frst,"\npi in model\n");
- matout(frst,com.pi,1,n);
- FOR(i,n*n) F[i]=0;
- FOR (h,com.npatt) {
- F[com.z[is][h]*n+com.z[js][h]]+=com.fpatt[h]/(2*com.ls);
- F[com.z[js][h]*n+com.z[is][h]]+=com.fpatt[h]/(2*com.ls);
- }
- if(fabs(1-sum(F,n*n))>1e-6) error2("Sum F != 1 in dsdnREV");
-
- FOR (i,n) {
- pi[i]=sum(F+i*n, n);
-/*
- if (F[i*n+i]<=small || F[i*n+i]<pi[i]/4)
-*/
- if (F[i*n+i]<=small) F[i*n+i]=1-pi[i]+F[i*n+i];
- else abyx(1/pi[i], F+i*n, n);
- }
- if (eigen (1, F, n, Root, T1, U, V, T2)) error2 ("eigen jgl");
- xtoy (U, V, n*n);
- matinv (V, n, n, T1);
-
-fprintf(frst,"\npi in data\n");
-matout (frst, pi, 1, n); FPN(F0);
-matout (frst, Root, 1, n);
-
- FOR (i,n) {
- if (Root[i]<=0)
- printf (" Root %d:%10.4f", i+1, Root[i]);
- Root[i]=log(Root[i]);
- }
- FOR (i,n) FOR (j,n) T1[i*n+j]=U[i*n+j]*Root[j];
- matby (T1, V, Qt, n, n, n);
- for (i=0,t=0; i<n; i++) t-=pi[i]*Qt[i*n+i];
- if (t<=0) puts ("err: dsdnREV");
-
- FOR(i,n*n) Qt[i]+=1e-8; /* remove negative numbers from rounding errors */
-
- matout(frst,Qt,n,n);
-printf("\nt = %.5f\n", t);
-
- return (0);
-}
-
-
-#endif
+/* yn00.c
+ Pairwise estimation of dS and dN by the method of Yang & Nielsen
+ (2000 Mol. Biol. Evol. 17:32-43)
+
+ Copyright, 1998, Ziheng Yang
+
+ cc -o yn00 -fast yn00.c tools.o -lm
+ cl -O2 yn00.c tools.o
+ yn00 <SequenceFileName>
+
+ Codon sequences are encoded as 0,1,...,com.ncode-1 (i.e. 0,...,60 for the
+ universal code, which has 61 sense codons), as in codeml.c.
+*/
+#include "paml.h"
+#define NS 1000
+#define LSPNAME 30
+#define NCODE 64
+#define NGENE 2000
+
+int GetOptions (char *ctlf);
+int EncodeSeqCodon(void);
+int Statistics(FILE *fout, double space[]);
+int DistanceMatLWL85 (FILE *fout);
+int DistanceYN00(int is, int js, double*S, double*N, double*dS,double*dN,
+ double *SEdS, double *SEdN, double *t,double space[]);
+int GetKappa (void);
+int GetFreqs(int is1, int is2, double f3x4[], double pi[]);
+int CountSites(char z[],double pi[],double*Stot,double*Ntot,
+ double fbS[],double fbN[]);
+int GetPMatCodon(double P[],double t, double kappa, double omega, double space[]);
+int CountDiffs(char z1[],char z2[],
+ double*Sdts,double*Sdtv,double*Ndts,double*Ndtv,double PMat[]);
+int DistanceF84(double n, double P, double Q, double pi[],
+ double*k_HKY, double*t, double*SEt);
+double dsdnREV (int is, int js, double space[]);
+
+int ExpPattFreq(double t,double kappa,double omega,double pi[],double space[]);
+int ConsistencyMC(void);
+int InfiniteData(double t,double kappa,double omega,double f3x4_0[],
+ double space[]);
+void SimulateData2s64(FILE* fout, double f3x4_0[], double space[]);
+
+/* Program-wide state, held in the single global instance com.
+   The notes below state only what the code in this file shows; fields without
+   a note are not referenced in this file outside the shared code pulled in
+   through treesub.c -- TODO confirm their meaning there.
+*/
+struct common_info {
+ /* z[is][h]: code of the codon of sequence is at site pattern h; mapped to a
+    0..63 codon index through FROM61[] before use */
+ unsigned char *z[NS];
+ /* spname[is]: name printed for sequence is; seqf/outf: sequence and output
+    file names read from the control file ("seqfile", "outfile") */
+ char *spname[NS], seqf[512],outf[512];
+ /* ns: number of sequences; ls: sequence length used to turn pattern counts
+    into frequencies; npatt: number of site patterns; icode: row of
+    GeneticCode[] in use; ncode: number of sense codons (set in GetOptions);
+    verbose: control-file option */
+ int ns,ls,npatt,codonf,icode,ncode,getSE,*pose,verbose, seqtype, readpattern;
+ /* fcommon/kcommon: use one f3x4 table / one kappa for all pairs instead of
+    per-pair values; weighting: weight pathways by transition probabilities;
+    ndata: number of data sets in the sequence file */
+ int cleandata, fcommon,kcommon, weighting, ndata, print;
+ /* fpatt[h]: count of site pattern h; pi[]: codon frequencies (also used as
+    scratch for codon-count sums in Statistics); f3x4s[is]: 3x4 table of base
+    frequencies by codon position for sequence is; kappa, omega: current
+    ts/tv rate ratio and dN/dS ratio */
+ double *fpatt, pi[NCODE], f3x4s[NS][12], kappa, omega;
+ int ngene,posG[NGENE+1],lgene[NGENE],fix_rgene, model;
+ double rgene[NGENE],piG[NGENE][NCODE], alpha;
+} com;
+
+
+int FROM61[64], FROM64[64], FourFold[4][4];
+double PMat[NCODE*NCODE];
+char *codonfreqs[]={"Fequal", "F1x4", "F3x4", "Fcodon"};
+enum {Fequal, F1x4, F3x4, Fcodon} CodonFreqs;
+
+FILE *frst, *frst1, *frub;
+extern char BASEs[], AAs[];
+extern int noisy, GeneticCode[][64];
+int Nsensecodon;
+enum {NODEBUG, KAPPA, SITES, DIFF} DebugFunctions;
+int debug=0;
+
+double omega_NG, dN_NG, dS_NG; /* what are these for? */
+
+
+#define YN00
+#define REALSEQUENCE
+#include "treesub.c"
+
+
+int main(int argc, char *argv[])
+{
+/* Program entry point.
+   Reads the control file (argv[1] if given, otherwise "yn00.ctl"), then for
+   each of the com.ndata data sets in the sequence file writes to the main
+   output file:
+     (A) Nei-Gojobori (1986) estimates,
+     (B) Yang & Nielsen (2000) estimates for every pair of sequences,
+     (C) LWL85, LPB93 and LWLm estimates.
+   The pairwise dS, dN and t values of (B) are also written as lower-triangular
+   matrices to 2YN.dS, 2YN.dN and 2YN.t, and a summary line per pair to rst.
+   Returns 0; all fatal conditions terminate the program through error2()/exit().
+*/
+   char dsf[512]="2YN.dS", dnf[512]="2YN.dN", tf[512]="2YN.t";
+   FILE *fout, *fseq, *fds, *fdn, *ft;
+   char ctlf[96]="yn00.ctl", timestr[64];
+   int is,js, j, idata, wname=20, sspace;
+   double t=0.4, dS=0.1,dN=0.1, S,N, SEdS, SEdN, f3x4[12], *space=NULL;
+
+   /* ConsistencyMC(); */
+
+   printf("YN00 in %s\n", pamlVerStr);
+   starttimer();
+   if (argc>1) {
+      /* ctlf is a fixed-size buffer: refuse names that do not fit rather than overflow it */
+      if (strlen(argv[1]) >= sizeof(ctlf)) error2("control file name is too long.");
+      strcpy(ctlf, argv[1]);
+   }
+   com.seqtype=1; com.cleandata=1; /* works for clean data only? */
+   com.ndata=1; com.print=0;
+   noisy=1; com.icode=0; com.fcommon=0; com.kcommon=1;
+   GetOptions(ctlf);
+   setmark_61_64 ();
+   fout = fopen (com.outf, "w");
+   frst = fopen("rst", "w");
+   frst1 = fopen("rst1", "w");
+   frub = fopen ("rub", "w");
+   /* frst1 and frub are written to later (frub by CountDiffs), so check them as well */
+   if (fout==NULL || frst==NULL || frst1==NULL || frub==NULL) error2("outfile creation err.");
+   fds = (FILE*)fopen(dsf, "w");
+   fdn = (FILE*)fopen(dnf, "w");
+   ft = (FILE*)fopen(tf, "w");
+   if(fds==NULL || fdn==NULL || ft==NULL) error2("file open error");
+
+   if((fseq=fopen (com.seqf,"r"))==NULL) {
+      printf ("\n\nSequence file %s not found!\n", com.seqf);
+      exit(-1);
+   }
+   for (idata=0; idata<com.ndata; idata++) {
+      if (com.ndata>1) {
+         printf("\nData set %d\n", idata+1);
+         fprintf(fout, "\n\nData set %d\n", idata+1);
+         fprintf(frst, "\t%d", idata+1);
+      }
+
+      ReadSeq((com.verbose?fout:NULL), fseq, com.cleandata, 0);
+      SetMapAmbiguity();
+
+      /* work space: large enough for Statistics (64*ns doubles) and for the
+         eigen-decomposition in GetPMatCodon */
+      sspace = max2(200000,64*com.ns*sizeof(double));
+      sspace = max2(sspace,64*64*5*sizeof(double));
+      if ((space=(double*)realloc(space,sspace))==NULL) error2("oom space");
+
+      com.kappa = 4.6;
+      com.omega = 1;
+      fprintf(fout,"YN00 %15s", com.seqf);
+      Statistics(fout, space);
+
+      if(noisy) printf("\n\n(A) Nei-Gojobori (1986) method\n");
+      fprintf(fout,"\n\n(A) Nei-Gojobori (1986) method\n");
+      DistanceMatNG86 (fout, NULL, NULL, NULL, 0);
+      fflush(fout);
+
+      if(noisy) printf("\n\n(B) Yang & Nielsen (2000) method\n\n");
+      fprintf(fout,"\n\n(B) Yang & Nielsen (2000) method\n\n");
+      fprintf(fout,"Yang Z, Nielsen R (2000) Estimating synonymous and nonsynonymous substitution rates under realistic evolutionary models. Mol. Biol. Evol. 17:32-43\n");
+      if(!com.weighting) fputs("\n(equal weighting of pathways)\n",fout);
+
+      /* common frequencies / kappa are estimated once per data set;
+         otherwise they are re-estimated for every pair below */
+      if(com.fcommon) GetFreqs(-1, -1, f3x4, com.pi);
+      if(com.kcommon) {
+         GetKappa();
+         printf("kappa = %.2f\n\n",com.kappa);
+         /* puts("kappa?"); scanf("%lf", &com.kappa); */
+      }
+
+      fputs("\nseq. seq. S N t kappa omega dN +- SE dS +- SE\n\n",fout);
+      fprintf(fds,"%6d\n", com.ns);
+      fprintf(fdn,"%6d\n", com.ns);
+      fprintf(ft,"%6d\n", com.ns);
+      for(is=0; is<com.ns; is++) {
+         fprintf(fds,"%-*s ", wname,com.spname[is]);
+         fprintf(fdn,"%-*s ", wname,com.spname[is]);
+         fprintf(ft,"%-*s ", wname,com.spname[is]);
+         for(js=0; js<is; js++) {
+            if(noisy) printf("%3d vs. %3d\n", is+1, js+1);
+            fprintf(fout, " %3d %3d ", is+1, js+1);
+
+            if(!com.fcommon) GetFreqs(is, js, f3x4, com.pi);
+            if(!com.kcommon) GetKappa();
+            j = DistanceYN00(is, js, &S, &N, &dS,&dN, &SEdS, &SEdN, &t,space);
+
+            fprintf(fout,"%7.1f %7.1f %8.4f %7.4f %7.4f %6.4f +- %6.4f %7.4f +- %6.4f\n",
+               S,N,t,com.kappa,com.omega,dN,SEdN,dS,SEdS);
+            fprintf(frst," YN: %8.4f%8.4f%8.4f %6.4f +- %6.4f %7.4f +- %6.4f\n",
+               t,com.kappa,com.omega,dN,SEdN,dS,SEdS);
+
+            fprintf(fds," %7.4f",dS); fprintf(fdn," %7.4f",dN); fprintf(ft," %7.4f",t);
+         } /* for (js) */
+         FPN(fds); FPN(fdn); FPN(ft);
+         fflush(fds); fflush(fdn); fflush(ft);
+      } /* for (is) */
+      FPN(fds); FPN(fdn); FPN(ft);
+
+      if(noisy) printf("\n\n(C) LWL85, LPB93 & LWLm methods\n\n");
+      fprintf(fout,"\n\n(C) LWL85, LPB93 & LWLm methods\n\n");
+      fprintf(fout,"Li W.-H., C.-I. Wu, Luo (1985) A new method for estimating synonymous and nonsynonymous rates of nucleotide substitutions considering the relative likelihood of nucleotide and codon changes. Mol. Biol. Evol. 2: 150-174.\n");
+      fprintf(fout,"Li W-H (1993) Unbiased estimation of the rates of synonymous and nonsynonymous substitution. J. Mol. Evol. 36:96-99\n");
+      fprintf(fout,"Pamilo P, Bianchi NO (1993) Evolution of the Zfx and Zfy genes - rates and interdependence between the genes. Mol. Biol. Evol. 10:271-281\n");
+      fprintf(fout,"Yang Z (2006) Computational Molecular Evolution. Oxford University Press, Oxford. Eqs. 2.12 & 2.13\n");
+
+      DistanceMatLWL85(fout);
+
+      fflush(frst);
+      if(noisy) printf("\nTime used: %s\n", printtime(timestr));
+   }
+
+   /* release everything acquired above so that buffered output is flushed explicitly */
+   fclose(fseq);
+   fclose(fout); fclose(frst); fclose(frst1); fclose(frub);
+   fclose(fds); fclose(fdn); fclose(ft);
+   free(space);
+   return (0);
+}
+
+
+
+int GetOptions (char *ctlf)
+{
+/* Reads program options from the control file ctlf.
+   Each non-blank line that is not a comment (comment character '*') must have
+   the form "name = value".  Recognised names are listed in optstr[]; only the
+   first 8 characters of a name are compared.  An unknown name, or a line
+   without '=', terminates the program.
+   seqfile/outfile are stored as strings in com.seqf/com.outf; every other
+   option is read as a number and stored as an int.
+   On return com.ncode (and Nsensecodon) is the number of sense codons in the
+   genetic code com.icode.  Always returns 0.
+*/
+   int i, nopt=9, lline=4096;
+   char line[4096], *pline, opt[20], comment='*';
+   char *optstr[]={"seqfile","outfile", "verbose", "noisy", "icode",
+      "weighting","commonkappa", "commonf3x4", "ndata"};
+   double t;
+   FILE *fctl;
+
+   if((fctl=fopen(ctlf,"r"))==NULL) error2("\nctl file open error.\n");
+   printf ("\nReading options from %s..\n", ctlf);
+   for (;;) {
+      if (fgets (line, lline, fctl) == NULL) break;
+      /* t is used as a flag here: 1 if the line has content before any comment */
+      for (i=0,t=0,pline=line; i<lline&&line[i]; i++)
+         if (isalnum((unsigned char)line[i])) { t=1; break; } /* cast: plain char may be negative */
+         else if (line[i]==comment) break;
+      if (t==0) continue;
+      /* field width keeps an over-long first token from overrunning opt[20] */
+      sscanf (line, "%19s%*s%lf", opt, &t);
+      if ((pline=strstr(line, "="))==NULL) error2("option file.");
+
+      for (i=0; i<nopt; i++) {
+         if (strncmp(opt, optstr[i], 8)==0) {
+            if (noisy>2)
+               printf ("\n%3d %15s | %-20s %6.2f", i+1,optstr[i],opt,t);
+            switch (i) {
+               /* start right after '=': %s skips any leading white space itself,
+                  and the width bounds the copy to the 512-byte destination */
+               case (0): sscanf(pline+1, "%511s", com.seqf); break;
+               case (1): sscanf(pline+1, "%511s", com.outf); break;
+               case (2): com.verbose=(int)t; break;
+               case (3): noisy=(int)t; break;
+               case (4): com.icode=(int)t; break;
+               case (5): com.weighting=(int)t; break;
+               case (6): com.kcommon=(int)t; break;
+               case (7): com.fcommon=(int)t; break;
+               case (8): com.ndata=(int)t; break;
+            }
+            break;
+         }
+      }
+      if (i==nopt)
+         { printf ("\noption %s in %s\n", opt, ctlf); exit (-1); }
+   }
+
+   for (i=0,Nsensecodon=0; i<64; i++)
+      if (GeneticCode[com.icode][i]!=-1) Nsensecodon++;
+   com.ncode = Nsensecodon;
+   fclose (fctl);
+   FPN(F0);
+   return (0);
+}
+
+int DistanceYN00(int is, int js, double*S, double*N, double*dS,double*dN,
+ double *SEdS, double *SEdN, double *t,double space[])
+{
+/* Calculates S, N, dS, dN, omega (=dN/dS) and t for the pair of sequences
+ is and js.
+ com.kappa & com.pi[] are calculated beforehand and are not updated here;
+ com.omega is overwritten with the new estimate.
+ *S and *N are the averages over the two sequences of the site counts from
+ CountSites(); fbS/fbN are the matching averaged base frequencies.
+ With com.weighting the estimate is iterated (at most nround times): PMat is
+ rebuilt from the current t and omega, differences are recounted, and dS, dN
+ are recomputed until dS, dN and omega each change by less than accu.
+ Without weighting a single pass is made with all PMat entries set to 1.
+ On entry *t is used as the starting value (0 is replaced by 0.5).
+ space[] is work space passed on to GetPMatCodon().
+ Returns 0 on success, -1 if either DistanceF84() call reported a failure or
+ dS < 1e-9 (omega is then set to maxomega), and -2 if all nround rounds were
+ used without meeting the convergence test.
+*/
+ int j,k,ir,nround=10, status=0;
+ double fbS[4],fbN[4],fbSt[4],fbNt[4], St,Nt, Sdts,Sdtv,Ndts,Ndtv, kappaS,kappaN;
+ double w0=0,dS0=0,dN0=0, accu=5e-4, minomega=1e-5,maxomega=99;
+
+ if(*t==0) *t=.5;
+ if(com.omega<=0) com.omega=1;
+ for(k=0; k<4; k++) fbS[k] = fbN[k] = 0;
+ if(debug) printf("\nCountSites\n");
+ if(noisy>3) printf("\n");
+ /* average the site counts and base frequencies over the two sequences */
+ for(k=0,*S=*N=0; k<2; k++) {
+ CountSites(com.z[k==0?is:js], com.pi, &St, &Nt, fbSt, fbNt);
+ *S += St/2;
+ *N += Nt/2;
+ for(j=0; j<4; j++) {
+ fbS[j] += fbSt[j]/2;
+ fbN[j] += fbNt[j]/2;
+ }
+ if(noisy>3) printf("Seq. %d: S = %9.3f N=%9.3f\n",k+1,St,Nt);
+ }
+ if(noisy>3) {
+ printf("Ave. : S = %9.3f N=%9.3f\n\n",*S,*N);
+ printf("Base freqs at syn & nonsyn sites\n%10s%10s%10s%10s\n", "T", "C", "A", "G");
+ for(k=0; k<4; k++) printf(" %9.6f", fbS[k]); FPN(F0);
+ for(k=0; k<4; k++) printf(" %9.6f", fbN[k]); FPN(F0);
+ }
+ if(noisy>3)
+ printf(" # Sdts Sdtv Ndts Ndtv | t kappa w dN dS | kappaS kappaN\n");
+
+ /* initial values? */
+ /* values left over from the previous pair are reused unless out of range */
+ if(com.weighting) {
+ if(*t<0.001 || *t>5) *t=0.5;
+ if(com.omega<0.01 || com.omega>5) com.omega=.5;
+ }
+ for (ir=0; ir<(com.weighting?nround:1); ir++) { /* weighting or iteration */
+ if(com.weighting)
+ GetPMatCodon(PMat,*t,com.kappa,com.omega,space);
+ else
+ for(j=0; j<com.ncode*com.ncode; j++)
+ PMat[j] = 1;
+
+ CountDiffs(com.z[is], com.z[js], &Sdts, &Sdtv, &Ndts, &Ndtv, PMat);
+
+ /* proportions of ts and tv differences at syn and nonsyn sites -> distances */
+ if(DistanceF84(*S, Sdts/ *S, Sdtv/ *S, fbS, &kappaS, dS, SEdS)) status=-1;
+ if(DistanceF84(*N, Ndts/ *N, Ndtv/ *N, fbN, &kappaN, dN, SEdN)) status=-1;
+
+ if(*dS<1e-9) {
+ status = -1;
+ com.omega = maxomega;
+ }
+ else
+ com.omega= max2(minomega, *dN/ *dS);
+ /* t = 3*(dS*S + dN*N)/(S+N) */
+ *t = *dS * 3 * *S/(*S + *N) + *dN * 3 * *N/(*S + *N);
+ if(noisy>3) {
+ printf("%2d %7.2f%7.2f%7.2f%7.2f |", ir+1, Sdts,Sdtv,Ndts,Ndtv);
+ printf("%8.4f%8.4f%8.4f%8.4f%8.4f", *t, com.kappa,com.omega,*dN,*dS);
+ printf(" | %8.4f%8.4f\n", kappaS,kappaN);
+ }
+ if(fabs(*dS-dS0)<accu && fabs(*dN-dN0)<accu && fabs(com.omega-w0)<accu)
+ break;
+ dS0=*dS; dN0=*dN; w0=com.omega;
+ } /* for (ir) */
+ /* ir reaches nround only when the loop ran out of rounds without a break */
+ if(ir==nround) status=-2;
+ /* if(status) printf("\n\tstatus: %d\n", status); */
+ return(status);
+}
+
+
+
+int Statistics(FILE *fout, double space[])
+{
+/* Tabulates base frequencies by codon position for every sequence, using the
+   site patterns (com.npatt) and their counts (com.fpatt[]).
+   Fills com.f3x4s[is][] (3x4 table per sequence) and uses the first
+   64*com.ns elements of space[] for per-sequence codon counts.
+   If fout is not NULL the tables, their average over sequences and the codon
+   usage are printed to it; in that case com.pi[] is overwritten with the codon
+   counts summed over sequences.  Always returns 0.
+*/
+   const int wname=20;
+   double f3x4tot[12];
+   double *codonSum=com.pi, *codonCount=space;
+   int seq, patt, pos, base, cell, codon, nuc[3];
+
+   if(fout) {
+      fprintf(fout, "\n\nns =%4d\tls =%4d", com.ns, com.ls);
+      fprintf(fout,"\n\nCodon position x base (3x4) table for each sequence.");
+   }
+   zero(f3x4tot,12);
+   zero(codonCount,64*com.ns);
+   for(seq=0; seq<com.ns; seq++)
+      zero(com.f3x4s[seq], 12);
+
+   for(seq=0; seq<com.ns; seq++) {
+      for(patt=0; patt<com.npatt; patt++) {
+         codon = FROM61[com.z[seq][patt]];
+         nuc[0] = codon/16;
+         nuc[1] = (codon%16)/4;
+         nuc[2] = codon%4;
+         codonCount[seq*64+codon] += com.fpatt[patt];
+         for(pos=0; pos<3; pos++)
+            com.f3x4s[seq][pos*4+nuc[pos]] += com.fpatt[patt]/com.ls;
+      }
+      for(cell=0; cell<12; cell++)
+         f3x4tot[cell] += com.f3x4s[seq][cell]/com.ns;
+
+      if(fout==NULL) continue;
+      fprintf(fout,"\n\n%-*s", wname, com.spname[seq]);
+      for(pos=0; pos<3; pos++) {
+         fprintf (fout, "\nposition %2d:", pos+1);
+         for(base=0; base<4; base++)
+            fprintf (fout,"%5c:%7.5f", BASEs[base], com.f3x4s[seq][pos*4+base]);
+      }
+   }
+
+   if(fout==NULL)
+      return(0);
+
+   fprintf (fout, "\n\nAverage");
+   for(pos=0; pos<3; pos++) {
+      fprintf (fout, "\nposition %2d:", pos+1);
+      for(base=0; base<4; base++)
+         fprintf (fout,"%5c:%7.5f", BASEs[base], f3x4tot[pos*4+base]);
+   }
+   /* codon counts summed over all sequences */
+   zero(codonSum,64);
+   for(seq=0; seq<com.ns; seq++)
+      for(codon=0; codon<64; codon++)
+         codonSum[codon] += codonCount[seq*64+codon];
+   fprintf (fout, "\n\nCodon usage for each species\n");
+   printcums (fout, com.ns, codonCount, com.icode);
+   fprintf (fout, "\nSums\n");
+   printcums (fout, 1, codonSum, com.icode);
+
+   return(0);
+}
+
+int GetFreqs(int is1, int is2, double f3x4[], double pi[])
+{
+/* Builds the 3x4 table f3x4[] and the codon frequencies pi[] from com.f3x4s.
+   With com.fcommon the table is the average over all com.ns sequences (is1 and
+   is2 are then ignored); otherwise it is the average of sequences is1 and is2.
+   pi[j] is the product of the three position-specific base frequencies of
+   sense codon j, rescaled so that pi[] sums to 1.  Always returns 0.
+*/
+   const int ncodon=com.ncode;
+   int seq, cell, codon, code64, first, second, third;
+
+   if (!com.fcommon) {
+      for(cell=0; cell<12; cell++)
+         f3x4[cell] = (com.f3x4s[is1][cell]+com.f3x4s[is2][cell])/2;
+   }
+   else {
+      zero(f3x4,12);
+      for(seq=0; seq<com.ns; seq++)
+         for(cell=0; cell<12; cell++)
+            f3x4[cell] += com.f3x4s[seq][cell]/com.ns;
+   }
+
+   if (noisy>=9)
+      matout(F0, f3x4, 3, 4);
+
+   for(codon=0; codon<ncodon; codon++) {
+      code64 = FROM61[codon];
+      first  = code64/16;
+      second = (code64%16)/4;
+      third  = code64%4;
+      pi[codon] = f3x4[first] * f3x4[4+second] * f3x4[8+third];
+   }
+   abyx(1/sum(pi,ncodon), pi, ncodon);
+
+   return (0);
+}
+
+
+int DistanceMatLWL85 (FILE *fout)
+{
+/* This implements 3 methods: LWL85 (Li, Wu & Luo 1985), LPB (Li 1993,
+ Pamilo & Bianchi 1993), and LWL85m (equation 12 in book; check other refs).
+ alpha is not used.
+
+ For every pair of sequences (i, j<i) the per-codon quantities returned by
+ difcodonLWL85() are summed over site patterns, weighted by com.fpatt[]:
+ L[k] (sites), sdiff[k] and vdiff[k] for k=0,1,2.
+ NOTE(review): k presumably indexes the degeneracy class (0: non-degenerate,
+ 1: two-fold, 2: four-fold) and sdiff/vdiff the transitional/transversional
+ differences -- confirm against difcodonLWL85().
+ dS, dN and dN/dS for the three methods are computed and printed only when
+ fout is not NULL.  Always returns 0.
+*/
+ int i,j,k, h, wname=15;
+ char *codon1, *codon2, str[4]=" ";
+ double L[3], sdiff[3], vdiff[3], Lt[3], sdifft[3], vdifft[3], A[3],B[3];
+ double P[3],Q[3], a,b, dS,dN, pS2, S,N, Sd,Nd;
+
+ for(i=0; i<com.ns; i++) {
+ for(j=0; j<i; j++) { /* pair i and j */
+ for(k=0; k<3; k++) L[k] = sdiff[k] = vdiff[k] = 0;
+
+ for (h=0; h<com.npatt; h++) {
+ codon1 = CODONs[com.z[i][h]];
+ codon2 = CODONs[com.z[j][h]];
+ difcodonLWL85(codon1, codon2, Lt, sdifft, vdifft, 0, com.icode);
+ for(k=0; k<3; k++) {
+ L[k] += Lt[k]*com.fpatt[h];
+ sdiff[k] += sdifft[k]*com.fpatt[h];
+ vdiff[k] += vdifft[k]*com.fpatt[h];
+ }
+ }
+
+ /* per-class proportions P, Q and the log-corrected quantities A, B;
+    a or b <= 0 is not checked, so log() may yield NaN or infinity here */
+ for(k=0; k<3; k++) {
+ P[k] = sdiff[k]/L[k];
+ Q[k] = vdiff[k]/L[k];
+ a = 1 - 2*P[k] - Q[k];
+ b = 1 - 2*Q[k];
+ A[k] = -log(a)/2 + log(b)/4;
+ B[k] = -log(b)/2;
+ }
+ if(fout) {
+ fprintf(fout, "\n%d (%s) vs. %d (%s)\n\n", i+1, com.spname[i], j+1, com.spname[j]);
+ fprintf(fout,"L(i): %9.1f %9.1f %9.1f sum=%9.1f\n", L[0],L[1],L[2],L[0]+L[1]+L[2]);
+ fprintf(fout,"Ns(i): %9.4f %9.4f %9.4f sum=%9.4f\n", sdiff[0],sdiff[1],sdiff[2], sdiff[0]+sdiff[1]+sdiff[2]);
+ fprintf(fout,"Nv(i): %9.4f %9.4f %9.4f sum=%9.4f\n", vdiff[0],vdiff[1],vdiff[2], vdiff[0]+vdiff[1]+vdiff[2]);
+ fprintf(fout,"A(i): %9.4f %9.4f %9.4f\n", A[0],A[1],A[2]);
+ fprintf(fout,"B(i): %9.4f %9.4f %9.4f\n", B[0],B[1],B[2]);
+
+ /* LWL85: a fixed fraction pS2 = 1/3 of the class-1 sites is counted as synonymous */
+ Sd = L[1]*A[1] + L[2]*(A[2]+B[2]);
+ Nd = L[1]*B[1] + L[0]*(A[0]+B[0]);
+ pS2 = 1/3.;
+ S = L[1]*pS2 + L[2];
+ N = L[1]*(1-pS2) + L[0];
+ dS = Sd/S;
+ dN = Nd/N;
+ fprintf(fout,"LWL85: dS = %7.4f dN = %7.4f w =%7.4f S =%7.1f N =%7.1f\n", dS,dN, dN/dS, S, N);
+ /* LWL85m: same numerators, but pS2 is estimated from class 2 as A/(A+B) */
+ pS2 = A[2]/(A[2]+B[2]);
+ S = L[1]*pS2 + L[2];
+ N = L[1]*(1-pS2) + L[0];
+ dS = Sd/S;
+ dN = Nd/N;
+ fprintf(fout,"LWL85m: dS = %7.4f dN = %7.4f w =%7.4f S =%7.1f N =%7.1f (rho = %.3f)\n", dS,dN, dN/dS, S, N, pS2);
+
+ /* LPB93: site-weighted averages of A (for dS) and B (for dN) over two classes */
+ dS = (L[1]*A[1]+L[2]*A[2])/(L[1]+L[2]) + B[2];
+ dN = (L[0]*B[0]+L[1]*B[1])/(L[0]+L[1]) + A[0];
+ fprintf(fout,"LPB93: dS = %7.4f dN = %7.4f w =%7.4f\n", dS, dN, dN/dS);
+ }
+ }
+ if(noisy) printf(" %3d",i+1);
+ }
+ if(noisy) FPN(F0);
+ if(fout) FPN(fout);
+ return (0);
+}
+
+
+
+int GetKappa(void)
+{
+/* This calculates mutational transition/transversion rate ratio kappa
+   using 4-fold degenerate sites from pairwise comparisons
+   under HKY85, weighting estimates by the numbers of sites.
+
+   For every pair of sequences two 4x4 tables of nucleotide pairs are built,
+   one from non-degenerate positions and one from 4-fold degenerate third
+   positions; DistanceF84() gives a kappa for each table, and the two are
+   averaged with the numbers of sites as weights.  com.kappa is set to the
+   mean of the pairwise values.  A pair that yields no usable estimate
+   contributes kdefault (the value of com.kappa on entry if positive,
+   otherwise 10 for icode 1 and 2 for other codes).
+   Returns 0, or -1 if at least one pair had to use the default.
+*/
+   int is,js,j,k,h, i1,pos,c[2],aa[2],b[2][3],a,ndeg,by[3]={16,4,1}, status=0;
+   double ka[2], F[2][16],S[2],wk[2], t,P,Q,pi[4];
+   /* F&S&wk [0]: non-degenerate; [1]:4-fold; S:sites */
+   double kdefault=(com.kappa>0?com.kappa:(com.icode==1?10:2));
+
+   for(is=0,com.kappa=0;is<com.ns;is++) {
+      for(js=0; js<is; js++) {
+         if(noisy>=9) printf ("\n%4d vs. %3d", is+1, js+1);
+         for(k=0; k<2; k++) zero(F[k],16);
+         for(h=0; h<com.npatt; h++) {
+            c[0] = FROM61[com.z[is][h]];
+            c[1] = FROM61[com.z[js][h]];
+            for(k=0; k<2; k++) {
+               b[k][0] = c[k]/16;
+               b[k][1] = (c[k]%16)/4;
+               b[k][2] = c[k]%4;
+               aa[k] = GeneticCode[com.icode][c[k]];
+            }
+
+            /* find non-degenerate sites: every change at pos alters the
+               amino acid, in both codons */
+            for(pos=0; pos<3; pos++) {         /* check all positions */
+               for(k=0,ndeg=0;k<2;k++) {       /* two codons */
+                  for(i1=0; i1<4; i1++) {
+                     if(i1==b[k][pos]) continue;
+                     a = GeneticCode[com.icode][c[k]+(i1-b[k][pos])*by[pos]];
+                     if(a==aa[k]) break;
+                  }
+                  if(i1==4) ndeg++;
+               }
+               if(ndeg==2) {
+                  F[0][b[0][pos]*4+b[1][pos]] += .5*com.fpatt[h];
+                  F[0][b[1][pos]*4+b[0][pos]] += .5*com.fpatt[h];
+               }
+            }
+            /* find 4-fold degenerate sites at 3rd positions: no change at
+               position 3 alters the amino acid, and the two codons code for
+               the same amino acid */
+            for(k=0,ndeg=0;k<2;k++) {          /* two codons */
+               for(j=0,i1=c[k]-b[k][2]; j<4; j++)
+                  if(j!=b[k][2] && GeneticCode[com.icode][i1+j]!=aa[k]) break;
+               if(aa[0]==aa[1] && j==4) ndeg++;
+            }
+            if (ndeg<2) continue;
+            F[1][b[0][2]*4+b[1][2]] += .5*com.fpatt[h];
+            F[1][b[1][2]*4+b[0][2]] += .5*com.fpatt[h];
+         } /* for (h) */
+         for(k=0; k<2; k++) {                  /* two kinds of sites */
+            S[k] = sum(F[k],16);
+            /* no sites of this kind: give ka[k] a defined value as well, since
+               it still enters the weighted average below (with weight 0) */
+            if(S[k]<=0) { wk[k]=0; ka[k]=0; continue; }
+            for(j=0; j<16; j++) F[k][j]/=S[k];
+            P = (F[k][0*4+1]+F[k][2*4+3])*2;
+            Q = 1-(F[k][0*4+0]+F[k][1*4+1]+F[k][2*4+2]+F[k][3*4+3]) - P;
+            for(j=0; j<4; j++)
+               pi[j] = sum(F[k]+j*4,4);
+            DistanceF84(S[k], P,Q,pi, &ka[k], &t, NULL);
+            /* a non-positive kappa marks a failed estimate: give it no weight */
+            wk[k] = (ka[k]>0?S[k]:0);
+
+            /* matout(F0,F[k],4,4); matout(F0,pi,1,4); */
+         }
+         if(wk[0]+wk[1]==0) {
+            status = -1;
+            ka[0] = kdefault;
+            if(noisy>3) printf("\ngot no kappa! fix it at %.4f\n",ka[0]);
+         }
+         else
+            ka[0] = (ka[0]*wk[0]+ka[1]*wk[1])/(wk[0]+wk[1]);
+         /* running mean over the ns*(ns-1)/2 pairs */
+         com.kappa += ka[0]/(com.ns*(com.ns-1.)/2);
+      } /* for(js) */
+   } /* for(is) */
+
+   return (status);
+}
+
+
+int CountSites(char z[],double pi[],double*Stot,double*Ntot,double fbS[],double fbN[])
+{
+/* Counts synonymous and nonsynonymous sites (*Stot, *Ntot) in the encoded
+   sequence z[], and the base frequencies at those sites (fbS[], fbN[]).
+   Each of the nine single-nucleotide changes of a codon is given the rate
+   pi[target codon], multiplied by com.kappa for a transition; changes to stop
+   codons are skipped.  The rate is added to the synonymous or nonsynonymous
+   total of the codon, and (weighted by the pattern count) to the frequency of
+   the original base.
+   Totals are rescaled so that *Stot + *Ntot = 3*com.ls, and fbS[], fbN[] are
+   rescaled to sum to 1.  The sequence is scanned codon by codon, so the
+   counts depend on the sequence itself and not only on pi[].
+   Terminates through error2() if z[] contains a stop codon.  Returns 0.
+*/
+   static const int weight[3]={16,4,1};  /* place value of each codon position */
+   double tsRatio=com.kappa;
+   double rate, synCodon, nonCodon, scale;
+   int patt, pos, nuc, codon, mutant, aaFrom, aaTo, base[3];
+
+   *Stot = 0;
+   *Ntot = 0;
+   for(nuc=0; nuc<4; nuc++) {
+      fbS[nuc] = 0;
+      fbN[nuc] = 0;
+   }
+
+   for(patt=0; patt<com.npatt; patt++) {
+      codon = FROM61[z[patt]];
+      base[0] = codon/16;
+      base[1] = (codon%16)/4;
+      base[2] = codon%4;
+      aaFrom = GeneticCode[com.icode][codon];
+      if (aaFrom==-1)
+         error2("stop codon");
+
+      synCodon = nonCodon = 0;
+      for(pos=0; pos<3; pos++) {
+         for(nuc=0; nuc<4; nuc++) {            /* base[pos] changes to nuc */
+            if (nuc==base[pos]) continue;
+            mutant = codon + (nuc-base[pos])*weight[pos];
+            aaTo = GeneticCode[com.icode][mutant];
+            if (aaTo==-1) continue;            /* change to a stop codon */
+
+            rate = pi[FROM64[mutant]];
+            if (nuc+base[pos]==1 || nuc+base[pos]==5)
+               rate *= tsRatio;                /* transition */
+
+            if (aaFrom==aaTo) {
+               synCodon += rate;
+               fbS[base[pos]] += rate*com.fpatt[patt];
+            }
+            else {
+               nonCodon += rate;
+               fbN[base[pos]] += rate*com.fpatt[patt];
+            }
+         }
+      }
+      *Stot += com.fpatt[patt]*synCodon;
+      *Ntot += com.fpatt[patt]*nonCodon;
+   }
+
+   /* scale the site totals to three sites per codon */
+   scale = 3*com.ls/(*Stot+*Ntot);
+   *Stot *= scale;
+   *Ntot *= scale;
+
+   /* turn the base tallies into frequencies */
+   scale = sum(fbS,4);
+   for(nuc=0; nuc<4; nuc++) fbS[nuc] /= scale;
+   scale = sum(fbN,4);
+   for(nuc=0; nuc<4; nuc++) fbN[nuc] /= scale;
+
+   return (0);
+}
+
+
+int GetPMatCodon(double P[],double t, double kappa, double omega, double space[])
+{
+/* Get PMat=exp(Q*t) for weighting pathways
+
+ Builds the n x n (n = com.ncode) codon rate matrix Q in the storage of P
+ (Q and P are the same array) and replaces it by the transition probability
+ matrix for time t:
+   Q[i][j] = com.pi[j] * (kappa if transition) * (omega if nonsynonymous)
+ for codons i and j that differ at exactly one position, and 0 otherwise;
+ each diagonal element is minus the sum of the other elements in its row.
+ The eigenvalues are divided by the mean rate mr = -sum_i pi[i]*Q[i][i]
+ before P is formed.
+ space[] is work space: U, V and Root are taken from offset n*n onward.
+ NOTE(review): the decomposition and exponentiation are delegated to
+ eigenQREV() and PMatUVRoot(), defined outside this file -- their exact
+ contracts are assumed here.  nterms is not used.  Always returns 0.
+*/
+ int nterms=100, n=com.ncode, ic1, ic2, i,j,k, aa[2],ndiff,pos=0,from[3],to[3];
+ double *Q=P, *U=space+n*n, *V=U+n*n, *Root=V+n*n, mr, spacesqrt[NCODE];
+
+ for(i=0; i<n*n; i++) Q[i] = 0;
+ /* fill the symmetric exchangeability part for the pairs j<i and mirror it */
+ for (i=0; i<n; i++) {
+ ic1=FROM61[i]; from[0]=ic1/16; from[1]=(ic1/4)%4; from[2]=ic1%4;
+ for(j=0; j<i; j++) {
+ ic2=FROM61[j]; to[0]=ic2/16; to[1]=(ic2/4)%4; to[2]=ic2%4;
+ aa[0] = GeneticCode[com.icode][ic1];
+ aa[1] = GeneticCode[com.icode][ic2];
+ if (aa[0]==-1 || aa[1]==-1) continue;
+ for (k=0,ndiff=0; k<3; k++)
+ if(from[k] != to[k]) { ndiff++; pos=k; }
+ if (ndiff!=1) continue;
+ Q[i*n+j] = 1;
+ /* base codes summing to 1 or 5 form a transition pair */
+ if ((from[pos]+to[pos]-1)*(from[pos]+to[pos]-5)==0)
+ Q[i*n+j] *= kappa;
+ if(aa[0] != aa[1]) Q[i*n+j] *= omega;
+ Q[j*n+i] = Q[i*n+j];
+ }
+ }
+
+ /* multiply column j by the frequency of the target codon */
+ for(i=0; i<n; i++) for(j=0; j<n; j++)
+ Q[i*n+j] *= com.pi[j];
+
+ /* the diagonal is still 0 here, so the row sum covers off-diagonal elements only */
+ for (i=0,mr=0; i<n; i++) {
+ Q[i*n+i] = -sum(Q+i*n,n);
+ mr -= com.pi[i]*Q[i*n+i];
+ }
+
+ eigenQREV(Q, com.pi, n, Root, U, V, spacesqrt);
+ for(i=0; i<n; i++) Root[i] /= mr;
+ PMatUVRoot(P, t, n, U, V, Root);
+ /*
+ testTransP(PMat, n);
+ fprintf(frub,"\a\nP(%.5f)\n", t);
+ for(i=0; i<n; i++,FPN(frub)) for(j=0; j<n; j++)
+ fprintf(frub, " %9.5g", PMat[i*n+j]);
+ fflush(frub);
+ */
+ return (0);
+}
+
+
+
+int CountDiffs(char z1[],char z2[], double*Sdts,double*Sdtv,double*Ndts,double*Ndtv,double PMat[])
+{
+/* Count the numbers of synonymous and nonsynonymous differences between
+   sequences z1 and z2, weighting pathways with PMat (an n x n matrix over
+   sense codons, n = com.ncode).  PMat must not be NULL; for equal weighting
+   of pathways the caller passes a matrix filled with 1.
+   Modified from difcodon()
+   dmark[i] (=0,1,2) is the i_th different codon position (i=0,1,ndiff).
+   step[j] (=0,1,2) is the codon position to be changed at step j (j=0,1,ndiff).
+   b[i][j] (=0,1,2,3) is the nucleotide at position j (0,1,2) in codon i (0,1)
+   sts,stv,nts,ntv are syn ts & tv and nonsyn ts & tv at a codon site.
+   stspath[k] stvpath[k] ntspath[k] ntvpath[k] are syn ts & tv and
+   nonsyn ts & tv differences on path k (k=2,6).
+
+   Outputs *Sdts, *Sdtv, *Ndts, *Ndtv are the totals over site patterns,
+   weighted by com.fpatt[].  A path through a stop codon gets weight 0; if all
+   paths go through stop codons fixed nonsynonymous counts are used.
+   Terminates through error2() on a stop codon in either sequence, and through
+   exit(-1) if the path weights sum to (nearly) zero.  Returns 0.
+*/
+   char str[4]=" ";
+   int n=com.ncode, h,i1,i2,i,k, transi, c[2],ct[2],aa[2], by[3]={16,4,1};
+   int dmark[3], step[3], b[2][3], bt1[3], bt2[3];
+   int ndiff, npath, nstop, stspath[6],stvpath[6],ntspath[6],ntvpath[6];
+   double sts,stv,nts,ntv; /* syn ts & tv, nonsyn ts & tv for 2 codons */
+   double ppath[6], sump,p, pstep;
+
+   *Sdts = *Sdtv = *Ndts = *Ndtv = 0;
+   for (h=0; h<com.npatt; h++) {
+      c[0] = FROM61[z1[h]];
+      c[1] = FROM61[z2[h]];
+      if (c[0]==c[1]) continue;
+      for(i=0; i<2; i++) {
+         b[i][0]=c[i]/16; b[i][1]=(c[i]%16)/4; b[i][2]=c[i]%4;
+         aa[i] = GeneticCode[com.icode][c[i]];
+      }
+      if (aa[0]==-1 || aa[1]==-1)
+         error2("stop codon in sequence.");
+      ndiff=0; sts=stv=nts=ntv=0;
+      for(k=0; k<3; k++) dmark[k] = -1;
+      for(k=0; k<3; k++) if(b[0][k] != b[1][k]) dmark[ndiff++] = k;
+      npath=1;
+      if(ndiff>1) npath = (ndiff==2 ? 2 : 6);
+      if (ndiff==1) {
+         /* base codes summing to 1 or 5 form a transition pair */
+         transi = b[0][dmark[0]]+b[1][dmark[0]];
+         transi = (transi==1 || transi==5);
+         if (aa[0]==aa[1]) { if (transi) sts++; else stv++; }
+         else { if (transi) nts++; else ntv++; }
+      }
+      else { /* ndiff=2 or 3 */
+         if(debug==DIFF) {
+            printf("\n\nh=%d %s (%c) .. ", h+1,getcodon(str,c[0]),AAs[aa[0]]);
+            printf("%s (%c): ", getcodon(str,c[1]), AAs[aa[1]]);
+         }
+         nstop=0;
+         for(k=0; k<npath; k++) {
+            if(debug==DIFF) printf("\npath %d: ", k+1);
+
+            /* order in which the differing positions are changed on path k */
+            for(i1=0; i1<3; i1++) step[i1] = -1;
+            if (ndiff==2) {
+               step[0] = dmark[k];
+               step[1] = dmark[1-k];
+            }
+            else {
+               step[0] = k/2;
+               step[1] = k%2;
+               if (step[0]<=step[1]) step[1]++;
+               step[2] = 3-step[0]-step[1];
+            }
+            for(i1=0; i1<3; i1++) bt1[i1] = bt2[i1]=b[0][i1];
+            stspath[k] = stvpath[k] = ntspath[k] = ntvpath[k] = 0;
+            /* mutations along each path */
+            for (i1=0,ppath[k]=1; i1<ndiff; i1++) {
+               bt2[step[i1]] = b[1][step[i1]];
+               for (i2=0,ct[0]=ct[1]=0; i2<3; i2++) {
+                  ct[0] += bt1[i2]*by[i2];
+                  ct[1] += bt2[i2]*by[i2];
+               }
+               /* PMat is indexed by sense-codon codes, so map the 0..63 codon
+                  indices through FROM64; the same value is used for the path
+                  weight and for the debug print */
+               pstep = PMat[ FROM64[ct[0]]*n + FROM64[ct[1]] ];
+               ppath[k] *= pstep;
+               for(i2=0; i2<2; i2++) aa[i2] = GeneticCode[com.icode][ct[i2]];
+
+               if(debug==DIFF) printf("%s (%c) %.5f: ", getcodon(str,ct[1]),AAs[aa[1]],pstep);
+
+               if (aa[1]==-1) {
+                  nstop++; ppath[k]=0; break;
+               }
+               transi = b[0][step[i1]]+b[1][step[i1]];
+               transi = (transi==1 || transi==5); /* transition? */
+
+               if(aa[0]==aa[1]) { if(transi) stspath[k]++; else stvpath[k]++; }
+               else { if(transi) ntspath[k]++; else ntvpath[k]++; }
+               for(i2=0; i2<3; i2++) bt1[i2] = bt2[i2];
+            }
+
+            if(debug==DIFF) printf(" p =%.9f", ppath[k]);
+
+         } /* for(k,npath) */
+         if (npath==nstop) { /* all paths through stop codons */
+            puts ("all paths through stop codons..");
+            if (ndiff==2) { nts=.5; ntv=1.5; }
+            else { nts=.5; ntv=2.5; }
+         }
+         else {
+            sump = sum(ppath,npath);
+            if(sump<1e-20) {
+               printf("\nsump=0, npath=%4d\nh=%2d ", npath, h+1);
+               printf("(%s ", getcodon(str,c[0]));
+               printf("%s)", getcodon(str,c[1]));
+               for(k=0; k<npath; k++) printf(" %9.6g", ppath[k]); FPN(F0);
+               matout(frub, PMat, n, n);
+               exit(-1);
+
+               /*
+               sump=1; FOR(k,npath) if(ppath[k]) ppath[k]=1./(npath-nstop);
+               */
+            }
+            /* average the per-path counts with normalised path weights */
+            for(k=0; k<npath; k++) {
+               p = ppath[k]/sump;
+               sts += stspath[k]*p;
+               stv += stvpath[k]*p;
+               nts += ntspath[k]*p;
+               ntv += ntvpath[k]*p;
+            }
+
+            if(debug==DIFF) {
+               for(k=0; k<npath; k++) printf("\n p =%.5f", ppath[k]/sump); FPN(F0);
+               printf(" syn ts & tv, nonsyn ts & tv:%9.5f%9.5f%9.5f%9.5f\n",sts,stv,nts,ntv);
+            }
+         }
+
+         if(debug==DIFF) getchar();
+
+      } /* if (ndiff) */
+      *Sdts += com.fpatt[h]*sts;
+      *Sdtv += com.fpatt[h]*stv;
+      *Ndts += com.fpatt[h]*nts;
+      *Ndtv += com.fpatt[h]*ntv;
+   } /* for (h) */
+   return (0);
+}
+
+
+int DistanceF84(double n, double P, double Q, double pi[],
+ double*k_HKY, double*t, double*SEt)
+{
+/* This calculates kappa and d from P (proportion of transitions) & Q
+ (proportion of transversions) & pi under F84.
+ When F84 fails, we try to use K80. When K80 fails, we try
+ to use JC69. When JC69 fails, we set distance t to maxt.
+ Variance formula under F84 is from Tateno et al. (1994), and briefly
+ checked against simulated data sets.
+
+ n is the number of sites (used for Qsmall and for the standard error).
+ pi[0..3] are the base frequencies; Y=pi[0]+pi[1], R=pi[2]+pi[3].
+ Outputs: *k_HKY (kappa on the HKY85 scale, capped at maxkappa; left at -1
+ when only JC69 could be used), *t (distance) and, if SEt is not NULL, *SEt.
+ Returns failF84+failK80+failJC69, i.e. 0 if F84 worked, 1 if K80 was used,
+ 2 if JC69 was used and 3 if P+Q had to be truncated for JC69; also returns 3
+ immediately when P+Q>1.
+ NOTE(review): on that early return *SEt is not written.
+ Terminates through error2() if P or Q is negative or pi[] does not sum to 1.
+*/
+ int failF84=0,failK80=0,failJC69=0;
+ double tc,ag, Y,R, a=0,b=0, A=-1,B=-1,C=-1, k_F84;
+ double Qsmall=min2(1e-10,0.1/n), maxkappa=999,maxt=99;
+
+ *k_HKY=-1;
+ Y=pi[0]+pi[1]; R=pi[2]+pi[3]; tc=pi[0]*pi[1]; ag=pi[2]*pi[3];
+ if (P+Q>1) { *t=maxt; *k_HKY=1; return(3); }
+ if (P<-1e-10 || Q<-1e-10 || fabs(Y+R-1)>1e-8) {
+ printf("\nPQYR & pi[]: %9.5f%9.5f%9.5f%9.5f",P,Q,Y,R);
+ matout(F0,pi,1,4);
+ error2("DistanceF84: input err.");
+ }
+ /* with (almost) no transversions neither F84 nor K80 is attempted */
+ if(Q<Qsmall) failF84=failK80=1;
+ else if(Y<=0 || R<=0 || (tc<=0 && ag<=0)) failF84=1;
+ else {
+ A=tc/Y+ag/R; B=tc+ag; C=Y*R;
+ a=(2*B+2*(tc*R/Y+ag*Y/R)*(1-Q/(2*C)) - P) / (2*A);
+ b=1-Q/(2*C);
+ if (a<=0 || b<=0) failF84=1;
+ }
+ if (!failF84) {
+ /* a and b are reused: from here on they hold the log-transformed values */
+ a=-.5*log(a); b=-.5*log(b);
+ if(b<=0) failF84=1;
+ else {
+ k_F84 = a/b-1;
+ *t = 4*b*(tc*(1+ k_F84/Y) + ag*(1+ k_F84/R)+C);
+ *k_HKY = (B + (tc/Y+ag/R)* k_F84)/B; /* k_F84=>k_HKY85 */
+ if(SEt) {
+ a = A*C/(A*C-C*P/2-(A-B)*Q/2);
+ b = A*(A-B)/(A*C-C*P/2-(A-B)*Q/2) - (A-B-C)/(C-Q/2);
+ *SEt = sqrt((a*a*P+b*b*Q-square(a*P+b*Q))/n);
+ }
+ }
+ }
+ if(failF84 && !failK80) { /* try K80 */
+ if (noisy>=9) printf("\na=%.5f b=%.5f, use K80\n", a,b);
+ a=1-2*P-Q; b=1-2*Q;
+ if (a<=0 || b<=0) failK80=1;
+ else {
+ a=-log(a); b=-log(b);
+ if(b<=0) failK80=1;
+ else {
+ *k_HKY=(.5*a-.25*b)/(.25*b);
+ *t = .5*a+.25*b;
+ }
+ /* computed even when failK80 was just set; the JC69 branch below
+    then overwrites it */
+ if(SEt) {
+ a=1/(1-2*P-Q); b=(a+1/(1-2*Q))/2;
+ *SEt = sqrt((a*a*P+b*b*Q-square(a*P+b*Q))/n);
+ }
+ }
+ }
+ if(failK80) {
+ /* JC69 on the total proportion of differences; P is now P+Q, and is
+    pulled just below the saturation value 3/4 when necessary */
+ if((P+=Q)>=.75) { failJC69=1; P=.75*(n-1.)/n; }
+ *t = -.75*log(1-P*4/3.);
+ if(*t>maxt) *t=maxt;
+ if(SEt) {
+ *SEt = sqrt(9*P*(1-P)/n) / (3-4*P);
+ }
+ }
+ if(*k_HKY>maxkappa) *k_HKY=maxkappa;
+
+ return(failF84 + failK80 + failJC69);
+}
+
+
+
+#if 0
+
+double dsdnREV (int is, int js, double space[])
+{
+/* This calculates ds and dn by recovering the Q*t matrix using the equation
+ F(t) = PI * P(t) = PI * exp(Q*t)
+ This is found not to work well and is not published.
+ space[64*64*5]
+ The code here is broken since I changed the coding. Codons are now coded 0, 1, ..., 60.
+
+ This function is compiled out (it sits inside #if 0) and is kept for
+ reference only.  F (stored in the global PMat) is the symmetrised matrix of
+ observed codon-pair frequencies for sequences is and js; Qt shares the same
+ storage.  The function only prints intermediate results and always
+ returns 0; no dS or dN value is produced.
+*/
+ int n=com.ncode, i,j, h;
+ double *F=PMat, *Qt=F;
+ double *Root=space+n*n,*pi=Root+n, *U=pi+n,*V=U+n*n;
+ double *T1=V+n*n,*T2=T1+n*n, t, small=1e-6;
+
+ fprintf(frst,"\npi in model\n");
+ matout(frst,com.pi,1,n);
+ FOR(i,n*n) F[i]=0;
+ /* each pattern contributes half of its frequency to (is,js) and half to (js,is) */
+ FOR (h,com.npatt) {
+ F[com.z[is][h]*n+com.z[js][h]]+=com.fpatt[h]/(2*com.ls);
+ F[com.z[js][h]*n+com.z[is][h]]+=com.fpatt[h]/(2*com.ls);
+ }
+ if(fabs(1-sum(F,n*n))>1e-6) error2("Sum F != 1 in dsdnREV");
+
+ /* pi[i] is the row sum of F; rows are divided by pi[i] unless the diagonal
+    element is <= small, in which case only the diagonal is adjusted */
+ FOR (i,n) {
+ pi[i]=sum(F+i*n, n);
+/*
+ if (F[i*n+i]<=small || F[i*n+i]<pi[i]/4)
+*/
+ if (F[i*n+i]<=small) F[i*n+i]=1-pi[i]+F[i*n+i];
+ else abyx(1/pi[i], F+i*n, n);
+ }
+ if (eigen (1, F, n, Root, T1, U, V, T2)) error2 ("eigen jgl");
+ xtoy (U, V, n*n);
+ matinv (V, n, n, T1);
+
+fprintf(frst,"\npi in data\n");
+matout (frst, pi, 1, n); FPN(F0);
+matout (frst, Root, 1, n);
+
+ /* take logs of the eigenvalues; a non-positive one is reported but its log
+    is still taken */
+ FOR (i,n) {
+ if (Root[i]<=0)
+ printf (" Root %d:%10.4f", i+1, Root[i]);
+ Root[i]=log(Root[i]);
+ }
+ FOR (i,n) FOR (j,n) T1[i*n+j]=U[i*n+j]*Root[j];
+ matby (T1, V, Qt, n, n, n);
+ for (i=0,t=0; i<n; i++) t-=pi[i]*Qt[i*n+i];
+ if (t<=0) puts ("err: dsdnREV");
+
+ FOR(i,n*n) Qt[i]+=1e-8; /* remove negative numbers from rounding errors */
+
+ matout(frst,Qt,n,n);
+printf("\nt = %.5f\n", t);
+
+ return (0);
+}
+
+
+#endif
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/paml.git
More information about the debian-med-commit
mailing list