[med-svn] [fasttree] 01/03: Imported Upstream version 2.1.8
Roland Fehrenbacher
rfehren-guest at moszumanska.debian.org
Tue Mar 31 11:01:53 UTC 2015
This is an automated email from the git hooks/post-receive script.
rfehren-guest pushed a commit to branch master
in repository fasttree.
commit 34340d0376e84ee19e8171ac464dcaa64bc72f37
Author: Roland Fehrenbacher <rf at q-leap.de>
Date: Tue Mar 31 10:50:09 2015 +0000
Imported Upstream version 2.1.8
---
changelog | 9 ++++++
fasttree.c | 97 +++++++++++++++++++++++++++++++++++++++++++-------------------
2 files changed, 76 insertions(+), 30 deletions(-)
diff --git a/changelog b/changelog
index e0bfeea..d6f63f2 100644
--- a/changelog
+++ b/changelog
@@ -1,3 +1,12 @@
+Version 2.1.8: March 24, 2015
+
+ To provide useful branch lengths for very wide alignments of very
+ close closely-related sequences, the minimum branch lengths were
+ dramatically decreased when compiling with USE_DOUBLE. If using ML
+ on an alignment of closely-related and long sequences, issues a
+ warning, and recommends recompiling with USE_DOUBLE (if not
+ already using USE_DOUBLE).
+
Version 2.1.7: January 22, 2013
To avoid numerical problems that led to crashes in rare cases,
diff --git a/fasttree.c b/fasttree.c
index 12862e2..e8adc7a 100644
--- a/fasttree.c
+++ b/fasttree.c
@@ -2,15 +2,17 @@
* FastTree -- inferring approximately-maximum-likelihood trees for large
* multiple sequence alignments.
*
- * Morgan N. Price, 2008-2011
+ * Morgan N. Price
* http://www.microbesonline.org/fasttree/
*
* Thanks to Jim Hester of the Cleveland Clinic Foundation for
* providing the first parallel (OpenMP) code, Siavash Mirarab of
- * UT Austin for implementing the WAG option, and Samuel Shepard
- * at the CDC for suggesting and helping with the -quote option.
+ * UT Austin for implementing the WAG option, Samuel Shepard
+ * at the CDC for suggesting and helping with the -quote option, and
+ * Aaron Darling (University of Technology, Sydney) for numerical changes
+ * for wide alignments of closely-related sequences.
*
- * Copyright (C) 2008-2011 The Regents of the University of California
+ * Copyright (C) 2008-2015 The Regents of the University of California
* All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
@@ -305,7 +307,8 @@
/* By default, tries to compile with SSE instructions for greater speed.
But if compiled with -DUSE_DOUBLE, uses double precision instead of single-precision
- floating point (2x memroy required), and does not use SSE.
+ floating point (2x memory required), does not use SSE, and allows much shorter
+ branch lengths.
*/
#ifdef __SSE__
#if !defined(NO_SSE) && !defined(USE_DOUBLE)
@@ -340,7 +343,7 @@ typedef float numeric_t;
#endif /* USE_SSE3 */
-#define FT_VERSION "2.1.7"
+#define FT_VERSION "2.1.8"
char *usage =
" FastTree protein_alignment > tree\n"
@@ -848,14 +851,24 @@ const double LogLkUnderflow = 9.21034037197618; /* -log(LkUnderflowInv) */
const double Log2 = 0.693147180559945;
/* These are used to limit the optimization of branch lengths.
Also very short branch lengths can create numerical problems.
- In version 2.1.7., the minimum branch lengths (MLMinBranchLength and MLMinRelBranchLength)
+ In version 2.1.7, the minimum branch lengths (MLMinBranchLength and MLMinRelBranchLength)
were increased to prevent numerical problems in rare cases.
- If compiled with -DUSE_DOUBLE then these minimums could be decreased.
+ In version 2.1.8, to provide useful branch lengths for genome-wide alignments,
+ the minimum branch lengths were dramatically decreased if USE_DOUBLE is defined.
*/
+#ifndef USE_DOUBLE
const double MLMinBranchLengthTolerance = 1.0e-4; /* absolute tolerance for optimizing branch lengths */
const double MLFTolBranchLength = 0.001; /* fractional tolerance for optimizing branch lengths */
-const double MLMinBranchLength = 5.0e-4;
+const double MLMinBranchLength = 5.0e-4; /* minimum value for branch length */
const double MLMinRelBranchLength = 2.5e-4; /* minimum of rate * length */
+const double fPostTotalTolerance = 1.0e-10; /* posterior vector must sum to at least this before rescaling */
+#else
+const double MLMinBranchLengthTolerance = 1.0e-9;
+const double MLFTolBranchLength = 0.001;
+const double MLMinBranchLength = 5.0e-9;
+const double MLMinRelBranchLength = 2.5e-9;
+const double fPostTotalTolerance = 1.0e-20;
+#endif
int mlAccuracy = 1; /* Rounds of optimization of branch lengths; 1 means do 2nd round only if close */
double closeLogLkLimit = 5.0; /* If partial optimization of an NNI looks like it would decrease the log likelihood
@@ -2211,21 +2224,20 @@ int main(int argc, char **argv) {
UpdateBranchLengths(/*IN/OUT*/NJ);
LogTree("ME_Lengths",0, fpLog, NJ, aln->names, unique, bQuote);
- if(verbose>0 || fpLog) {
- double total_len = 0;
- int iNode;
- for (iNode = 0; iNode < NJ->maxnode; iNode++)
- total_len += fabs(NJ->branchlength[iNode]);
- if (verbose>0) {
- fprintf(stderr, "Total branch-length %.3f after %.2f sec\n",
- total_len, clockDiff(&clock_start));
- fflush(stderr);
- }
- if (fpLog) {
- fprintf(fpLog, "Total branch-length %.3f after %.2f sec\n",
- total_len, clockDiff(&clock_start));
- fflush(stderr);
- }
+ double total_len = 0;
+ int iNode;
+ for (iNode = 0; iNode < NJ->maxnode; iNode++)
+ total_len += fabs(NJ->branchlength[iNode]);
+
+ if (verbose>0) {
+ fprintf(stderr, "Total branch-length %.3f after %.2f sec\n",
+ total_len, clockDiff(&clock_start));
+ fflush(stderr);
+ }
+ if (fpLog) {
+ fprintf(fpLog, "Total branch-length %.3f after %.2f sec\n",
+ total_len, clockDiff(&clock_start));
+ fflush(stderr);
}
#ifdef TRACK_MEMORY
@@ -2238,7 +2250,27 @@ int main(int argc, char **argv) {
#endif
SplitCount_t splitcount = {0,0,0,0,0.0,0.0};
+
if (MLnniToDo > 0 || MLlen) {
+ bool warn_len = total_len/NJ->maxnode < 0.001 && MLMinBranchLengthTolerance > 1.0/aln->nPos;
+ bool warn = warn_len || (total_len/NJ->maxnode < 0.001 && aln->nPos >= 10000);
+ if (warn)
+ fprintf(stderr, "\nWARNING! This alignment consists of closely-related and very-long sequences.\n");
+ if (warn_len)
+ fprintf(stderr,
+ "This version of FastTree may not report reasonable branch lengths!\n"
+#ifdef USE_DOUBLE
+ "Consider changing MLMinBranchLengthTolerance.\n"
+#else
+ "Consider recompiling FastTree with -DUSE_DOUBLE.\n"
+#endif
+ "For more information, visit\n"
+ "http://www.microbesonline.org/fasttree/#BranchLen\n\n");
+ if (warn)
+ fprintf(stderr, "WARNING! FastTree (or other standard maximum-likelihood tools)\n"
+ "may not be appropriate for aligments of very closely-related sequences\n"
+ "like this one, as FastTree does not account for recombination or gene conversion\n\n");
+
/* Do maximum-likelihood computations */
/* Convert profiles to use the transition matrix */
distance_matrix_t *tmatAsDist = TransMatToDistanceMat(/*OPTIONAL*/NJ->transmat);
@@ -3561,14 +3593,19 @@ void PrintNJ(FILE *fp, NJ_t *NJ, char **names, uniquify_t *unique, bool bShowSup
fprintf(fp,")");
}
/* Print the branch length */
- fprintf(fp, ":%.5f", NJ->branchlength[node]);
+#ifdef USE_DOUBLE
+#define FP_FORMAT "%.9f"
+#else
+#define FP_FORMAT "%.5f"
+#endif
+ fprintf(fp, ":" FP_FORMAT, NJ->branchlength[node]);
} else if (end) {
if (node == NJ->root)
fprintf(fp, ")");
else if (bShowSupport)
- fprintf(fp, ")%.3f:%.5f", NJ->support[node], NJ->branchlength[node]);
+ fprintf(fp, ")%.3f:" FP_FORMAT, NJ->support[node], NJ->branchlength[node]);
else
- fprintf(fp, "):%.5f", NJ->branchlength[node]);
+ fprintf(fp, "):" FP_FORMAT, NJ->branchlength[node]);
} else {
if (node != NJ->root && NJ->child[NJ->parent[node]].child[0] != node) fprintf(fp, ",");
fprintf(fp, "(");
@@ -4370,7 +4407,7 @@ void NormalizeFreq(/*IN/OUT*/numeric_t *freq, distance_matrix_t *dmat) {
for (k = 0; k < nCodes; k++)
total_freq += freq[k];
}
- if (total_freq > 1e-10) {
+ if (total_freq > fPostTotalTolerance) {
numeric_t inverse_weight = 1.0/total_freq;
vector_multiply_by(/*IN/OUT*/freq, inverse_weight, nCodes);
} else {
@@ -4962,7 +4999,7 @@ profile_t *PosteriorProfile(profile_t *p1, profile_t *p2,
double fPostTot = 0;
for (j = 0; j < 4; j++)
fPostTot += fPost[j];
- assert(fPostTot > 1e-10);
+ assert(fPostTot > fPostTotalTolerance);
double fPostInv = 1.0/fPostTot;
#if 0 /* SSE3 is slower */
vector_multiply_by(fPost, fPostInv, 4);
@@ -5025,7 +5062,7 @@ profile_t *PosteriorProfile(profile_t *p1, profile_t *p2,
fPost[j] = value >= 0 ? value : 0;
}
double fPostTot = vector_sum(fPost, 20);
- assert(fPostTot > 1e-10);
+ assert(fPostTot > fPostTotalTolerance);
double fPostInv = 1.0/fPostTot;
vector_multiply_by(/*IN/OUT*/fPost, fPostInv, 20);
int ch = -1; /* the dominant character, if any */
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/fasttree.git
More information about the debian-med-commit
mailing list