[med-svn] [fasttree] 01/02: Import Upstream version 2.1.10

Thorsten Alteholz alteholz at moszumanska.debian.org
Sat Sep 9 17:12:54 UTC 2017


This is an automated email from the git hooks/post-receive script.

alteholz pushed a commit to branch master
in repository fasttree.

commit caef722af6dc73fb315ed4b5da086305f5c54799
Author: Thorsten Alteholz <debian at alteholz.de>
Date:   Sat Sep 9 19:11:34 2017 +0200

    Import Upstream version 2.1.10
---
 changelog  | 11 +++++++++++
 fasttree.c | 22 ++++++++++++++++++----
 2 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/changelog b/changelog
index 3e34002..e49d015 100644
--- a/changelog
+++ b/changelog
@@ -1,3 +1,14 @@
+Version 2.1.10: April 11, 2017
+
+	Fix a bug when using GTR models with huge alignments with over 2
+	billion non-gap characters. The frequencies of A, C, G, and T were
+	being computed with a 32-bit (signed) counter. This led to
+	negative frequencies and eventually to a crash with
+	"FastTree.c:9769: tqli: Assertion `iter < 30' failed.". SetMLGtr()
+	now uses 64-bit counters. Also, more information about the
+	optimization of the GTR model is savd in the log file (if using
+	the -log option). To support this, gtr_opt_t now includes fpLog.
+
 Version 2.1.9: March 29, 2016
 
 	Add the -lg option to use Le/Gascuel model for amino acid
diff --git a/fasttree.c b/fasttree.c
index 120477f..57c6fda 100644
--- a/fasttree.c
+++ b/fasttree.c
@@ -343,7 +343,7 @@ typedef float numeric_t;
 
 #endif /* USE_SSE3 */
 
-#define FT_VERSION "2.1.9"
+#define FT_VERSION "2.1.10"
 
 char *usage =
   "  FastTree protein_alignment > tree\n"
@@ -1380,6 +1380,7 @@ typedef struct {
   double freq[4];
   double rates[6];
   int iRate;			/* which rate to set x from */
+  FILE *fpLog; /* OPTIONAL WRITE */
 } gtr_opt_t;
 
 /* Returns -log_likelihood for the tree with the given rates
@@ -5848,11 +5849,14 @@ void SetMLGtr(/*IN/OUT*/NJ_t *NJ, /*OPTIONAL IN*/double *freq_in, /*OPTIONAL WRI
   assert(nCodes==4);
   gtr_opt_t gtr;
   gtr.NJ = NJ;
+  gtr.fpLog = fpLog;
   if (freq_in != NULL) {
     for (i=0; i<4; i++)
       gtr.freq[i]=freq_in[i];
   } else {
-    int n[4] = {1,1,1,1};	/* pseudocounts */
+    /* n[] and sum were int in FastTree 2.1.9 and earlier -- this
+       caused gtr analyses to fail on analyses with >2e9 positions */
+    long n[4] = {1,1,1,1};	/* pseudocounts */
     for (i=0; i<NJ->nSeq; i++) {
       unsigned char *codes = NJ->profiles[i]->codes;
       int iPos;
@@ -5860,7 +5864,7 @@ void SetMLGtr(/*IN/OUT*/NJ_t *NJ, /*OPTIONAL IN*/double *freq_in, /*OPTIONAL WRI
 	if (codes[iPos] < 4)
 	  n[codes[iPos]]++;
     }
-    int sum = n[0]+n[1]+n[2]+n[3];
+    long sum = n[0]+n[1]+n[2]+n[3];
     for (i=0; i<4; i++)
       gtr.freq[i] = n[i]/(double)sum;
   }
@@ -5903,6 +5907,7 @@ void SetMLGtr(/*IN/OUT*/NJ_t *NJ, /*OPTIONAL IN*/double *freq_in, /*OPTIONAL WRI
 }
 
 double GTRNegLogLk(double x, void *data) {
+  
   gtr_opt_t *gtr = (gtr_opt_t*)data;
   assert(nCodes == 4);
   assert(gtr->NJ != NULL);
@@ -5915,6 +5920,12 @@ double GTRNegLogLk(double x, void *data) {
     rates[i] = gtr->rates[i];
   rates[gtr->iRate] = x;
 
+  FILE *fpLog = gtr->fpLog;
+  if (fpLog)
+    fprintf(fpLog, "GTR_Opt\tfreq %.5f %.5f %.5f %.5f rates %.5f %.5f %.5f %.5f %.5f %.5f\n",
+          gtr->freq[0], gtr->freq[1], gtr->freq[2], gtr->freq[3],
+          rates[0], rates[1], rates[2], rates[3], rates[4], rates[5]);
+
   gtr->NJ->transmat = CreateGTR(rates, gtr->freq);
   RecomputeMLProfiles(/*IN/OUT*/gtr->NJ);
   double loglk = TreeLogLk(gtr->NJ, /*site_loglk*/NULL);
@@ -5923,7 +5934,10 @@ double GTRNegLogLk(double x, void *data) {
   /* Do not recompute profiles -- assume the caller will do that */
   if (verbose > 2)
     fprintf(stderr, "GTR LogLk(%.5f %.5f %.5f %.5f %.5f %.5f) = %f\n",
-	    rates[0], rates[1], rates[2], rates[3], rates[4], rates[5], loglk); 
+	    rates[0], rates[1], rates[2], rates[3], rates[4], rates[5], loglk);
+  if (fpLog)
+    fprintf(fpLog, "GTR_Opt\tGTR LogLk(%.5f %.5f %.5f %.5f %.5f %.5f) = %f\n",
+	    rates[0], rates[1], rates[2], rates[3], rates[4], rates[5], loglk);
   return(-loglk);
 }
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/fasttree.git



More information about the debian-med-commit mailing list