[med-svn] [iqtree] 01/06: Imported Upstream version 1.3.13+dfsg

Andreas Tille tille at debian.org
Tue Feb 2 08:04:10 UTC 2016


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository iqtree.

commit b8172a00618c71ff83c9634f969168606585a1c6
Author: Andreas Tille <tille at debian.org>
Date:   Tue Feb 2 08:35:23 2016 +0100

    Imported Upstream version 1.3.13+dfsg
---
 CMakeLists.txt         |  2 +-
 model/modelfactory.h   |  2 +-
 model/ratefree.cpp     | 32 ++++++++++++++++++++++++++++++--
 phyloanalysis.cpp      | 19 ++++++-------------
 phylokernel.h          | 31 ++++++++++++++++++++++++++++++-
 phylokernelmixrate.h   | 26 ++++++++++++++++++++++++++
 phylokernelmixture.h   | 28 ++++++++++++++++++++++++++++
 phylosupertree.cpp     |  6 +++---
 phylosupertreeplen.cpp | 38 ++++++++++++++++++++++++--------------
 phylosupertreeplen.h   |  6 ++++++
 phylotesting.cpp       | 10 ++++++++--
 phylotree.cpp          |  8 ++++----
 phylotree.h            |  2 +-
 phylotreesse.cpp       | 24 ++++++++++++++++++++++++
 14 files changed, 192 insertions(+), 42 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index efda1d9..61cad8a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -45,7 +45,7 @@ add_definitions(-DIQ_TREE)
 # The version number.
 set (iqtree_VERSION_MAJOR 1)
 set (iqtree_VERSION_MINOR 3)
-set (iqtree_VERSION_PATCH "11.1") 
+set (iqtree_VERSION_PATCH "13") 
 
 set(BUILD_SHARED_LIBS OFF)
 
diff --git a/model/modelfactory.h b/model/modelfactory.h
index 7a0d455..6baf427 100644
--- a/model/modelfactory.h
+++ b/model/modelfactory.h
@@ -69,7 +69,7 @@ public:
 	*/
 	//string getModelName();
 
-	void writeInfo(ostream &out);
+	virtual void writeInfo(ostream &out) {}
 
 	/**
 		Start to store transition matrix for efficiency
diff --git a/model/ratefree.cpp b/model/ratefree.cpp
index 0acbf37..91fc0fd 100644
--- a/model/ratefree.cpp
+++ b/model/ratefree.cpp
@@ -426,6 +426,7 @@ double RateFree::optimizeWithEM() {
     size_t ptn, c;
     size_t nptn = phylo_tree->aln->getNPattern();
     size_t nmix = ncategory;
+    const double MIN_PROP = 1e-4;
     
 //    double *lk_ptn = aligned_alloc<double>(nptn);
     double *new_prop = aligned_alloc<double>(nmix);
@@ -451,6 +452,11 @@ double RateFree::optimizeWithEM() {
         // first compute _pattern_lh_cat
         double score;
         score = phylo_tree->computePatternLhCat(WSL_RATECAT);
+        if (score > 0.0) {
+            phylo_tree->printTree(cout, WT_BR_LEN+WT_NEWLINE);
+            writeInfo(cout);
+        }
+        assert(score < 0);
         memset(new_prop, 0, nmix*sizeof(double));
                 
         // E-step
@@ -461,6 +467,7 @@ double RateFree::optimizeWithEM() {
             for (c = 0; c < nmix; c++) {
                 lk_ptn += this_lk_cat[c];
             }
+            assert(lk_ptn != 0.0);
             lk_ptn = phylo_tree->ptn_freq[ptn] / lk_ptn;
             
             // transform _pattern_lh_cat into posterior probabilities of each category
@@ -472,14 +479,35 @@ double RateFree::optimizeWithEM() {
         } 
         
         // M-step, update weights according to (*)        
+        int maxpropid = 0;
+        for (c = 0; c < nmix; c++) {
+            new_prop[c] = new_prop[c] / phylo_tree->getAlnNSite();
+            if (new_prop[c] > new_prop[maxpropid])
+                maxpropid = c;
+        }
+        // regularize prop
+        bool zero_prop = false;
+        for (c = 0; c < nmix; c++) {
+            if (new_prop[c] < MIN_PROP) {
+                new_prop[maxpropid] -= (MIN_PROP - new_prop[c]);
+                new_prop[c] = MIN_PROP;
+                zero_prop = true;
+            }
+        }
+        // break if some probabilities too small
+        if (zero_prop) break;
         
         bool converged = true;
+        double sum_prop = 0.0;
         for (c = 0; c < nmix; c++) {
-            new_prop[c] = new_prop[c] / phylo_tree->getAlnNSite();
+//            new_prop[c] = new_prop[c] / phylo_tree->getAlnNSite();
             // check for convergence
+            sum_prop += new_prop[c];
             converged = converged && (fabs(prop[c]-new_prop[c]) < 1e-4);
             prop[c] = new_prop[c];
         }
+
+        assert(fabs(sum_prop-1.0) < MIN_PROP);
         
         // now optimize rates one by one
         double sum = 0.0;
@@ -506,7 +534,7 @@ double RateFree::optimizeWithEM() {
                 tree->ptn_freq[ptn] = this_lk_cat[ptn*nmix];
             double scaling = rates[c];
             tree->scaleLength(scaling);
-            tree->optimizeTreeLengthScaling(scaling, 0.001);
+            tree->optimizeTreeLengthScaling(MIN_PROP, scaling, 1.0/prop[c], 0.001);
             converged = converged && (fabs(rates[c] - scaling) < 1e-4);
             rates[c] = scaling;
             sum += prop[c] * rates[c];
diff --git a/phyloanalysis.cpp b/phyloanalysis.cpp
index 4f1e5d4..6b93d76 100644
--- a/phyloanalysis.cpp
+++ b/phyloanalysis.cpp
@@ -373,22 +373,15 @@ void reportTree(ofstream &out, Params &params, PhyloTree &tree, double tree_lh,
         
         out << endl
             << "**************************** WARNING ****************************" << endl
-            << "Number of parameters (K): " << df << endl
-            << "Sample size (n):          " << ssize << endl << endl
-            << "Given that K>=n, the model parameters are not identifiable." << endl
-            << "The program will still try to estimate the parameter values," << endl
-            << "but because of the small sample size, the parameter estimates" << endl 
-            << "are likely to be inaccurate." << endl << endl
-            
-            << "Phylogenetic estimates obtained under these conditions should be" << endl 
-            << "interpreted with extreme caution." << endl << endl 
+            << "Number of parameters (K, model parameters and branch lengths): " << df << endl
+            << "Sample size (n, alignment length): " << ssize << endl << endl
+            << "Given that K>=n, the parameter estimates might be inaccurate." << endl
+            << "Thus, phylogenetic estimates should be interpreted with caution." << endl << endl 
 
-            << "Ideally, it is desirable that n >> K. When selecting optimal" << endl
-            << "models," << endl
+            << "Ideally, it is desirable that n >> K. When selecting optimal models," << endl
             << "1. use AIC or BIC if n > 40K;" << endl 
             << "2. use AICc or BIC if 40K >= n > K;" << endl 
-            << "3. be extremely cautious if n <= K (because model parameters" << endl
-            << "   are not identifiable)." << endl << endl
+            << "3. be extremely cautious if n <= K" << endl << endl
 
             << "To improve the situation (3), consider the following options:" << endl
             << "  1. Increase the sample size (n)" << endl
diff --git a/phylokernel.h b/phylokernel.h
index 2a5a2c2..9b2799c 100644
--- a/phylokernel.h
+++ b/phylokernel.h
@@ -883,6 +883,12 @@ double PhyloTree::computeLikelihoodBranchEigenSIMD(PhyloNeighbor *dad_branch, Ph
 						vc_ptn[j] = mul_add(vc_val[i] * vc_tip_partial_lh[j*(nstates/VCSIZE)+i%(nstates/VCSIZE)],
 								vc_partial_lh_dad[j], vc_ptn[j]);
 					}
+                    
+                // bugfix 2016-01-21, prob_const can be rescaled
+                for (j = 0; j < VCSIZE; j++)
+                    if (dad_branch->scale_num[ptn+j] >= 1)
+                        vc_ptn[j] = vc_ptn[j] * SCALING_THRESHOLD;
+
 				// ptn_invar[ptn] is not aligned
 				lh_ptn = horizontal_add(vc_ptn) + VectorClass().load(&ptn_invar[ptn]);
 			}
@@ -973,6 +979,11 @@ double PhyloTree::computeLikelihoodBranchEigenSIMD(PhyloNeighbor *dad_branch, Ph
 					}
 				}
 
+                // bugfix 2016-01-21, prob_const can be rescaled
+                for (j = 0; j < VCSIZE; j++)
+                    if (dad_branch->scale_num[ptn+j] + node_branch->scale_num[ptn+j] >= 1)
+                        vc_ptn[j] = vc_ptn[j] * SCALING_THRESHOLD;
+
 				// ptn_invar[ptn] is not aligned
 				lh_ptn = horizontal_add(vc_ptn) + VectorClass().load(&ptn_invar[ptn]);
 				partial_lh_node += block*VCSIZE;
@@ -990,6 +1001,7 @@ double PhyloTree::computeLikelihoodBranchEigenSIMD(PhyloNeighbor *dad_branch, Ph
 
 	if (orig_nptn < nptn) {
     	// ascertainment bias correction
+        assert(prob_const < 1.0 && prob_const >= 0.0);
     	prob_const = log(1.0 - prob_const);
     	for (ptn = 0; ptn < orig_nptn; ptn++)
     		_pattern_lh[ptn] -= prob_const;
@@ -1108,7 +1120,19 @@ double PhyloTree::computeLikelihoodFromBufferEigenSIMD() {
 		lh_ptn = 0.0;
 		double prob_const;// df_const, ddf_const;
 		double *theta = &theta_all[orig_nptn*block];
-		for (ptn = orig_nptn; ptn < nptn; ptn+=VCSIZE) {
+
+        UBYTE sum_scale_num[nstates+VCSIZE];
+        memset(sum_scale_num, 0, sizeof(UBYTE)*(nstates+VCSIZE));
+        if (current_it->node->isLeaf())
+            memcpy(sum_scale_num, current_it_back->scale_num+orig_nptn, sizeof(UBYTE)*(nptn-orig_nptn));
+        else if (current_it_back->node->isLeaf())
+            memcpy(sum_scale_num, current_it->scale_num+orig_nptn, sizeof(UBYTE)*(nptn-orig_nptn));
+        else {
+            for (ptn = orig_nptn; ptn < nptn; ptn++)
+                sum_scale_num[ptn-orig_nptn] = current_it->scale_num[ptn] + current_it_back->scale_num[ptn];
+        }
+
+        for (ptn = orig_nptn; ptn < nptn; ptn+=VCSIZE) {
 			lh_final += lh_ptn;
 
 			// initialization
@@ -1123,6 +1147,11 @@ double PhyloTree::computeLikelihoodFromBufferEigenSIMD() {
 			}
 			theta += block*VCSIZE;
 
+            // bugfix 2016-01-21, prob_const can be rescaled
+            for (j = 0; j < VCSIZE; j++)
+                if (sum_scale_num[ptn+j-orig_nptn] >= 1)
+                    vc_ptn[j] = vc_ptn[j] * SCALING_THRESHOLD;
+
 			// ptn_invar[ptn] is not aligned
 			lh_ptn = horizontal_add(vc_ptn) + VectorClass().load(&ptn_invar[ptn]);
 
diff --git a/phylokernelmixrate.h b/phylokernelmixrate.h
index 02ffa4e..ecad5d1 100644
--- a/phylokernelmixrate.h
+++ b/phylokernelmixrate.h
@@ -838,6 +838,10 @@ double PhyloTree::computeMixrateLikelihoodBranchEigenSIMD(PhyloNeighbor *dad_bra
 								VectorClass().load_a(&partial_lh_dad[i]), vc_ptn[j]);
 					}
 				}
+                // bugfix 2016-01-21, prob_const can be rescaled
+                for (j = 0; j < VCSIZE; j++)
+                    if (dad_branch->scale_num[ptn+j] >= 1)
+                        vc_ptn[j] = vc_ptn[j] * SCALING_THRESHOLD;
 				// ptn_invar[ptn] is not aligned
 				lh_ptn = horizontal_add(vc_ptn) + VectorClass().load(&ptn_invar[ptn]);
 			}
@@ -930,6 +934,11 @@ double PhyloTree::computeMixrateLikelihoodBranchEigenSIMD(PhyloNeighbor *dad_bra
 					}
 				}
 
+                // bugfix 2016-01-21, prob_const can be rescaled
+                for (j = 0; j < VCSIZE; j++)
+                    if (dad_branch->scale_num[ptn+j] + node_branch->scale_num[ptn+j] >= 1)
+                        vc_ptn[j] = vc_ptn[j] * SCALING_THRESHOLD;
+
 				// ptn_invar[ptn] is not aligned
 				lh_ptn = horizontal_add(vc_ptn) + VectorClass().load(&ptn_invar[ptn]);
 				partial_lh_node += block*VCSIZE;
@@ -1065,6 +1074,18 @@ double PhyloTree::computeMixrateLikelihoodFromBufferEigenSIMD() {
 		lh_ptn = 0.0;
 		double prob_const;// df_const, ddf_const;
 		double *theta = &theta_all[orig_nptn*block];
+
+        UBYTE sum_scale_num[nstates+VCSIZE];
+        memset(sum_scale_num, 0, sizeof(UBYTE)*(nstates+VCSIZE));
+        if (current_it->node->isLeaf())
+            memcpy(sum_scale_num, current_it_back->scale_num+orig_nptn, sizeof(UBYTE)*(nptn-orig_nptn));
+        else if (current_it_back->node->isLeaf())
+            memcpy(sum_scale_num, current_it->scale_num+orig_nptn, sizeof(UBYTE)*(nptn-orig_nptn));
+        else {
+            for (ptn = orig_nptn; ptn < nptn; ptn++)
+                sum_scale_num[ptn-orig_nptn] = current_it->scale_num[ptn] + current_it_back->scale_num[ptn];
+        }
+
 		for (ptn = orig_nptn; ptn < nptn; ptn+=VCSIZE) {
 			lh_final += lh_ptn;
 
@@ -1080,6 +1101,11 @@ double PhyloTree::computeMixrateLikelihoodFromBufferEigenSIMD() {
 			}
 			theta += block*VCSIZE;
 
+            // bugfix 2016-01-21, prob_const can be rescaled
+            for (j = 0; j < VCSIZE; j++)
+                if (sum_scale_num[ptn+j-orig_nptn] >= 1)
+                    vc_ptn[j] = vc_ptn[j] * SCALING_THRESHOLD;
+
 			// ptn_invar[ptn] is not aligned
 			lh_ptn = horizontal_add(vc_ptn) + VectorClass().load(&ptn_invar[ptn]);
 
diff --git a/phylokernelmixture.h b/phylokernelmixture.h
index 068db03..6e50635 100644
--- a/phylokernelmixture.h
+++ b/phylokernelmixture.h
@@ -917,6 +917,12 @@ double PhyloTree::computeMixtureLikelihoodBranchEigenSIMD(PhyloNeighbor *dad_bra
 //						vc_ptn[j] = mul_add(vc_val[i] * vc_tip_partial_lh[j*(nstates/VCSIZE)+i%(nstates/VCSIZE)],
 //								vc_partial_lh_dad[j], vc_ptn[j]);
 //					}
+
+                // bugfix 2016-01-21, prob_const can be rescaled
+                for (j = 0; j < VCSIZE; j++)
+                    if (dad_branch->scale_num[ptn+j] >= 1)
+                        vc_ptn[j] = vc_ptn[j] * SCALING_THRESHOLD;
+
 				// ptn_invar[ptn] is not aligned
 				lh_ptn = horizontal_add(vc_ptn) + VectorClass().load(&ptn_invar[ptn]);
 			}
@@ -1008,6 +1014,11 @@ double PhyloTree::computeMixtureLikelihoodBranchEigenSIMD(PhyloNeighbor *dad_bra
 					}
 				}
 
+                // bugfix 2016-01-21, prob_const can be rescaled
+                for (j = 0; j < VCSIZE; j++)
+                    if (dad_branch->scale_num[ptn+j] + node_branch->scale_num[ptn+j] >= 1)
+                        vc_ptn[j] = vc_ptn[j] * SCALING_THRESHOLD;
+
 				// ptn_invar[ptn] is not aligned
 				lh_ptn = horizontal_add(vc_ptn) + VectorClass().load(&ptn_invar[ptn]);
 				partial_lh_node += block*VCSIZE;
@@ -1146,6 +1157,18 @@ double PhyloTree::computeMixtureLikelihoodFromBufferEigenSIMD() {
 		lh_ptn = 0.0;
 		double prob_const;// df_const, ddf_const;
 		double *theta = &theta_all[orig_nptn*block];
+
+        UBYTE sum_scale_num[nstates+VCSIZE];
+        memset(sum_scale_num, 0, sizeof(UBYTE)*(nstates+VCSIZE));
+        if (current_it->node->isLeaf())
+            memcpy(sum_scale_num, current_it_back->scale_num+orig_nptn, sizeof(UBYTE)*(nptn-orig_nptn));
+        else if (current_it_back->node->isLeaf())
+            memcpy(sum_scale_num, current_it->scale_num+orig_nptn, sizeof(UBYTE)*(nptn-orig_nptn));
+        else {
+            for (ptn = orig_nptn; ptn < nptn; ptn++)
+                sum_scale_num[ptn-orig_nptn] = current_it->scale_num[ptn] + current_it_back->scale_num[ptn];
+        }
+
 		for (ptn = orig_nptn; ptn < nptn; ptn+=VCSIZE) {
 			lh_final += lh_ptn;
 
@@ -1161,6 +1184,11 @@ double PhyloTree::computeMixtureLikelihoodFromBufferEigenSIMD() {
 			}
 			theta += block*VCSIZE;
 
+            // bugfix 2016-01-21, prob_const can be rescaled
+            for (j = 0; j < VCSIZE; j++)
+                if (sum_scale_num[ptn+j-orig_nptn] >= 1)
+                    vc_ptn[j] = vc_ptn[j] * SCALING_THRESHOLD;
+
 			// ptn_invar[ptn] is not aligned
 			lh_ptn = horizontal_add(vc_ptn) + VectorClass().load(&ptn_invar[ptn]);
 
diff --git a/phylosupertree.cpp b/phylosupertree.cpp
index 44d207e..5df6ba6 100644
--- a/phylosupertree.cpp
+++ b/phylosupertree.cpp
@@ -70,7 +70,7 @@ void PhyloSuperTree::readPartition(Params &params) {
 			getline(in, info.position_spec);
             trimString(info.sequence_type);
 			cout << endl << "Reading partition " << info.name << " (model=" << info.model_name << ", aln=" <<
-					info.aln_file << ", seq=" << info.sequence_type << ", pos=" << info.position_spec << ") ..." << endl;
+					info.aln_file << ", seq=" << info.sequence_type << ", pos=" << ((info.position_spec.length() >= 20) ? info.position_spec.substr(0,20)+"..." : info.position_spec) << ") ..." << endl;
 
 			//info.mem_ptnlh = NULL;
 			info.nniMoves[0].ptnlh = NULL;
@@ -182,7 +182,7 @@ void PhyloSuperTree::readPartitionRaxml(Params &params) {
                 outError("Please specify alignment positions for partition" + info.name);
             std::replace(info.position_spec.begin(), info.position_spec.end(), ',', ' ');
             
-			cout << "Reading partition " << info.name << " (model=" << info.model_name << ", seq=" << info.sequence_type << ", pos=" << info.position_spec << ") ..." << endl;
+			cout << "Reading partition " << info.name << " (model=" << info.model_name << ", seq=" << info.sequence_type << ", pos=" << ((info.position_spec.length() >= 20) ? info.position_spec.substr(0,20)+"..." : info.position_spec) << ") ..." << endl;
 
 			//info.mem_ptnlh = NULL;
 			info.nniMoves[0].ptnlh = NULL;
@@ -265,7 +265,7 @@ void PhyloSuperTree::readPartitionNexus(Params &params) {
 			info.position_spec = (*it)->position_spec;
 			trimString(info.sequence_type);
 			cout << endl << "Reading partition " << info.name << " (model=" << info.model_name << ", aln=" <<
-				info.aln_file << ", seq=" << info.sequence_type << ", pos=" << info.position_spec << ") ..." << endl;
+				info.aln_file << ", seq=" << info.sequence_type << ", pos=" << ((info.position_spec.length() >= 20) ? info.position_spec.substr(0,20)+"..." : info.position_spec) << ") ..." << endl;
             if (info.sequence_type != "" && Alignment::getSeqType(info.sequence_type.c_str()) == SEQ_UNKNOWN)
                 outError("Unknown sequence type " + info.sequence_type);
 			//info.mem_ptnlh = NULL;
diff --git a/phylosupertreeplen.cpp b/phylosupertreeplen.cpp
index 52d6ed6..75356db 100644
--- a/phylosupertreeplen.cpp
+++ b/phylosupertreeplen.cpp
@@ -121,6 +121,11 @@ double PartitionModelPlen::optimizeParameters(bool fixed_len, bool write_info, d
         }
         if (verbose_mode >= VB_MED)
             cout << "LnL after optimizing individual models: " << cur_lh << endl;
+        if (cur_lh <= tree_lh - 1.0) {
+            // more info for ASSERTION 
+            writeInfo(cout);
+            tree->printTree(cout, WT_BR_LEN+WT_NEWLINE);
+        }
         assert(cur_lh > tree_lh - 1.0 && "individual model opt reduces LnL");
         
     	tree->clearAllPartialLH();
@@ -129,11 +134,7 @@ double PartitionModelPlen::optimizeParameters(bool fixed_len, bool write_info, d
     		cur_lh = optimizeGeneRate(gradient_epsilon);
             if (verbose_mode >= VB_MED) {
                 cout << "LnL after optimizing partition-specific rates: " << cur_lh << endl;
-                cout << "Partition-specific rates: ";
-                for(int part = 0; part < ntrees; part++){
-                    cout << " " << tree->part_info[part].part_rate;
-                }
-                cout << endl;
+                writeInfo(cout);
             }
             assert(cur_lh > tree_lh - 1.0 && "partition rate opt reduces LnL");
     	}
@@ -156,33 +157,42 @@ double PartitionModelPlen::optimizeParameters(bool fixed_len, bool write_info, d
     	tree_lh = cur_lh;
     }
 //    cout <<"OPTIMIZE MODEL has finished"<< endl;
+    if (write_info)
+        writeInfo(cout);
+    cout << "Parameters optimization took " << i-1 << " rounds (" << getRealTime()-begin_time << " sec)" << endl << endl;
+
+    return tree_lh;
+}
+
+void PartitionModelPlen::writeInfo(ostream &out) {
+    PhyloSuperTreePlen *tree = (PhyloSuperTreePlen*)site_rate->getTree();
+    int ntrees = tree->size();
     if (!tree->fixed_rates) {
-        cout << "Partition-specific rates: ";
+        out << "Partition-specific rates: ";
         for(int part = 0; part < ntrees; part++){
-            cout << " " << tree->part_info[part].part_rate;
+            out << " " << tree->part_info[part].part_rate;
         }
-        cout << endl;
+        out << endl;
     }
-	cout << "Parameters optimization took " << i-1 << " rounds (" << getRealTime()-begin_time << " sec)" << endl << endl;
-
-    return tree_lh;
 }
-
-
+    
 double PartitionModelPlen::optimizeGeneRate(double gradient_epsilon)
 {
 	PhyloSuperTreePlen *tree = (PhyloSuperTreePlen*)site_rate->getTree();
     // BQM 22-05-2015: change to optimize individual rates
     int i;
     double score = 0.0;
+    double nsites = tree->getAlnNSite();
 
     if (tree->part_order.empty()) tree->computePartitionOrder();
+
     #ifdef _OPENMP
     #pragma omp parallel for reduction(+: score) private(i) schedule(dynamic) if(tree->size() >= tree->params->num_threads)
     #endif    
     for (int j = 0; j < tree->size(); j++) {
         int i = tree->part_order[j];
-        tree->part_info[i].cur_score = tree->at(i)->optimizeTreeLengthScaling(tree->part_info[i].part_rate, gradient_epsilon);
+        double max_scaling = nsites / tree->at(i)->getAlnNSite();
+        tree->part_info[i].cur_score = tree->at(i)->optimizeTreeLengthScaling(1.0/tree->at(i)->getAlnNSite(), tree->part_info[i].part_rate, max_scaling, gradient_epsilon);
         score += tree->part_info[i].cur_score;
     }
     // now normalize the rates
diff --git a/phylosupertreeplen.h b/phylosupertreeplen.h
index aab6642..39a7f67 100644
--- a/phylosupertreeplen.h
+++ b/phylosupertreeplen.h
@@ -107,6 +107,12 @@ public:
     virtual int getNDim();
 
 	/**
+		write information
+		@param out output stream
+	*/
+	virtual void writeInfo(ostream &out);
+
+	/**
 		optimize model parameters and tree branch lengths
 		@param fixed_len TRUE to fix branch lengths, default is false
 		@return the best likelihood
diff --git a/phylotesting.cpp b/phylotesting.cpp
index 3988bd7..1ac2c0a 100644
--- a/phylotesting.cpp
+++ b/phylotesting.cpp
@@ -837,9 +837,15 @@ void testPartitionModel(Params &params, PhyloSuperTree* in_tree, vector<ModelInf
         dist[i] = -((double)this_aln->getNSeq())*this_aln->getNPattern()*this_aln->num_states;
     }
     
-    if (params.num_threads > 1)
+    if (params.num_threads > 1) 
+    {
         quicksort(dist, 0, in_tree->size()-1, distID);
-
+        if (verbose_mode >= VB_MED) {
+            for (i = 0; i < in_tree->size(); i++) {
+                cout << i+1 << "\t" << in_tree->part_info[distID[i]].name << endl;
+            }
+        }
+    }
 
 #ifdef _OPENMP
 //        for (i = 0; i < in_tree->size(); i++)
diff --git a/phylotree.cpp b/phylotree.cpp
index 3670008..82c19b0 100644
--- a/phylotree.cpp
+++ b/phylotree.cpp
@@ -3068,16 +3068,16 @@ void PhyloTree::computeLikelihoodDervNaive(PhyloNeighbor *dad_branch, PhyloNode
  Branch length optimization by maximum likelihood
  ****************************************************************************/
 
-const double MIN_TREE_LENGTH_SCALE = 0.001;
-const double MAX_TREE_LENGTH_SCALE = 1000.0;
+//const double MIN_TREE_LENGTH_SCALE = 0.001;
+//const double MAX_TREE_LENGTH_SCALE = 100.0;
 const double TOL_TREE_LENGTH_SCALE = 0.001;
 
 
-double PhyloTree::optimizeTreeLengthScaling(double &scaling, double gradient_epsilon) {
+double PhyloTree::optimizeTreeLengthScaling(double min_scaling, double &scaling, double max_scaling, double gradient_epsilon) {
     is_opt_scaling = true;
     current_scaling = scaling;
     double negative_lh, ferror;
-    scaling = minimizeOneDimen(min(current_scaling/2.0, MIN_TREE_LENGTH_SCALE), scaling, max(current_scaling*2.0, MAX_TREE_LENGTH_SCALE), max(TOL_TREE_LENGTH_SCALE, gradient_epsilon), &negative_lh, &ferror);
+    scaling = minimizeOneDimen(min(scaling, min_scaling), scaling, max(max_scaling, scaling), max(TOL_TREE_LENGTH_SCALE, gradient_epsilon), &negative_lh, &ferror);
     if (scaling != current_scaling) {
         scaleLength(scaling / current_scaling);
         current_scaling = scaling;
diff --git a/phylotree.h b/phylotree.h
index afee186..a2c2480 100644
--- a/phylotree.h
+++ b/phylotree.h
@@ -1057,7 +1057,7 @@ public:
         @param gradient_epsilon gradient epsilon
         @return optimal tree log-likelihood
     */
-    double optimizeTreeLengthScaling(double &scaling, double gradient_epsilon);
+    double optimizeTreeLengthScaling(double min_scaling, double &scaling, double max_scaling, double gradient_epsilon);
 
 
      /****************************************************************************
diff --git a/phylotreesse.cpp b/phylotreesse.cpp
index 8447044..885878d 100644
--- a/phylotreesse.cpp
+++ b/phylotreesse.cpp
@@ -1008,6 +1008,10 @@ double PhyloTree::computeLikelihoodBranchEigen(PhyloNeighbor *dad_branch, PhyloN
 				_pattern_lh[ptn] = lh_ptn;
 				tree_lh += lh_ptn * ptn_freq[ptn];
 			} else {
+                // bugfix 2016-01-21, prob_const can be rescaled
+                if (dad_branch->scale_num[ptn] >= 1)
+                    lh_ptn *= SCALING_THRESHOLD;
+//				_pattern_lh[ptn] = lh_ptn;
 				prob_const += lh_ptn;
 			}
 		}
@@ -1040,6 +1044,10 @@ double PhyloTree::computeLikelihoodBranchEigen(PhyloNeighbor *dad_branch, PhyloN
 				_pattern_lh[ptn] = lh_ptn;
 				tree_lh += lh_ptn * ptn_freq[ptn];
 			} else {
+                // bugfix 2016-01-21, prob_const can be rescaled
+                if (dad_branch->scale_num[ptn] + node_branch->scale_num[ptn] >= 1)
+                    lh_ptn *= SCALING_THRESHOLD;
+//				_pattern_lh[ptn] = lh_ptn;
 				prob_const += lh_ptn;
 			}
 		}
@@ -1072,6 +1080,10 @@ double PhyloTree::computeLikelihoodBranchEigen(PhyloNeighbor *dad_branch, PhyloN
 
     if (orig_nptn < nptn) {
     	// ascertainment bias correction
+        if (prob_const >= 1.0 || prob_const < 0.0) {
+            printTree(cout, WT_TAXON_ID + WT_BR_LEN + WT_NEWLINE);
+            model->writeInfo(cout);
+        }
         assert(prob_const < 1.0 && prob_const >= 0.0);
 
         // BQM 2015-10-11: fix this those functions using _pattern_lh_cat
@@ -2051,6 +2063,9 @@ double PhyloTree::computeMixrateLikelihoodBranchEigen(PhyloNeighbor *dad_branch,
 				_pattern_lh[ptn] = lh_ptn;
 				tree_lh += lh_ptn * ptn_freq[ptn];
 			} else {
+                // bugfix 2016-01-21, prob_const can be rescaled
+                if (dad_branch->scale_num[ptn] >= 1)
+                    lh_ptn *= SCALING_THRESHOLD;
 				prob_const += lh_ptn;
 			}
 		}
@@ -2083,6 +2098,9 @@ double PhyloTree::computeMixrateLikelihoodBranchEigen(PhyloNeighbor *dad_branch,
 				_pattern_lh[ptn] = lh_ptn;
 				tree_lh += lh_ptn * ptn_freq[ptn];
 			} else {
+                // bugfix 2016-01-21, prob_const can be rescaled
+                if (dad_branch->scale_num[ptn] + node_branch->scale_num[ptn] >= 1)
+                    lh_ptn *= SCALING_THRESHOLD;
 				prob_const += lh_ptn;
 			}
 		}
@@ -2681,6 +2699,9 @@ double PhyloTree::computeMixtureLikelihoodBranchEigen(PhyloNeighbor *dad_branch,
 				_pattern_lh[ptn] = lh_ptn;
 				tree_lh += lh_ptn * ptn_freq[ptn];
 			} else {
+                // bugfix 2016-01-21, prob_const can be rescaled
+                if (dad_branch->scale_num[ptn] >= 1)
+                    lh_ptn *= SCALING_THRESHOLD;
 				prob_const += lh_ptn;
 			}
 		}
@@ -2722,6 +2743,9 @@ double PhyloTree::computeMixtureLikelihoodBranchEigen(PhyloNeighbor *dad_branch,
 				_pattern_lh[ptn] = lh_ptn;
 				tree_lh += lh_ptn * ptn_freq[ptn];
 			} else {
+                // bugfix 2016-01-21, prob_const can be rescaled
+                if (dad_branch->scale_num[ptn] + node_branch->scale_num[ptn] >= 1)
+                    lh_ptn *= SCALING_THRESHOLD;
 				prob_const += lh_ptn;
 			}
 		}

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/iqtree.git



More information about the debian-med-commit mailing list