[med-svn] [iqtree] 01/06: New upstream version 1.5.3+dfsg

Andreas Tille tille at debian.org
Thu Jan 19 07:50:01 UTC 2017


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository iqtree.

commit 173db0cf39ac0403d695f62b2493f420ed3d2edd
Author: Andreas Tille <tille at debian.org>
Date:   Thu Jan 19 08:31:10 2017 +0100

    New upstream version 1.5.3+dfsg
---
 CMakeLists.txt         |   2 +-
 alignment.cpp          |  52 ++++++++++++++++++++-----
 iqtree.cpp             |  16 ++++----
 model/modelfactory.cpp | 104 ++++++++++++++++++++++++++++++-------------------
 model/modelfactory.h   |   5 +--
 model/modelset.cpp     |  10 +++++
 model/ratefree.cpp     |   2 +-
 phyloanalysis.cpp      |  10 +++--
 phylokernelnew.h       |  45 +++++++++++----------
 phylotree.cpp          |   4 +-
 phylotreesse.cpp       |  35 +++++++++++++++--
 tools.cpp              |   3 ++
 12 files changed, 197 insertions(+), 91 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 767f309..6acf40b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -50,7 +50,7 @@ add_definitions(-DIQ_TREE)
 # The version number.
 set (iqtree_VERSION_MAJOR 1)
 set (iqtree_VERSION_MINOR 5)
-set (iqtree_VERSION_PATCH "2")
+set (iqtree_VERSION_PATCH "3")
 
 set(BUILD_SHARED_LIBS OFF)
 
diff --git a/alignment.cpp b/alignment.cpp
index f935d2b..44f0146 100644
--- a/alignment.cpp
+++ b/alignment.cpp
@@ -376,7 +376,8 @@ Alignment::Alignment(char *filename, char *sequence_type, InputType &intype) : v
     countConstSite();
 
     cout << "Alignment has " << getNSeq() << " sequences with " << getNSite() <<
-         " columns and " << getNPattern() << " patterns (" << num_informative_sites << " informative sites)" << endl;
+         " columns and " << getNPattern() << " patterns (" << num_informative_sites << " informative sites, " <<
+         (int)(frac_const_sites*getNSite()) << " constant sites)" << endl;
     buildSeqStates();
     checkSeqName();
     // OBSOLETE: identical sequences are handled later
@@ -617,10 +618,11 @@ void Alignment::computeConst(Pattern &pat) {
     bool is_informative = false;
     // critical fix: const_char was set wrongly to num_states in some data type (binary, codon),
     // causing wrong log-likelihood computation for +I or +I+G model
-    if (STATE_UNKNOWN == num_states)
-    	pat.const_char = STATE_UNKNOWN+1;
-    else
-    	pat.const_char = STATE_UNKNOWN;
+    pat.const_char = STATE_UNKNOWN+1;
+//    if (STATE_UNKNOWN == num_states)
+//    	pat.const_char = STATE_UNKNOWN+1;
+//    else
+//    	pat.const_char = STATE_UNKNOWN;
     StateBitset state_app;
     state_app.reset();
     int j;
@@ -628,7 +630,7 @@ void Alignment::computeConst(Pattern &pat) {
     	state_app[j] = 1;
 
     // number of appearance for each state, to compute is_informative
-    size_t *num_app = new size_t[num_states];
+    size_t num_app[num_states];
     memset(num_app, 0, num_states*sizeof(size_t));
 
     for (Pattern::iterator i = pat.begin(); i != pat.end(); i++) {
@@ -637,10 +639,11 @@ void Alignment::computeConst(Pattern &pat) {
     	state_app &= this_app;
         if (*i < num_states) { 
             num_app[(int)(*i)]++;
-        } else if (*i != STATE_UNKNOWN) {
-            // ambiguous characters
-            is_const = false;
         }
+//        else if (*i != STATE_UNKNOWN) {
+//            // ambiguous characters
+//            is_const = false;
+//        }
     }
     int count = 0; // number of states with >= 2 appearances
     pat.num_chars = 0; // number of states with >= 1 appearance
@@ -655,6 +658,7 @@ void Alignment::computeConst(Pattern &pat) {
     is_informative = (count >= 2);
 
     // compute is_const
+    /*
     is_const = is_const && (pat.num_chars <= 1); 
     if (is_const) {
         if (pat.num_chars == 0) // all-gap pattern
@@ -668,8 +672,36 @@ void Alignment::computeConst(Pattern &pat) {
                 }
         }
     }
+    */
+    is_const = (state_app.count() >= 1);
+    if (is_const) {
+        if (state_app.count() == num_states) {
+            pat.const_char = STATE_UNKNOWN;
+        } else if (state_app.count() == 1) {
+            for (j = 0; j < num_states; j++)
+                if (state_app[j]) {
+                    pat.const_char = j;
+                    break;
+                }
+        } else if (seq_type == SEQ_DNA) {
+            pat.const_char = num_states-1;
+            for (j = 0; j < num_states; j++)
+                if (state_app[j])
+                    pat.const_char += (1<<j);
+        } else if (seq_type == SEQ_PROTEIN) {
+            if (state_app[2] && state_app[3]) //4+8, // B = N or D
+                pat.const_char = num_states;
+            else if (state_app[5] && state_app[6]) //32+64, // Z = Q or E
+                pat.const_char = num_states+1;
+            else if (state_app[9] && state_app[10]) // 512+1024 // U = I or L
+                pat.const_char = num_states+2;
+            else ASSERT(0);
+        } else {
+            ASSERT(0);
+        }
+    }
 
-    delete [] num_app;
+//    delete [] num_app;
 
     // compute is_invariant
     is_invariant = (state_app.count() >= 1);
diff --git a/iqtree.cpp b/iqtree.cpp
index 979bc4a..e688afc 100644
--- a/iqtree.cpp
+++ b/iqtree.cpp
@@ -309,18 +309,20 @@ void IQTree::initSettings(Params &params) {
 //        max_candidate_trees = aln->getNSeq() * params.step_iterations;
     setRootNode(params.root);
 
-    string bootaln_name = params.out_prefix;
-    bootaln_name += ".bootaln";
-    if (params.print_bootaln) {
-        ofstream bootalnout;
-    	bootalnout.open(bootaln_name.c_str());
-    	bootalnout.close();
-    }
     size_t i;
 
     if (params.online_bootstrap && params.gbo_replicates > 0) {
         if (aln->getNSeq() < 4)
             outError("It makes no sense to perform bootstrap with less than 4 sequences.");
+
+        string bootaln_name = params.out_prefix;
+        bootaln_name += ".bootaln";
+        if (params.print_bootaln) {
+            ofstream bootalnout;
+            bootalnout.open(bootaln_name.c_str());
+            bootalnout.close();
+        }
+
         // 2015-12-17: initialize random stream for creating bootstrap samples
         // mainly so that checkpointing does not need to save bootstrap samples
         int *saved_randstream = randstream;
diff --git a/model/modelfactory.cpp b/model/modelfactory.cpp
index 88cb0ab..ae6a2e6 100644
--- a/model/modelfactory.cpp
+++ b/model/modelfactory.cpp
@@ -605,7 +605,7 @@ int ModelFactory::getNParameters() {
 	int df = model->getNDim() + model->getNDimFreq() + site_rate->getNDim() + site_rate->phylo_tree->branchNum;
 	return df;
 }
-
+/*
 double ModelFactory::initGTRGammaIParameters(RateHeterogeneity *rate, ModelSubst *model, double initAlpha,
                                            double initPInvar, double *initRates, double *initStateFreqs)  {
 
@@ -619,7 +619,7 @@ double ModelFactory::initGTRGammaIParameters(RateHeterogeneity *rate, ModelSubst
     site_rate->phylo_tree->clearAllPartialLH();
     return site_rate->phylo_tree->computeLikelihood();
 }
-
+*/
 double ModelFactory::optimizeParametersOnly(double gradient_epsilon) {
 	double logl;
 	/* Optimize substitution and heterogeneity rates independently */
@@ -700,16 +700,20 @@ double ModelFactory::optimizeParametersGammaInvar(int fixed_len, bool write_info
 	DoubleVector bestLens;
 	tree->saveBranchLengths(initBranLens);
     bestLens = initBranLens;
-	int numRateEntries = tree->getModel()->getNumRateEntries();
-	double *rates = new double[numRateEntries];
-	double *bestRates = new double[numRateEntries];
-	tree->getModel()->getRateMatrix(rates);
-	int numStates = tree->aln->num_states;
-	double *state_freqs = new double[numStates];
-	tree->getModel()->getStateFrequency(state_freqs);
+//	int numRateEntries = tree->getModel()->getNumRateEntries();
+    Checkpoint *model_ckp = new Checkpoint;
+    Checkpoint *best_ckp = new Checkpoint;
+    Checkpoint *saved_ckp = model->getCheckpoint();
+    *model_ckp = *saved_ckp;
+//	double *rates = new double[numRateEntries];
+//	double *bestRates = new double[numRateEntries];
+//	tree->getModel()->getRateMatrix(rates);
+//	int numStates = tree->aln->num_states;
+//	double *state_freqs = new double[numStates];
+//	tree->getModel()->getStateFrequency(state_freqs);
 
 	/* Best estimates found */
-	double *bestStateFreqs =  new double[numStates];
+//	double *bestStateFreqs =  new double[numStates];
 	double bestLogl = -DBL_MAX;
 	double bestAlpha = 0.0;
 	double bestPInvar = 0.0;
@@ -727,8 +731,8 @@ double ModelFactory::optimizeParametersGammaInvar(int fixed_len, bool write_info
                 cout << "Testing with init. pinv = " << initPInv << " / init. alpha = "  << initAlpha << endl;
             }
 
-            vector<double> estResults = optimizeGammaInvWithInitValue(fixed_len, logl_epsilon, gradient_epsilon, tree, site_rate, rates, state_freqs,
-                                                                   initPInv, initAlpha, initBranLens);
+            vector<double> estResults = optimizeGammaInvWithInitValue(fixed_len, logl_epsilon, gradient_epsilon,
+                                                                   initPInv, initAlpha, initBranLens, model_ckp);
 
 
             if (write_info) {
@@ -742,8 +746,13 @@ double ModelFactory::optimizeParametersGammaInvar(int fixed_len, bool write_info
                 bestPInvar = estResults[0];
                 bestLens.clear();
                 tree->saveBranchLengths(bestLens);
-                tree->getModel()->getRateMatrix(bestRates);
-                tree->getModel()->getStateFrequency(bestStateFreqs);
+                model->setCheckpoint(best_ckp);
+                model->saveCheckpoint();
+                model->setCheckpoint(saved_ckp);
+//                *best_ckp = *saved_ckp;
+
+//                tree->getModel()->getRateMatrix(bestRates);
+//                tree->getModel()->getStateFrequency(bestStateFreqs);
                 if (estResults[0] < initPInv) {
                     initPInv = estResults[0] - testInterval;
                     if (initPInv < 0.0)
@@ -767,11 +776,11 @@ double ModelFactory::optimizeParametersGammaInvar(int fixed_len, bool write_info
             }
             vector<double> estResults; // vector of p_inv, alpha and logl
             if (Params::getInstance().opt_gammai_keep_bran)
-                estResults = optimizeGammaInvWithInitValue(fixed_len, logl_epsilon, gradient_epsilon, tree, site_rate, rates, state_freqs,
-                                                                          initPInv, initAlpha, bestLens);
+                estResults = optimizeGammaInvWithInitValue(fixed_len, logl_epsilon, gradient_epsilon,
+                                                                          initPInv, initAlpha, bestLens, model_ckp);
             else
-                estResults = optimizeGammaInvWithInitValue(fixed_len, logl_epsilon, gradient_epsilon, tree, site_rate, rates, state_freqs,
-                                                                      initPInv, initAlpha, initBranLens);
+                estResults = optimizeGammaInvWithInitValue(fixed_len, logl_epsilon, gradient_epsilon,
+                                                                      initPInv, initAlpha, initBranLens, model_ckp);
             if (write_info) {
                 cout << "Est. p_inv: " << estResults[0] << " / Est. gamma shape: " << estResults[1]
                 << " / Logl: " << estResults[2] << endl;
@@ -785,33 +794,44 @@ double ModelFactory::optimizeParametersGammaInvar(int fixed_len, bool write_info
                 bestPInvar = estResults[0];
                 bestLens.clear();
                 tree->saveBranchLengths(bestLens);
-                tree->getModel()->getRateMatrix(bestRates);
-                tree->getModel()->getStateFrequency(bestStateFreqs);
+                model->setCheckpoint(best_ckp);
+                model->saveCheckpoint();
+                model->setCheckpoint(saved_ckp);
+//                *best_ckp = *saved_ckp;
+
+//                tree->getModel()->getRateMatrix(bestRates);
+//                tree->getModel()->getStateFrequency(bestStateFreqs);
             }
         }
     }
 
     site_rate->setGammaShape(bestAlpha);
     site_rate->setPInvar(bestPInvar);
-	((ModelGTR*) tree->getModel())->setRateMatrix(bestRates);
-	((ModelGTR*) tree->getModel())->setStateFrequency(bestStateFreqs);
+    model->setCheckpoint(best_ckp);
+    model->restoreCheckpoint();
+    model->setCheckpoint(saved_ckp);
+//	((ModelGTR*) tree->getModel())->setRateMatrix(bestRates);
+//	((ModelGTR*) tree->getModel())->setStateFrequency(bestStateFreqs);
 	tree->restoreBranchLengths(bestLens);
-	tree->getModel()->decomposeRateMatrix();
+//	tree->getModel()->decomposeRateMatrix();
 
 	tree->clearAllPartialLH();
 	tree->setCurScore(tree->computeLikelihood());
-    assert(fabs(tree->getCurScore() - bestLogl) < 1.0);
-    if (write_info) {    
+    if (write_info) {
         cout << endl;
         cout << "Best p_inv: " << bestPInvar << " / best gamma shape: " << bestAlpha << " / ";
         cout << "Logl: " << tree->getCurScore() << endl;
     }
+    assert(fabs(tree->getCurScore() - bestLogl) < 1.0);
+
+//	delete [] rates;
+//	delete [] state_freqs;
+//	delete [] bestRates;
+//	delete [] bestStateFreqs;
+
+    delete model_ckp;
+    delete best_ckp;
 
-	delete [] rates;
-	delete [] state_freqs;
-	delete [] bestRates;
-	delete [] bestStateFreqs;
-    
 	double elapsed_secs = getRealTime() - begin_time;
 	if (write_info)
 		cout << "Parameters optimization took " << elapsed_secs << " sec" << endl;
@@ -824,21 +844,25 @@ double ModelFactory::optimizeParametersGammaInvar(int fixed_len, bool write_info
 }
 
 vector<double> ModelFactory::optimizeGammaInvWithInitValue(int fixed_len, double logl_epsilon, double gradient_epsilon,
-                                                 PhyloTree *tree, RateHeterogeneity *site_rates, double *rates,
-                                                 double *state_freqs, double initPInv, double initAlpha,
-                                                 DoubleVector &lenvec) {
+                                                 double initPInv, double initAlpha,
+                                                 DoubleVector &lenvec, Checkpoint *model_ckp) {
+    PhyloTree *tree = site_rate->getTree();
     tree->restoreBranchLengths(lenvec);
-    ((ModelGTR*) tree->getModel())->setRateMatrix(rates);
-    ((ModelGTR*) tree->getModel())->setStateFrequency(state_freqs);
-    tree->getModel()->decomposeRateMatrix();
-    site_rates->setPInvar(initPInv);
-    site_rates->setGammaShape(initAlpha);
+    Checkpoint *saved_ckp = model->getCheckpoint();
+    model->setCheckpoint(model_ckp);
+    model->restoreCheckpoint();
+    model->setCheckpoint(saved_ckp);
+//    ((ModelGTR*) tree->getModel())->setRateMatrix(rates);
+//    ((ModelGTR*) tree->getModel())->setStateFrequency(state_freqs);
+//    tree->getModel()->decomposeRateMatrix();
+    site_rate->setPInvar(initPInv);
+    site_rate->setGammaShape(initAlpha);
     tree->clearAllPartialLH();
     optimizeParameters(fixed_len, false, logl_epsilon, gradient_epsilon);
 
     vector<double> estResults;
-    double estPInv = tree->getRate()->getPInvar();
-    double estAlpha = tree->getRate()->getGammaShape();
+    double estPInv = site_rate->getPInvar();
+    double estAlpha = site_rate->getGammaShape();
     double logl = tree->getCurScore();
     estResults.push_back(estPInv);
     estResults.push_back(estAlpha);
diff --git a/model/modelfactory.h b/model/modelfactory.h
index 5218f0b..9cdabf3 100644
--- a/model/modelfactory.h
+++ b/model/modelfactory.h
@@ -257,9 +257,8 @@ protected:
 	*/
 	virtual bool getVariables(double *variables);
 
-    vector<double> optimizeGammaInvWithInitValue(int fixed_len, double logl_epsilon, double gradient_epsilon, PhyloTree *tree,
-                                       RateHeterogeneity *site_rates, double *rates, double *state_freqs,
-                                       double initPInv, double initAlpha, DoubleVector &lenvec);
+    vector<double> optimizeGammaInvWithInitValue(int fixed_len, double logl_epsilon, double gradient_epsilon,
+                                       double initPInv, double initAlpha, DoubleVector &lenvec, Checkpoint *model_ckp);
 };
 
 #endif
diff --git a/model/modelset.cpp b/model/modelset.cpp
index 83feabb..bcc109f 100644
--- a/model/modelset.cpp
+++ b/model/modelset.cpp
@@ -158,6 +158,16 @@ void ModelSet::decomposeRateMatrix()
 	size_t vsize = phylo_tree->vector_size;
 	size_t states2 = num_states*num_states;
 	size_t ptn, i, x;
+
+    size_t max_size = get_safe_upper_limit(size());
+
+    // copy dummy values
+    for (size_t m = size(); m < max_size; m++) {
+        memcpy(&eigenvalues[m*num_states], &eigenvalues[(m-1)*num_states], sizeof(double)*num_states);
+        memcpy(&eigenvectors[m*states2], &eigenvectors[(m-1)*states2], sizeof(double)*states2);
+        memcpy(&inv_eigenvectors[m*states2], &inv_eigenvectors[(m-1)*states2], sizeof(double)*states2);
+    }
+
     double new_eval[num_states*vsize];
     double new_evec[states2*vsize];
     double new_inv_evec[states2*vsize];
diff --git a/model/ratefree.cpp b/model/ratefree.cpp
index 60c302f..c31dcba 100644
--- a/model/ratefree.cpp
+++ b/model/ratefree.cpp
@@ -210,7 +210,7 @@ double RateFree::optimizeParameters(double gradient_epsilon) {
 		cout << "Optimizing " << name << " model parameters by " << optimize_alg << " algorithm..." << endl;
 
     // TODO: turn off EM algorithm for +ASC model
-    if ((optimize_alg.find("EM") != string::npos && phylo_tree->getModelFactory()->unobserved_ptns.empty()) || getPInvar() <= MIN_PINVAR)
+    if ((optimize_alg.find("EM") != string::npos && phylo_tree->getModelFactory()->unobserved_ptns.empty()))
         return optimizeWithEM();
 
 	//if (freq_type == FREQ_ESTIMATE) scaleStateFreq(false);
diff --git a/phyloanalysis.cpp b/phyloanalysis.cpp
index 7017887..1e71528 100644
--- a/phyloanalysis.cpp
+++ b/phyloanalysis.cpp
@@ -1546,7 +1546,7 @@ void printMiscInfo(Params &params, IQTree &iqtree, double *pattern_lh) {
         printAncestralSequences(params.out_prefix, &iqtree, params.print_ancestral_sequence);
     }
     
-    if (params.print_site_state_freq != WSF_NONE) {
+    if (params.print_site_state_freq != WSF_NONE && !params.site_freq_file && !params.tree_freq_file) {
 		string site_freq_file = params.out_prefix;
 		site_freq_file += ".sitesf";
         printSiteStateFreq(site_freq_file.c_str(), &iqtree);
@@ -2409,8 +2409,12 @@ void runStandardBootstrap(Params &params, string &original_model, Alignment *ali
 			}
 		} else
 			boot_tree = new IQTree(bootstrap_alignment);
-		if (params.print_bootaln && MPIHelper::getInstance().isMaster())
-			bootstrap_alignment->printPhylip(bootaln_name.c_str(), true);
+		if (params.print_bootaln && MPIHelper::getInstance().isMaster()) {
+            if (bootstrap_alignment->isSuperAlignment())
+                ((SuperAlignment*)bootstrap_alignment)->printCombinedAlignment(bootaln_name.c_str(), true);
+            else
+                bootstrap_alignment->printPhylip(bootaln_name.c_str(), true);
+        }
 
         // set checkpoint
         boot_tree->setCheckpoint(tree->getCheckpoint());
diff --git a/phylokernelnew.h b/phylokernelnew.h
index 11c29eb..b68e46a 100644
--- a/phylokernelnew.h
+++ b/phylokernelnew.h
@@ -566,7 +566,7 @@ inline void scaleLikelihood(VectorClass &lh_max, double *invar, double *dad_part
 {
     if (SAFE_NUMERIC) {
         size_t x, i;
-        auto underflown = ((lh_max < SCALING_THRESHOLD) & (lh_max != 0.0) & (VectorClass().load_a(invar) == 0.0));
+        auto underflown = ((lh_max < SCALING_THRESHOLD) & (VectorClass().load_a(invar) == 0.0));
         if (horizontal_or(underflown)) { // at least one site has numerical underflown
             for (x = 0; x < VectorClass::size(); x++)
             if (underflown[x]) {
@@ -580,7 +580,7 @@ inline void scaleLikelihood(VectorClass &lh_max, double *invar, double *dad_part
         }
     } else {
         size_t x, i;
-        auto underflown = (lh_max < SCALING_THRESHOLD) & (lh_max != 0.0) & (VectorClass().load_a(invar) == 0.0);
+        auto underflown = (lh_max < SCALING_THRESHOLD) & (VectorClass().load_a(invar) == 0.0);
         if (horizontal_or(underflown)) { // at least one site has numerical underflown
             size_t block = ncat_mix * nstates;
             for (x = 0; x < VectorClass::size(); x++)
@@ -1286,7 +1286,7 @@ void PhyloTree::computePartialLikelihoodGenericSIMD(TraversalInfo &info, size_t
                 }
                 // check if one should scale partial likelihoods
                 if (SAFE_NUMERIC) {
-                    auto underflown = ((lh_max < SCALING_THRESHOLD) & (lh_max != 0.0) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0));
+                    auto underflown = ((lh_max < SCALING_THRESHOLD) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0));
                     if (horizontal_or(underflown)) { // at least one site has numerical underflown
                         for (x = 0; x < VectorClass::size(); x++)
                         if (underflown[x]) {
@@ -1304,7 +1304,7 @@ void PhyloTree::computePartialLikelihoodGenericSIMD(TraversalInfo &info, size_t
             }
 
             if (!SAFE_NUMERIC) {
-                auto underflown = (lh_max < SCALING_THRESHOLD) & (lh_max != 0.0) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0);
+                auto underflown = (lh_max < SCALING_THRESHOLD) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0);
                 if (horizontal_or(underflown)) { // at least one site has numerical underflown
                     for (x = 0; x < VectorClass::size(); x++)
                     if (underflown[x]) {
@@ -1474,7 +1474,7 @@ void PhyloTree::computePartialLikelihoodGenericSIMD(TraversalInfo &info, size_t
 #endif
                     // check if one should scale partial likelihoods
                     if (SAFE_NUMERIC) {
-                        auto underflown = ((lh_max < SCALING_THRESHOLD) & (lh_max != 0.0) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0));
+                        auto underflown = ((lh_max < SCALING_THRESHOLD) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0));
                         if (horizontal_or(underflown)) { // at least one site has numerical underflown
                             for (x = 0; x < VectorClass::size(); x++)
                             if (underflown[x]) {
@@ -1537,7 +1537,7 @@ void PhyloTree::computePartialLikelihoodGenericSIMD(TraversalInfo &info, size_t
     #endif
                     // check if one should scale partial likelihoods
                     if (SAFE_NUMERIC) {
-                        auto underflown = ((lh_max < SCALING_THRESHOLD) & (lh_max != 0.0) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0));
+                        auto underflown = ((lh_max < SCALING_THRESHOLD) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0));
                         if (horizontal_or(underflown)) { // at least one site has numerical underflown
                             for (x = 0; x < VectorClass::size(); x++)
                             if (underflown[x]) {
@@ -1557,7 +1557,7 @@ void PhyloTree::computePartialLikelihoodGenericSIMD(TraversalInfo &info, size_t
             } // IF SITE_MODEL
 
             if (!SAFE_NUMERIC) {
-                auto underflown = (lh_max < SCALING_THRESHOLD) & (lh_max != 0.0) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0);
+                auto underflown = (lh_max < SCALING_THRESHOLD) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0);
                 if (horizontal_or(underflown)) { // at least one site has numerical underflown
                     for (x = 0; x < VectorClass::size(); x++)
                     if (underflown[x]) {
@@ -1660,7 +1660,7 @@ void PhyloTree::computePartialLikelihoodGenericSIMD(TraversalInfo &info, size_t
 
                 // check if one should scale partial likelihoods
                 if (SAFE_NUMERIC) {
-                    auto underflown = ((lh_max < SCALING_THRESHOLD) & (lh_max != 0.0) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0));
+                    auto underflown = ((lh_max < SCALING_THRESHOLD) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0));
                     if (horizontal_or(underflown))
                         for (x = 0; x < VectorClass::size(); x++)
                         if (underflown[x]) {
@@ -1682,7 +1682,7 @@ void PhyloTree::computePartialLikelihoodGenericSIMD(TraversalInfo &info, size_t
 
             if (!SAFE_NUMERIC) {
                 // check if one should scale partial likelihoods
-                auto underflown = (lh_max < SCALING_THRESHOLD) & (lh_max != 0.0) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0);
+                auto underflown = (lh_max < SCALING_THRESHOLD) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0);
                 if (horizontal_or(underflown)) { // at least one site has numerical underflown
                     for (x = 0; x < VectorClass::size(); x++)
                     if (underflown[x]) {
@@ -2024,7 +2024,7 @@ void PhyloTree::computeLikelihoodDervGenericSIMD(PhyloNeighbor *dad_branch, Phyl
         #endif
         
         for (ptn = ptn_lower; ptn < ptn_upper; ptn+=VectorClass::size()) {
-            VectorClass lh_ptn;
+            VectorClass lh_ptn(0.0);
             //lh_ptn.load_a(&ptn_invar[ptn]);
             VectorClass *theta = (VectorClass*)(theta_all + ptn*block);
             VectorClass df_ptn, ddf_ptn;
@@ -2055,7 +2055,8 @@ void PhyloTree::computeLikelihoodDervGenericSIMD(PhyloNeighbor *dad_branch, Phyl
                 dotProductTriple<VectorClass, double, FMA>(val0, val1, val2, theta, lh_ptn, df_ptn, ddf_ptn, block, nstates);
         #endif
             }
-            lh_ptn = abs(lh_ptn + VectorClass().load_a(&ptn_invar[ptn]));
+            // Sum later to avoid underflow of invariant sites
+            lh_ptn = abs(lh_ptn) + VectorClass().load_a(&ptn_invar[ptn]);
             
             if (ptn < orig_nptn) {
                 lh_ptn = 1.0 / lh_ptn;
@@ -2319,8 +2320,8 @@ double PhyloTree::computeLikelihoodBranchGenericSIMD(PhyloNeighbor *dad_branch,
             double *vec_tip = buffer_partial_lh_ptr + block*VectorClass::size()*thread_id;
 
             for (ptn = ptn_lower; ptn < ptn_upper; ptn+=VectorClass::size()) {
-                VectorClass lh_ptn;
-                lh_ptn.load_a(&ptn_invar[ptn]);
+                VectorClass lh_ptn(0.0);
+//                lh_ptn.load_a(&ptn_invar[ptn]);
                 VectorClass *lh_cat = (VectorClass*)(_pattern_lh_cat + ptn*ncat_mix);
                 VectorClass *partial_lh_dad = (VectorClass*)(dad_branch->partial_lh + ptn*block);
                 VectorClass *lh_node = SITE_MODEL ? (VectorClass*)&partial_lh_node[ptn*nstates] : (VectorClass*)vec_tip;
@@ -2411,7 +2412,8 @@ double PhyloTree::computeLikelihoodBranchGenericSIMD(PhyloNeighbor *dad_branch,
                 }
                 vc_min_scale *= LOG_SCALING_THRESHOLD;
 
-                lh_ptn = abs(lh_ptn);
+                // Sum later to avoid underflow of invariant sites
+                lh_ptn = abs(lh_ptn) + VectorClass().load_a(&ptn_invar[ptn]);
                 if (ptn < orig_nptn) {
                     lh_ptn = log(lh_ptn) + vc_min_scale;
                     lh_ptn.store_a(&_pattern_lh[ptn]);
@@ -2466,8 +2468,8 @@ double PhyloTree::computeLikelihoodBranchGenericSIMD(PhyloNeighbor *dad_branch,
                 computePartialLikelihood(*it, ptn_lower, ptn_upper, thread_id);
 
             for (ptn = ptn_lower; ptn < ptn_upper; ptn+=VectorClass::size()) {
-                VectorClass lh_ptn;
-                lh_ptn.load_a(&ptn_invar[ptn]);
+                VectorClass lh_ptn(0.0);
+//                lh_ptn.load_a(&ptn_invar[ptn]);
                 VectorClass *lh_cat = (VectorClass*)(_pattern_lh_cat + ptn*ncat_mix);
                 VectorClass *partial_lh_dad = (VectorClass*)(dad_branch->partial_lh + ptn*block);
                 VectorClass *partial_lh_node = (VectorClass*)(node_branch->partial_lh + ptn*block);
@@ -2541,7 +2543,8 @@ double PhyloTree::computeLikelihoodBranchGenericSIMD(PhyloNeighbor *dad_branch,
                 } // if SAFE_NUMERIC
                 vc_min_scale *= LOG_SCALING_THRESHOLD;
 
-                lh_ptn = abs(lh_ptn);
+                // Sum later to avoid underflow of invariant sites
+                lh_ptn = abs(lh_ptn) + VectorClass().load_a(&ptn_invar[ptn]);
 
                 if (ptn < orig_nptn) {
                     lh_ptn = log(lh_ptn) + vc_min_scale;
@@ -2708,11 +2711,11 @@ double PhyloTree::computeLikelihoodFromBufferGenericSIMD()
 #pragma omp for schedule(static) nowait
 #endif
     for (ptn = 0; ptn < nptn; ptn+=VectorClass::size()) {
-		VectorClass lh_ptn;
+		VectorClass lh_ptn(0.0);
 		VectorClass *theta = (VectorClass*)(theta_all + ptn*block);
         if (SITE_MODEL) {
             VectorClass *eval_ptr = (VectorClass*)&eval[ptn*nstates];
-            lh_ptn.load_a(&ptn_invar[ptn]);
+//            lh_ptn.load_a(&ptn_invar[ptn]);
             for (c = 0; c < ncat; c++) {
                 VectorClass lh_cat;
 #ifdef KERNEL_FIX_STATES
@@ -2725,9 +2728,11 @@ double PhyloTree::computeLikelihoodFromBufferGenericSIMD()
             }
         } else {
             dotProductVec<VectorClass, double, FMA>(val0, theta, lh_ptn, block);
-            lh_ptn += VectorClass().load_a(&ptn_invar[ptn]);
         }
 
+        // Sum later to avoid underflow of invariant sites
+        lh_ptn = abs(lh_ptn) + VectorClass().load_a(&ptn_invar[ptn]);
+
         if (ptn < orig_nptn) {
             lh_ptn = log(abs(lh_ptn)) + VectorClass().load_a(&buffer_scale_all[ptn]);
             lh_ptn.store_a(&_pattern_lh[ptn]);
diff --git a/phylotree.cpp b/phylotree.cpp
index 5cd4c55..ffa9886 100644
--- a/phylotree.cpp
+++ b/phylotree.cpp
@@ -692,8 +692,8 @@ size_t PhyloTree::getBufferPartialLhSize() {
     const size_t VECTOR_SIZE = 8; // TODO, adjusted
     size_t ncat_mix = site_rate->getNRate() * ((model_factory->fused_mix_rate)? 1 : model->getNMixtures());
     size_t block = model->num_states * ncat_mix;
-    size_t buffer_size = get_safe_upper_limit(block * model->num_states * 2 * aln->getNSeq());
-    buffer_size += get_safe_upper_limit(block * (aln->getNSeq()+1) * (aln->STATE_UNKNOWN+1));
+    size_t buffer_size = get_safe_upper_limit(block * model->num_states * 2) * aln->getNSeq();
+    buffer_size += get_safe_upper_limit(block *(aln->STATE_UNKNOWN+1)) * (aln->getNSeq()+1);
     buffer_size += (block*2+model->num_states)*VECTOR_SIZE*num_threads;
     return buffer_size;
 }
diff --git a/phylotreesse.cpp b/phylotreesse.cpp
index b0f9c01..32f3ecb 100644
--- a/phylotreesse.cpp
+++ b/phylotreesse.cpp
@@ -230,7 +230,7 @@ void PhyloTree::computeTipPartialLikelihood() {
 
                 double *inv_evec = &model->getInverseEigenvectors()[ptn*nstates*nstates];
                 for (v = 0; v < vector_size; v++) {
-                    int state = aln->STATE_UNKNOWN;
+                    int state = 0;
                     if (ptn+v < nptn)
                         state = aln->at(ptn+v)[nodeid];
     //                double *partial_lh = node_partial_lh + ptn*nstates;
@@ -395,6 +395,13 @@ void PhyloTree::computePtnInvar() {
 	size_t nptn = aln->getNPattern(), ptn;
 	size_t maxptn = get_safe_upper_limit(nptn)+get_safe_upper_limit(model_factory->unobserved_ptns.size());
 	int nstates = aln->num_states;
+    int x;
+    // ambiguous characters
+    int ambi_aa[] = {
+        4+8, // B = N or D
+        32+64, // Z = Q or E
+        512+1024 // U = I or L
+    };
 
     double *state_freq = aligned_alloc<double>(nstates);
     model->getStateFrequency(state_freq);
@@ -402,11 +409,31 @@ void PhyloTree::computePtnInvar() {
 	double p_invar = site_rate->getPInvar();
 	if (p_invar != 0.0) {
 		for (ptn = 0; ptn < nptn; ptn++) {
-			if ((*aln)[ptn].const_char == nstates)
+            if ((*aln)[ptn].const_char > aln->STATE_UNKNOWN)
+                continue;
+
+			if ((*aln)[ptn].const_char == aln->STATE_UNKNOWN) {
 				ptn_invar[ptn] = p_invar;
-			else if ((*aln)[ptn].const_char < nstates) {
+			} else if ((*aln)[ptn].const_char < nstates) {
 				ptn_invar[ptn] = p_invar * state_freq[(int) (*aln)[ptn].const_char];
-			}
+			} else if (aln->seq_type == SEQ_DNA) {
+                // 2016-12-21: handling ambiguous state
+                ptn_invar[ptn] = 0.0;
+                int cstate = (*aln)[ptn].const_char-nstates+1;
+                for (x = 0; x < nstates; x++) {
+                    if ((cstate) & (1 << x))
+                        ptn_invar[ptn] += state_freq[x];
+                }
+                ptn_invar[ptn] *= p_invar;
+            } else if (aln->seq_type == SEQ_PROTEIN) {
+                ptn_invar[ptn] = 0.0;
+                int cstate = (*aln)[ptn].const_char-nstates;
+                assert(cstate <= 2);
+                for (x = 0; x < 11; x++)
+                    if (ambi_aa[cstate] & (1 << x))
+                        ptn_invar[ptn] += state_freq[x];
+                ptn_invar[ptn] *= p_invar;
+            } else ASSERT(0);
 		}
 //		// ascertmain bias correction
 //		for (ptn = 0; ptn < model_factory->unobserved_ptns.size(); ptn++)
diff --git a/tools.cpp b/tools.cpp
index 4e5af85..fecb07f 100644
--- a/tools.cpp
+++ b/tools.cpp
@@ -3221,6 +3221,9 @@ void parseArg(int argc, char *argv[], Params &params) {
     if (params.lh_mem_save == LM_MEM_SAVE && params.partition_file)
         outError("-mem option does not work with partition models yet");
 
+    if (params.gbo_replicates && params.num_bootstrap_samples)
+        outError("UFBoot (-bb) and standard bootstrap (-b) must not be specified together");
+
     if (!params.out_prefix) {
     	if (params.eco_dag_file)
     		params.out_prefix = params.eco_dag_file;

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/iqtree.git



More information about the debian-med-commit mailing list