[med-svn] [iqtree] 01/06: New upstream version 1.5.3+dfsg
Andreas Tille
tille at debian.org
Thu Jan 19 07:50:01 UTC 2017
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository iqtree.
commit 173db0cf39ac0403d695f62b2493f420ed3d2edd
Author: Andreas Tille <tille at debian.org>
Date: Thu Jan 19 08:31:10 2017 +0100
New upstream version 1.5.3+dfsg
---
CMakeLists.txt | 2 +-
alignment.cpp | 52 ++++++++++++++++++++-----
iqtree.cpp | 16 ++++----
model/modelfactory.cpp | 104 ++++++++++++++++++++++++++++++-------------------
model/modelfactory.h | 5 +--
model/modelset.cpp | 10 +++++
model/ratefree.cpp | 2 +-
phyloanalysis.cpp | 10 +++--
phylokernelnew.h | 45 +++++++++++----------
phylotree.cpp | 4 +-
phylotreesse.cpp | 35 +++++++++++++++--
tools.cpp | 3 ++
12 files changed, 197 insertions(+), 91 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 767f309..6acf40b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -50,7 +50,7 @@ add_definitions(-DIQ_TREE)
# The version number.
set (iqtree_VERSION_MAJOR 1)
set (iqtree_VERSION_MINOR 5)
-set (iqtree_VERSION_PATCH "2")
+set (iqtree_VERSION_PATCH "3")
set(BUILD_SHARED_LIBS OFF)
diff --git a/alignment.cpp b/alignment.cpp
index f935d2b..44f0146 100644
--- a/alignment.cpp
+++ b/alignment.cpp
@@ -376,7 +376,8 @@ Alignment::Alignment(char *filename, char *sequence_type, InputType &intype) : v
countConstSite();
cout << "Alignment has " << getNSeq() << " sequences with " << getNSite() <<
- " columns and " << getNPattern() << " patterns (" << num_informative_sites << " informative sites)" << endl;
+ " columns and " << getNPattern() << " patterns (" << num_informative_sites << " informative sites, " <<
+ (int)(frac_const_sites*getNSite()) << " constant sites)" << endl;
buildSeqStates();
checkSeqName();
// OBSOLETE: identical sequences are handled later
@@ -617,10 +618,11 @@ void Alignment::computeConst(Pattern &pat) {
bool is_informative = false;
// critical fix: const_char was set wrongly to num_states in some data type (binary, codon),
// causing wrong log-likelihood computation for +I or +I+G model
- if (STATE_UNKNOWN == num_states)
- pat.const_char = STATE_UNKNOWN+1;
- else
- pat.const_char = STATE_UNKNOWN;
+ pat.const_char = STATE_UNKNOWN+1;
+// if (STATE_UNKNOWN == num_states)
+// pat.const_char = STATE_UNKNOWN+1;
+// else
+// pat.const_char = STATE_UNKNOWN;
StateBitset state_app;
state_app.reset();
int j;
@@ -628,7 +630,7 @@ void Alignment::computeConst(Pattern &pat) {
state_app[j] = 1;
// number of appearance for each state, to compute is_informative
- size_t *num_app = new size_t[num_states];
+ size_t num_app[num_states];
memset(num_app, 0, num_states*sizeof(size_t));
for (Pattern::iterator i = pat.begin(); i != pat.end(); i++) {
@@ -637,10 +639,11 @@ void Alignment::computeConst(Pattern &pat) {
state_app &= this_app;
if (*i < num_states) {
num_app[(int)(*i)]++;
- } else if (*i != STATE_UNKNOWN) {
- // ambiguous characters
- is_const = false;
}
+// else if (*i != STATE_UNKNOWN) {
+// // ambiguous characters
+// is_const = false;
+// }
}
int count = 0; // number of states with >= 2 appearances
pat.num_chars = 0; // number of states with >= 1 appearance
@@ -655,6 +658,7 @@ void Alignment::computeConst(Pattern &pat) {
is_informative = (count >= 2);
// compute is_const
+ /*
is_const = is_const && (pat.num_chars <= 1);
if (is_const) {
if (pat.num_chars == 0) // all-gap pattern
@@ -668,8 +672,36 @@ void Alignment::computeConst(Pattern &pat) {
}
}
}
+ */
+ is_const = (state_app.count() >= 1);
+ if (is_const) {
+ if (state_app.count() == num_states) {
+ pat.const_char = STATE_UNKNOWN;
+ } else if (state_app.count() == 1) {
+ for (j = 0; j < num_states; j++)
+ if (state_app[j]) {
+ pat.const_char = j;
+ break;
+ }
+ } else if (seq_type == SEQ_DNA) {
+ pat.const_char = num_states-1;
+ for (j = 0; j < num_states; j++)
+ if (state_app[j])
+ pat.const_char += (1<<j);
+ } else if (seq_type == SEQ_PROTEIN) {
+ if (state_app[2] && state_app[3]) //4+8, // B = N or D
+ pat.const_char = num_states;
+ else if (state_app[5] && state_app[6]) //32+64, // Z = Q or E
+ pat.const_char = num_states+1;
+ else if (state_app[9] && state_app[10]) // 512+1024 // U = I or L
+ pat.const_char = num_states+2;
+ else ASSERT(0);
+ } else {
+ ASSERT(0);
+ }
+ }
- delete [] num_app;
+// delete [] num_app;
// compute is_invariant
is_invariant = (state_app.count() >= 1);
diff --git a/iqtree.cpp b/iqtree.cpp
index 979bc4a..e688afc 100644
--- a/iqtree.cpp
+++ b/iqtree.cpp
@@ -309,18 +309,20 @@ void IQTree::initSettings(Params ¶ms) {
// max_candidate_trees = aln->getNSeq() * params.step_iterations;
setRootNode(params.root);
- string bootaln_name = params.out_prefix;
- bootaln_name += ".bootaln";
- if (params.print_bootaln) {
- ofstream bootalnout;
- bootalnout.open(bootaln_name.c_str());
- bootalnout.close();
- }
size_t i;
if (params.online_bootstrap && params.gbo_replicates > 0) {
if (aln->getNSeq() < 4)
outError("It makes no sense to perform bootstrap with less than 4 sequences.");
+
+ string bootaln_name = params.out_prefix;
+ bootaln_name += ".bootaln";
+ if (params.print_bootaln) {
+ ofstream bootalnout;
+ bootalnout.open(bootaln_name.c_str());
+ bootalnout.close();
+ }
+
// 2015-12-17: initialize random stream for creating bootstrap samples
// mainly so that checkpointing does not need to save bootstrap samples
int *saved_randstream = randstream;
diff --git a/model/modelfactory.cpp b/model/modelfactory.cpp
index 88cb0ab..ae6a2e6 100644
--- a/model/modelfactory.cpp
+++ b/model/modelfactory.cpp
@@ -605,7 +605,7 @@ int ModelFactory::getNParameters() {
int df = model->getNDim() + model->getNDimFreq() + site_rate->getNDim() + site_rate->phylo_tree->branchNum;
return df;
}
-
+/*
double ModelFactory::initGTRGammaIParameters(RateHeterogeneity *rate, ModelSubst *model, double initAlpha,
double initPInvar, double *initRates, double *initStateFreqs) {
@@ -619,7 +619,7 @@ double ModelFactory::initGTRGammaIParameters(RateHeterogeneity *rate, ModelSubst
site_rate->phylo_tree->clearAllPartialLH();
return site_rate->phylo_tree->computeLikelihood();
}
-
+*/
double ModelFactory::optimizeParametersOnly(double gradient_epsilon) {
double logl;
/* Optimize substitution and heterogeneity rates independently */
@@ -700,16 +700,20 @@ double ModelFactory::optimizeParametersGammaInvar(int fixed_len, bool write_info
DoubleVector bestLens;
tree->saveBranchLengths(initBranLens);
bestLens = initBranLens;
- int numRateEntries = tree->getModel()->getNumRateEntries();
- double *rates = new double[numRateEntries];
- double *bestRates = new double[numRateEntries];
- tree->getModel()->getRateMatrix(rates);
- int numStates = tree->aln->num_states;
- double *state_freqs = new double[numStates];
- tree->getModel()->getStateFrequency(state_freqs);
+// int numRateEntries = tree->getModel()->getNumRateEntries();
+ Checkpoint *model_ckp = new Checkpoint;
+ Checkpoint *best_ckp = new Checkpoint;
+ Checkpoint *saved_ckp = model->getCheckpoint();
+ *model_ckp = *saved_ckp;
+// double *rates = new double[numRateEntries];
+// double *bestRates = new double[numRateEntries];
+// tree->getModel()->getRateMatrix(rates);
+// int numStates = tree->aln->num_states;
+// double *state_freqs = new double[numStates];
+// tree->getModel()->getStateFrequency(state_freqs);
/* Best estimates found */
- double *bestStateFreqs = new double[numStates];
+// double *bestStateFreqs = new double[numStates];
double bestLogl = -DBL_MAX;
double bestAlpha = 0.0;
double bestPInvar = 0.0;
@@ -727,8 +731,8 @@ double ModelFactory::optimizeParametersGammaInvar(int fixed_len, bool write_info
cout << "Testing with init. pinv = " << initPInv << " / init. alpha = " << initAlpha << endl;
}
- vector<double> estResults = optimizeGammaInvWithInitValue(fixed_len, logl_epsilon, gradient_epsilon, tree, site_rate, rates, state_freqs,
- initPInv, initAlpha, initBranLens);
+ vector<double> estResults = optimizeGammaInvWithInitValue(fixed_len, logl_epsilon, gradient_epsilon,
+ initPInv, initAlpha, initBranLens, model_ckp);
if (write_info) {
@@ -742,8 +746,13 @@ double ModelFactory::optimizeParametersGammaInvar(int fixed_len, bool write_info
bestPInvar = estResults[0];
bestLens.clear();
tree->saveBranchLengths(bestLens);
- tree->getModel()->getRateMatrix(bestRates);
- tree->getModel()->getStateFrequency(bestStateFreqs);
+ model->setCheckpoint(best_ckp);
+ model->saveCheckpoint();
+ model->setCheckpoint(saved_ckp);
+// *best_ckp = *saved_ckp;
+
+// tree->getModel()->getRateMatrix(bestRates);
+// tree->getModel()->getStateFrequency(bestStateFreqs);
if (estResults[0] < initPInv) {
initPInv = estResults[0] - testInterval;
if (initPInv < 0.0)
@@ -767,11 +776,11 @@ double ModelFactory::optimizeParametersGammaInvar(int fixed_len, bool write_info
}
vector<double> estResults; // vector of p_inv, alpha and logl
if (Params::getInstance().opt_gammai_keep_bran)
- estResults = optimizeGammaInvWithInitValue(fixed_len, logl_epsilon, gradient_epsilon, tree, site_rate, rates, state_freqs,
- initPInv, initAlpha, bestLens);
+ estResults = optimizeGammaInvWithInitValue(fixed_len, logl_epsilon, gradient_epsilon,
+ initPInv, initAlpha, bestLens, model_ckp);
else
- estResults = optimizeGammaInvWithInitValue(fixed_len, logl_epsilon, gradient_epsilon, tree, site_rate, rates, state_freqs,
- initPInv, initAlpha, initBranLens);
+ estResults = optimizeGammaInvWithInitValue(fixed_len, logl_epsilon, gradient_epsilon,
+ initPInv, initAlpha, initBranLens, model_ckp);
if (write_info) {
cout << "Est. p_inv: " << estResults[0] << " / Est. gamma shape: " << estResults[1]
<< " / Logl: " << estResults[2] << endl;
@@ -785,33 +794,44 @@ double ModelFactory::optimizeParametersGammaInvar(int fixed_len, bool write_info
bestPInvar = estResults[0];
bestLens.clear();
tree->saveBranchLengths(bestLens);
- tree->getModel()->getRateMatrix(bestRates);
- tree->getModel()->getStateFrequency(bestStateFreqs);
+ model->setCheckpoint(best_ckp);
+ model->saveCheckpoint();
+ model->setCheckpoint(saved_ckp);
+// *best_ckp = *saved_ckp;
+
+// tree->getModel()->getRateMatrix(bestRates);
+// tree->getModel()->getStateFrequency(bestStateFreqs);
}
}
}
site_rate->setGammaShape(bestAlpha);
site_rate->setPInvar(bestPInvar);
- ((ModelGTR*) tree->getModel())->setRateMatrix(bestRates);
- ((ModelGTR*) tree->getModel())->setStateFrequency(bestStateFreqs);
+ model->setCheckpoint(best_ckp);
+ model->restoreCheckpoint();
+ model->setCheckpoint(saved_ckp);
+// ((ModelGTR*) tree->getModel())->setRateMatrix(bestRates);
+// ((ModelGTR*) tree->getModel())->setStateFrequency(bestStateFreqs);
tree->restoreBranchLengths(bestLens);
- tree->getModel()->decomposeRateMatrix();
+// tree->getModel()->decomposeRateMatrix();
tree->clearAllPartialLH();
tree->setCurScore(tree->computeLikelihood());
- assert(fabs(tree->getCurScore() - bestLogl) < 1.0);
- if (write_info) {
+ if (write_info) {
cout << endl;
cout << "Best p_inv: " << bestPInvar << " / best gamma shape: " << bestAlpha << " / ";
cout << "Logl: " << tree->getCurScore() << endl;
}
+ assert(fabs(tree->getCurScore() - bestLogl) < 1.0);
+
+// delete [] rates;
+// delete [] state_freqs;
+// delete [] bestRates;
+// delete [] bestStateFreqs;
+
+ delete model_ckp;
+ delete best_ckp;
- delete [] rates;
- delete [] state_freqs;
- delete [] bestRates;
- delete [] bestStateFreqs;
-
double elapsed_secs = getRealTime() - begin_time;
if (write_info)
cout << "Parameters optimization took " << elapsed_secs << " sec" << endl;
@@ -824,21 +844,25 @@ double ModelFactory::optimizeParametersGammaInvar(int fixed_len, bool write_info
}
vector<double> ModelFactory::optimizeGammaInvWithInitValue(int fixed_len, double logl_epsilon, double gradient_epsilon,
- PhyloTree *tree, RateHeterogeneity *site_rates, double *rates,
- double *state_freqs, double initPInv, double initAlpha,
- DoubleVector &lenvec) {
+ double initPInv, double initAlpha,
+ DoubleVector &lenvec, Checkpoint *model_ckp) {
+ PhyloTree *tree = site_rate->getTree();
tree->restoreBranchLengths(lenvec);
- ((ModelGTR*) tree->getModel())->setRateMatrix(rates);
- ((ModelGTR*) tree->getModel())->setStateFrequency(state_freqs);
- tree->getModel()->decomposeRateMatrix();
- site_rates->setPInvar(initPInv);
- site_rates->setGammaShape(initAlpha);
+ Checkpoint *saved_ckp = model->getCheckpoint();
+ model->setCheckpoint(model_ckp);
+ model->restoreCheckpoint();
+ model->setCheckpoint(saved_ckp);
+// ((ModelGTR*) tree->getModel())->setRateMatrix(rates);
+// ((ModelGTR*) tree->getModel())->setStateFrequency(state_freqs);
+// tree->getModel()->decomposeRateMatrix();
+ site_rate->setPInvar(initPInv);
+ site_rate->setGammaShape(initAlpha);
tree->clearAllPartialLH();
optimizeParameters(fixed_len, false, logl_epsilon, gradient_epsilon);
vector<double> estResults;
- double estPInv = tree->getRate()->getPInvar();
- double estAlpha = tree->getRate()->getGammaShape();
+ double estPInv = site_rate->getPInvar();
+ double estAlpha = site_rate->getGammaShape();
double logl = tree->getCurScore();
estResults.push_back(estPInv);
estResults.push_back(estAlpha);
diff --git a/model/modelfactory.h b/model/modelfactory.h
index 5218f0b..9cdabf3 100644
--- a/model/modelfactory.h
+++ b/model/modelfactory.h
@@ -257,9 +257,8 @@ protected:
*/
virtual bool getVariables(double *variables);
- vector<double> optimizeGammaInvWithInitValue(int fixed_len, double logl_epsilon, double gradient_epsilon, PhyloTree *tree,
- RateHeterogeneity *site_rates, double *rates, double *state_freqs,
- double initPInv, double initAlpha, DoubleVector &lenvec);
+ vector<double> optimizeGammaInvWithInitValue(int fixed_len, double logl_epsilon, double gradient_epsilon,
+ double initPInv, double initAlpha, DoubleVector &lenvec, Checkpoint *model_ckp);
};
#endif
diff --git a/model/modelset.cpp b/model/modelset.cpp
index 83feabb..bcc109f 100644
--- a/model/modelset.cpp
+++ b/model/modelset.cpp
@@ -158,6 +158,16 @@ void ModelSet::decomposeRateMatrix()
size_t vsize = phylo_tree->vector_size;
size_t states2 = num_states*num_states;
size_t ptn, i, x;
+
+ size_t max_size = get_safe_upper_limit(size());
+
+ // copy dummy values
+ for (size_t m = size(); m < max_size; m++) {
+ memcpy(&eigenvalues[m*num_states], &eigenvalues[(m-1)*num_states], sizeof(double)*num_states);
+ memcpy(&eigenvectors[m*states2], &eigenvectors[(m-1)*states2], sizeof(double)*states2);
+ memcpy(&inv_eigenvectors[m*states2], &inv_eigenvectors[(m-1)*states2], sizeof(double)*states2);
+ }
+
double new_eval[num_states*vsize];
double new_evec[states2*vsize];
double new_inv_evec[states2*vsize];
diff --git a/model/ratefree.cpp b/model/ratefree.cpp
index 60c302f..c31dcba 100644
--- a/model/ratefree.cpp
+++ b/model/ratefree.cpp
@@ -210,7 +210,7 @@ double RateFree::optimizeParameters(double gradient_epsilon) {
cout << "Optimizing " << name << " model parameters by " << optimize_alg << " algorithm..." << endl;
// TODO: turn off EM algorithm for +ASC model
- if ((optimize_alg.find("EM") != string::npos && phylo_tree->getModelFactory()->unobserved_ptns.empty()) || getPInvar() <= MIN_PINVAR)
+ if ((optimize_alg.find("EM") != string::npos && phylo_tree->getModelFactory()->unobserved_ptns.empty()))
return optimizeWithEM();
//if (freq_type == FREQ_ESTIMATE) scaleStateFreq(false);
diff --git a/phyloanalysis.cpp b/phyloanalysis.cpp
index 7017887..1e71528 100644
--- a/phyloanalysis.cpp
+++ b/phyloanalysis.cpp
@@ -1546,7 +1546,7 @@ void printMiscInfo(Params ¶ms, IQTree &iqtree, double *pattern_lh) {
printAncestralSequences(params.out_prefix, &iqtree, params.print_ancestral_sequence);
}
- if (params.print_site_state_freq != WSF_NONE) {
+ if (params.print_site_state_freq != WSF_NONE && !params.site_freq_file && !params.tree_freq_file) {
string site_freq_file = params.out_prefix;
site_freq_file += ".sitesf";
printSiteStateFreq(site_freq_file.c_str(), &iqtree);
@@ -2409,8 +2409,12 @@ void runStandardBootstrap(Params ¶ms, string &original_model, Alignment *ali
}
} else
boot_tree = new IQTree(bootstrap_alignment);
- if (params.print_bootaln && MPIHelper::getInstance().isMaster())
- bootstrap_alignment->printPhylip(bootaln_name.c_str(), true);
+ if (params.print_bootaln && MPIHelper::getInstance().isMaster()) {
+ if (bootstrap_alignment->isSuperAlignment())
+ ((SuperAlignment*)bootstrap_alignment)->printCombinedAlignment(bootaln_name.c_str(), true);
+ else
+ bootstrap_alignment->printPhylip(bootaln_name.c_str(), true);
+ }
// set checkpoint
boot_tree->setCheckpoint(tree->getCheckpoint());
diff --git a/phylokernelnew.h b/phylokernelnew.h
index 11c29eb..b68e46a 100644
--- a/phylokernelnew.h
+++ b/phylokernelnew.h
@@ -566,7 +566,7 @@ inline void scaleLikelihood(VectorClass &lh_max, double *invar, double *dad_part
{
if (SAFE_NUMERIC) {
size_t x, i;
- auto underflown = ((lh_max < SCALING_THRESHOLD) & (lh_max != 0.0) & (VectorClass().load_a(invar) == 0.0));
+ auto underflown = ((lh_max < SCALING_THRESHOLD) & (VectorClass().load_a(invar) == 0.0));
if (horizontal_or(underflown)) { // at least one site has numerical underflown
for (x = 0; x < VectorClass::size(); x++)
if (underflown[x]) {
@@ -580,7 +580,7 @@ inline void scaleLikelihood(VectorClass &lh_max, double *invar, double *dad_part
}
} else {
size_t x, i;
- auto underflown = (lh_max < SCALING_THRESHOLD) & (lh_max != 0.0) & (VectorClass().load_a(invar) == 0.0);
+ auto underflown = (lh_max < SCALING_THRESHOLD) & (VectorClass().load_a(invar) == 0.0);
if (horizontal_or(underflown)) { // at least one site has numerical underflown
size_t block = ncat_mix * nstates;
for (x = 0; x < VectorClass::size(); x++)
@@ -1286,7 +1286,7 @@ void PhyloTree::computePartialLikelihoodGenericSIMD(TraversalInfo &info, size_t
}
// check if one should scale partial likelihoods
if (SAFE_NUMERIC) {
- auto underflown = ((lh_max < SCALING_THRESHOLD) & (lh_max != 0.0) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0));
+ auto underflown = ((lh_max < SCALING_THRESHOLD) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0));
if (horizontal_or(underflown)) { // at least one site has numerical underflown
for (x = 0; x < VectorClass::size(); x++)
if (underflown[x]) {
@@ -1304,7 +1304,7 @@ void PhyloTree::computePartialLikelihoodGenericSIMD(TraversalInfo &info, size_t
}
if (!SAFE_NUMERIC) {
- auto underflown = (lh_max < SCALING_THRESHOLD) & (lh_max != 0.0) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0);
+ auto underflown = (lh_max < SCALING_THRESHOLD) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0);
if (horizontal_or(underflown)) { // at least one site has numerical underflown
for (x = 0; x < VectorClass::size(); x++)
if (underflown[x]) {
@@ -1474,7 +1474,7 @@ void PhyloTree::computePartialLikelihoodGenericSIMD(TraversalInfo &info, size_t
#endif
// check if one should scale partial likelihoods
if (SAFE_NUMERIC) {
- auto underflown = ((lh_max < SCALING_THRESHOLD) & (lh_max != 0.0) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0));
+ auto underflown = ((lh_max < SCALING_THRESHOLD) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0));
if (horizontal_or(underflown)) { // at least one site has numerical underflown
for (x = 0; x < VectorClass::size(); x++)
if (underflown[x]) {
@@ -1537,7 +1537,7 @@ void PhyloTree::computePartialLikelihoodGenericSIMD(TraversalInfo &info, size_t
#endif
// check if one should scale partial likelihoods
if (SAFE_NUMERIC) {
- auto underflown = ((lh_max < SCALING_THRESHOLD) & (lh_max != 0.0) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0));
+ auto underflown = ((lh_max < SCALING_THRESHOLD) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0));
if (horizontal_or(underflown)) { // at least one site has numerical underflown
for (x = 0; x < VectorClass::size(); x++)
if (underflown[x]) {
@@ -1557,7 +1557,7 @@ void PhyloTree::computePartialLikelihoodGenericSIMD(TraversalInfo &info, size_t
} // IF SITE_MODEL
if (!SAFE_NUMERIC) {
- auto underflown = (lh_max < SCALING_THRESHOLD) & (lh_max != 0.0) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0);
+ auto underflown = (lh_max < SCALING_THRESHOLD) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0);
if (horizontal_or(underflown)) { // at least one site has numerical underflown
for (x = 0; x < VectorClass::size(); x++)
if (underflown[x]) {
@@ -1660,7 +1660,7 @@ void PhyloTree::computePartialLikelihoodGenericSIMD(TraversalInfo &info, size_t
// check if one should scale partial likelihoods
if (SAFE_NUMERIC) {
- auto underflown = ((lh_max < SCALING_THRESHOLD) & (lh_max != 0.0) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0));
+ auto underflown = ((lh_max < SCALING_THRESHOLD) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0));
if (horizontal_or(underflown))
for (x = 0; x < VectorClass::size(); x++)
if (underflown[x]) {
@@ -1682,7 +1682,7 @@ void PhyloTree::computePartialLikelihoodGenericSIMD(TraversalInfo &info, size_t
if (!SAFE_NUMERIC) {
// check if one should scale partial likelihoods
- auto underflown = (lh_max < SCALING_THRESHOLD) & (lh_max != 0.0) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0);
+ auto underflown = (lh_max < SCALING_THRESHOLD) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0);
if (horizontal_or(underflown)) { // at least one site has numerical underflown
for (x = 0; x < VectorClass::size(); x++)
if (underflown[x]) {
@@ -2024,7 +2024,7 @@ void PhyloTree::computeLikelihoodDervGenericSIMD(PhyloNeighbor *dad_branch, Phyl
#endif
for (ptn = ptn_lower; ptn < ptn_upper; ptn+=VectorClass::size()) {
- VectorClass lh_ptn;
+ VectorClass lh_ptn(0.0);
//lh_ptn.load_a(&ptn_invar[ptn]);
VectorClass *theta = (VectorClass*)(theta_all + ptn*block);
VectorClass df_ptn, ddf_ptn;
@@ -2055,7 +2055,8 @@ void PhyloTree::computeLikelihoodDervGenericSIMD(PhyloNeighbor *dad_branch, Phyl
dotProductTriple<VectorClass, double, FMA>(val0, val1, val2, theta, lh_ptn, df_ptn, ddf_ptn, block, nstates);
#endif
}
- lh_ptn = abs(lh_ptn + VectorClass().load_a(&ptn_invar[ptn]));
+ // Sum later to avoid underflow of invariant sites
+ lh_ptn = abs(lh_ptn) + VectorClass().load_a(&ptn_invar[ptn]);
if (ptn < orig_nptn) {
lh_ptn = 1.0 / lh_ptn;
@@ -2319,8 +2320,8 @@ double PhyloTree::computeLikelihoodBranchGenericSIMD(PhyloNeighbor *dad_branch,
double *vec_tip = buffer_partial_lh_ptr + block*VectorClass::size()*thread_id;
for (ptn = ptn_lower; ptn < ptn_upper; ptn+=VectorClass::size()) {
- VectorClass lh_ptn;
- lh_ptn.load_a(&ptn_invar[ptn]);
+ VectorClass lh_ptn(0.0);
+// lh_ptn.load_a(&ptn_invar[ptn]);
VectorClass *lh_cat = (VectorClass*)(_pattern_lh_cat + ptn*ncat_mix);
VectorClass *partial_lh_dad = (VectorClass*)(dad_branch->partial_lh + ptn*block);
VectorClass *lh_node = SITE_MODEL ? (VectorClass*)&partial_lh_node[ptn*nstates] : (VectorClass*)vec_tip;
@@ -2411,7 +2412,8 @@ double PhyloTree::computeLikelihoodBranchGenericSIMD(PhyloNeighbor *dad_branch,
}
vc_min_scale *= LOG_SCALING_THRESHOLD;
- lh_ptn = abs(lh_ptn);
+ // Sum later to avoid underflow of invariant sites
+ lh_ptn = abs(lh_ptn) + VectorClass().load_a(&ptn_invar[ptn]);
if (ptn < orig_nptn) {
lh_ptn = log(lh_ptn) + vc_min_scale;
lh_ptn.store_a(&_pattern_lh[ptn]);
@@ -2466,8 +2468,8 @@ double PhyloTree::computeLikelihoodBranchGenericSIMD(PhyloNeighbor *dad_branch,
computePartialLikelihood(*it, ptn_lower, ptn_upper, thread_id);
for (ptn = ptn_lower; ptn < ptn_upper; ptn+=VectorClass::size()) {
- VectorClass lh_ptn;
- lh_ptn.load_a(&ptn_invar[ptn]);
+ VectorClass lh_ptn(0.0);
+// lh_ptn.load_a(&ptn_invar[ptn]);
VectorClass *lh_cat = (VectorClass*)(_pattern_lh_cat + ptn*ncat_mix);
VectorClass *partial_lh_dad = (VectorClass*)(dad_branch->partial_lh + ptn*block);
VectorClass *partial_lh_node = (VectorClass*)(node_branch->partial_lh + ptn*block);
@@ -2541,7 +2543,8 @@ double PhyloTree::computeLikelihoodBranchGenericSIMD(PhyloNeighbor *dad_branch,
} // if SAFE_NUMERIC
vc_min_scale *= LOG_SCALING_THRESHOLD;
- lh_ptn = abs(lh_ptn);
+ // Sum later to avoid underflow of invariant sites
+ lh_ptn = abs(lh_ptn) + VectorClass().load_a(&ptn_invar[ptn]);
if (ptn < orig_nptn) {
lh_ptn = log(lh_ptn) + vc_min_scale;
@@ -2708,11 +2711,11 @@ double PhyloTree::computeLikelihoodFromBufferGenericSIMD()
#pragma omp for schedule(static) nowait
#endif
for (ptn = 0; ptn < nptn; ptn+=VectorClass::size()) {
- VectorClass lh_ptn;
+ VectorClass lh_ptn(0.0);
VectorClass *theta = (VectorClass*)(theta_all + ptn*block);
if (SITE_MODEL) {
VectorClass *eval_ptr = (VectorClass*)&eval[ptn*nstates];
- lh_ptn.load_a(&ptn_invar[ptn]);
+// lh_ptn.load_a(&ptn_invar[ptn]);
for (c = 0; c < ncat; c++) {
VectorClass lh_cat;
#ifdef KERNEL_FIX_STATES
@@ -2725,9 +2728,11 @@ double PhyloTree::computeLikelihoodFromBufferGenericSIMD()
}
} else {
dotProductVec<VectorClass, double, FMA>(val0, theta, lh_ptn, block);
- lh_ptn += VectorClass().load_a(&ptn_invar[ptn]);
}
+ // Sum later to avoid underflow of invariant sites
+ lh_ptn = abs(lh_ptn) + VectorClass().load_a(&ptn_invar[ptn]);
+
if (ptn < orig_nptn) {
lh_ptn = log(abs(lh_ptn)) + VectorClass().load_a(&buffer_scale_all[ptn]);
lh_ptn.store_a(&_pattern_lh[ptn]);
diff --git a/phylotree.cpp b/phylotree.cpp
index 5cd4c55..ffa9886 100644
--- a/phylotree.cpp
+++ b/phylotree.cpp
@@ -692,8 +692,8 @@ size_t PhyloTree::getBufferPartialLhSize() {
const size_t VECTOR_SIZE = 8; // TODO, adjusted
size_t ncat_mix = site_rate->getNRate() * ((model_factory->fused_mix_rate)? 1 : model->getNMixtures());
size_t block = model->num_states * ncat_mix;
- size_t buffer_size = get_safe_upper_limit(block * model->num_states * 2 * aln->getNSeq());
- buffer_size += get_safe_upper_limit(block * (aln->getNSeq()+1) * (aln->STATE_UNKNOWN+1));
+ size_t buffer_size = get_safe_upper_limit(block * model->num_states * 2) * aln->getNSeq();
+ buffer_size += get_safe_upper_limit(block *(aln->STATE_UNKNOWN+1)) * (aln->getNSeq()+1);
buffer_size += (block*2+model->num_states)*VECTOR_SIZE*num_threads;
return buffer_size;
}
diff --git a/phylotreesse.cpp b/phylotreesse.cpp
index b0f9c01..32f3ecb 100644
--- a/phylotreesse.cpp
+++ b/phylotreesse.cpp
@@ -230,7 +230,7 @@ void PhyloTree::computeTipPartialLikelihood() {
double *inv_evec = &model->getInverseEigenvectors()[ptn*nstates*nstates];
for (v = 0; v < vector_size; v++) {
- int state = aln->STATE_UNKNOWN;
+ int state = 0;
if (ptn+v < nptn)
state = aln->at(ptn+v)[nodeid];
// double *partial_lh = node_partial_lh + ptn*nstates;
@@ -395,6 +395,13 @@ void PhyloTree::computePtnInvar() {
size_t nptn = aln->getNPattern(), ptn;
size_t maxptn = get_safe_upper_limit(nptn)+get_safe_upper_limit(model_factory->unobserved_ptns.size());
int nstates = aln->num_states;
+ int x;
+ // ambiguous characters
+ int ambi_aa[] = {
+ 4+8, // B = N or D
+ 32+64, // Z = Q or E
+ 512+1024 // U = I or L
+ };
double *state_freq = aligned_alloc<double>(nstates);
model->getStateFrequency(state_freq);
@@ -402,11 +409,31 @@ void PhyloTree::computePtnInvar() {
double p_invar = site_rate->getPInvar();
if (p_invar != 0.0) {
for (ptn = 0; ptn < nptn; ptn++) {
- if ((*aln)[ptn].const_char == nstates)
+ if ((*aln)[ptn].const_char > aln->STATE_UNKNOWN)
+ continue;
+
+ if ((*aln)[ptn].const_char == aln->STATE_UNKNOWN) {
ptn_invar[ptn] = p_invar;
- else if ((*aln)[ptn].const_char < nstates) {
+ } else if ((*aln)[ptn].const_char < nstates) {
ptn_invar[ptn] = p_invar * state_freq[(int) (*aln)[ptn].const_char];
- }
+ } else if (aln->seq_type == SEQ_DNA) {
+ // 2016-12-21: handling ambiguous state
+ ptn_invar[ptn] = 0.0;
+ int cstate = (*aln)[ptn].const_char-nstates+1;
+ for (x = 0; x < nstates; x++) {
+ if ((cstate) & (1 << x))
+ ptn_invar[ptn] += state_freq[x];
+ }
+ ptn_invar[ptn] *= p_invar;
+ } else if (aln->seq_type == SEQ_PROTEIN) {
+ ptn_invar[ptn] = 0.0;
+ int cstate = (*aln)[ptn].const_char-nstates;
+ assert(cstate <= 2);
+ for (x = 0; x < 11; x++)
+ if (ambi_aa[cstate] & (1 << x))
+ ptn_invar[ptn] += state_freq[x];
+ ptn_invar[ptn] *= p_invar;
+ } else ASSERT(0);
}
// // ascertmain bias correction
// for (ptn = 0; ptn < model_factory->unobserved_ptns.size(); ptn++)
diff --git a/tools.cpp b/tools.cpp
index 4e5af85..fecb07f 100644
--- a/tools.cpp
+++ b/tools.cpp
@@ -3221,6 +3221,9 @@ void parseArg(int argc, char *argv[], Params ¶ms) {
if (params.lh_mem_save == LM_MEM_SAVE && params.partition_file)
outError("-mem option does not work with partition models yet");
+ if (params.gbo_replicates && params.num_bootstrap_samples)
+ outError("UFBoot (-bb) and standard bootstrap (-b) must not be specified together");
+
if (!params.out_prefix) {
if (params.eco_dag_file)
params.out_prefix = params.eco_dag_file;
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/iqtree.git
More information about the debian-med-commit
mailing list