[med-svn] [iqtree] 01/04: Imported Upstream version 1.4.3+dfsg

Andreas Tille tille at debian.org
Mon Aug 15 20:37:21 UTC 2016


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository iqtree.

commit d67f0c4bfca8e0fd8475b648d97c71984a432b24
Author: Andreas Tille <tille at debian.org>
Date:   Mon Aug 15 22:22:56 2016 +0200

    Imported Upstream version 1.4.3+dfsg
---
 CMakeLists.txt              |    2 +-
 alignment.cpp               |   88 +++-
 alignment.h                 |   14 +-
 checkpoint.cpp              |    4 +-
 iqtree.cpp                  |   45 +-
 model/modelcodon.cpp        |   75 +++
 model/modelcodon.h          |   10 +
 model/modeldna.cpp          |   10 +-
 model/modelfactory.cpp      |  414 +++++++--------
 model/modelfactory.h        |    8 +-
 model/modelgtr.cpp          |    8 +-
 model/modelgtr.h            |    2 +-
 model/modelmixture.cpp      |    2 +-
 model/modelnonrev.cpp       |    6 +-
 model/ratefree.cpp          |   28 +-
 model/ratefreeinvar.cpp     |    1 +
 model/ratefreeinvar.h       |    2 +-
 model/rategamma.cpp         |   29 +-
 model/rategammainvar.cpp    |  239 ++++++---
 model/rategammainvar.h      |   53 +-
 model/rateheterogeneity.cpp |   18 +-
 model/rateheterogeneity.h   |   13 +
 ncl/nxscharactersblock.cpp  |    9 +-
 ngs.cpp                     |    4 +-
 ngs.h                       |    2 +-
 pda.cpp                     |    2 -
 phyloanalysis.cpp           |   53 +-
 phylokernel.h               |   10 +-
 phylokernelmixrate.h        |    6 +
 phylokernelmixture.h        |    6 +
 phylosupertreeplen.cpp      |    6 +-
 phylotesting.cpp            |   69 ++-
 phylotree.cpp               | 1162 ++----------------------------------------
 phylotree.h                 |  152 +-----
 phylotreesse.cpp            | 1185 ++++++++++++++++---------------------------
 quartet.cpp                 |    4 +
 tools.cpp                   |  122 +++--
 tools.h                     |   33 +-
 38 files changed, 1425 insertions(+), 2471 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 22a4864..497a66f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -45,7 +45,7 @@ add_definitions(-DIQ_TREE)
 # The version number.
 set (iqtree_VERSION_MAJOR 1)
 set (iqtree_VERSION_MINOR 4)
-set (iqtree_VERSION_PATCH "2") 
+set (iqtree_VERSION_PATCH "3") 
 
 set(BUILD_SHARED_LIBS OFF)
 
diff --git a/alignment.cpp b/alignment.cpp
index 4a20602..25214c2 100644
--- a/alignment.cpp
+++ b/alignment.cpp
@@ -347,7 +347,10 @@ Alignment::Alignment(char *filename, char *sequence_type, InputType &intype) : v
             readFasta(filename, sequence_type);
         } else if (intype == IN_PHYLIP) {
             cout << "Phylip format detected" << endl;
-            readPhylip(filename, sequence_type);
+            if (Params::getInstance().phylip_sequential_format)
+                readPhylipSequential(filename, sequence_type);
+            else
+                readPhylip(filename, sequence_type);
         } else if (intype == IN_CLUSTAL) {
             cout << "Clustal format detected" << endl;
             readClustal(filename, sequence_type);
@@ -1464,6 +1467,76 @@ int Alignment::readPhylip(char *filename, char *sequence_type) {
     return buildPattern(sequences, sequence_type, nseq, nsite);
 }
 
+int Alignment::readPhylipSequential(char *filename, char *sequence_type) {
+
+    StrVector sequences;
+    ostringstream err_str;
+    ifstream in;
+    int line_num = 1;
+    // set the failbit and badbit
+    in.exceptions(ios::failbit | ios::badbit);
+    in.open(filename);
+    int nseq = 0, nsite = 0;
+    int seq_id = 0;
+    string line;
+    // remove the failbit
+    in.exceptions(ios::badbit);
+    num_states = 0;
+
+    for (; !in.eof(); line_num++) {
+        getline(in, line);
+        line = line.substr(0, line.find_first_of("\n\r"));
+        if (line == "") continue;
+
+        //cout << line << endl;
+        if (nseq == 0) { // read number of sequences and sites
+            istringstream line_in(line);
+            if (!(line_in >> nseq >> nsite))
+                throw "Invalid PHYLIP format. First line must contain number of sequences and sites";
+            //cout << "nseq: " << nseq << "  nsite: " << nsite << endl;
+            if (nseq < 3)
+                throw "There must be at least 3 sequences";
+            if (nsite < 1)
+                throw "No alignment columns";
+
+            seq_names.resize(nseq, "");
+            sequences.resize(nseq, "");
+
+        } else { // read sequence contents
+            if (seq_id >= nseq)
+                throw "Line " + convertIntToString(line_num) + ": Too many sequences detected";
+                
+            if (seq_names[seq_id] == "") { // cut out the sequence name
+                string::size_type pos = line.find_first_of(" \t");
+                if (pos == string::npos) pos = 10; //  assume standard phylip
+                seq_names[seq_id] = line.substr(0, pos);
+                line.erase(0, pos);
+            }
+            for (string::iterator it = line.begin(); it != line.end(); it++) {
+                if ((*it) <= ' ') continue;
+                if (isalnum(*it) || (*it) == '-' || (*it) == '?'|| (*it) == '.' || (*it) == '*' || (*it) == '~')
+                    sequences[seq_id].append(1, toupper(*it));
+                else {
+                    err_str << "Line " << line_num <<": Unrecognized character " << *it;
+                    throw err_str.str();
+                }
+            }
+            if (sequences[seq_id].length() > nsite)
+                throw ("Line " + convertIntToString(line_num) + ": Sequence " + seq_names[seq_id] + " is too long (" + convertIntToString(sequences[seq_id].length()) + ")");
+            if (sequences[seq_id].length() == nsite) {
+                seq_id++;
+            }                
+        }
+        //sequences.
+    }
+    in.clear();
+    // set the failbit again
+    in.exceptions(ios::failbit | ios::badbit);
+    in.close();
+
+    return buildPattern(sequences, sequence_type, nseq, nsite);
+}
+
 int Alignment::readFasta(char *filename, char *sequence_type) {
 
     StrVector sequences;
@@ -3140,7 +3213,7 @@ void Alignment::computeCodonFreq(StateFreqType freq, double *state_freq, double
 	convfreq(state_freq);
 }
 
-void Alignment::computeEmpiricalRate (double *rates) {
+void Alignment::computeDivergenceMatrix(double *rates) {
     int i, j, k;
     assert(rates);
     int nseqs = getNSeq();
@@ -3181,8 +3254,8 @@ void Alignment::computeEmpiricalRate (double *rates) {
         for (j = i+1; j < num_states; j++) {
             rates[k++] = (pair_rates[i*num_states+j] + pair_rates[j*num_states+i]) / last_rate;
             // BIG WARNING: zero rates might cause numerical instability!
-            if (rates[k-1] <= 0.0001) rates[k-1] = 0.01;
-            if (rates[k-1] > 100.0) rates[k-1] = 50.0;
+//            if (rates[k-1] <= 0.0001) rates[k-1] = 0.01;
+//            if (rates[k-1] > 100.0) rates[k-1] = 50.0;
         }
     rates[k-1] = 1;
     if (verbose_mode >= VB_MAX) {
@@ -3199,11 +3272,11 @@ void Alignment::computeEmpiricalRate (double *rates) {
     delete [] pair_rates;
 }
 
-void Alignment::computeEmpiricalRateNonRev (double *rates) {
+void Alignment::computeDivergenceMatrixNonRev (double *rates) {
     double *rates_mat = new double[num_states*num_states];
     int i, j, k;
 
-    computeEmpiricalRate(rates);
+    computeDivergenceMatrix(rates);
 
     for (i = 0, k = 0; i < num_states-1; i++)
         for (j = i+1; j < num_states; j++)
@@ -3472,3 +3545,6 @@ double Alignment::multinomialProb (IntVector &pattern_freq)
     }
     return (fac - sumFac + sumProb);
 }
+
+
+
diff --git a/alignment.h b/alignment.h
index 1b736c9..fdee2c8 100644
--- a/alignment.h
+++ b/alignment.h
@@ -111,7 +111,7 @@ public:
     int buildPattern(StrVector &sequences, char *sequence_type, int nseq, int nsite);
 
     /**
-            read the alignment in PHYLIP format
+            read the alignment in PHYLIP format (interleaved)
             @param filename file name
             @param sequence_type type of the sequence, either "BIN", "DNA", "AA", or NULL
             @return 1 on success, 0 on failure
@@ -119,6 +119,14 @@ public:
     int readPhylip(char *filename, char *sequence_type);
 
     /**
+            read the alignment in sequential PHYLIP format
+            @param filename file name
+            @param sequence_type type of the sequence, either "BIN", "DNA", "AA", or NULL
+            @return 1 on success, 0 on failure
+     */
+    int readPhylipSequential(char *filename, char *sequence_type);
+
+    /**
             read the alignment in FASTA format
             @param filename file name
             @param sequence_type type of the sequence, either "BIN", "DNA", "AA", or NULL
@@ -542,13 +550,13 @@ public:
             compute empirical rates between state pairs
             @param rates (OUT) vector of size num_states*(num_states-1)/2 for the rates
      */
-    virtual void computeEmpiricalRate(double *rates);
+    virtual void computeDivergenceMatrix(double *rates);
 
     /**
             compute non-reversible empirical rates between state pairs
             @param rates (OUT) vector of size num_states*(num_states-1) for the rates
      */
-    virtual void computeEmpiricalRateNonRev(double *rates);
+    virtual void computeDivergenceMatrixNonRev(double *rates);
 
     /**
             count the fraction of constant sites in the alignment, update the variable frac_const_sites
diff --git a/checkpoint.cpp b/checkpoint.cpp
index 19312a6..1229d3f 100644
--- a/checkpoint.cpp
+++ b/checkpoint.cpp
@@ -99,7 +99,9 @@ void Checkpoint::setDumpInterval(double interval) {
 
 
 void Checkpoint::dump(bool force) {
-	assert(filename != "");
+    if (filename == "")
+        return;
+        
     if (!force && getRealTime() < prev_dump_time + dump_interval) {
         return;
     }
diff --git a/iqtree.cpp b/iqtree.cpp
index 2179c96..cf8dc72 100644
--- a/iqtree.cpp
+++ b/iqtree.cpp
@@ -314,6 +314,19 @@ void IQTree::initSettings(Params &params) {
                 boot_trees_brlen.resize(params.gbo_replicates);
         } else {
             cout << "CHECKPOINT: " << boot_trees.size() << " UFBoot trees and " << boot_splits.size() << " UFBootSplits restored" << endl;
+            // TODO: quick and dirty fix, no branch lengths are saved after checkpointing
+            if (params.print_ufboot_trees == 2) {
+                boot_trees_brlen.resize(params.gbo_replicates);
+                string ufboot_file = params.out_prefix + string(".ufboot");
+                if (fileExists(ufboot_file)) {
+                    ifstream in(ufboot_file.c_str());
+                    for (i = 0; i < params.gbo_replicates && !in.eof(); i++)
+                        in >> boot_trees_brlen[i];
+                    in.close();
+                } else {
+                    outWarning("Cannot properly restore bootstrap trees with branch lengths");
+                }
+            }
         }
         VerboseMode saved_mode = verbose_mode;
         verbose_mode = VB_QUIET;
@@ -1720,8 +1733,8 @@ extern pllUFBootData * pllUFBootDataPtr;
 string IQTree::optimizeModelParameters(bool printInfo, double logl_epsilon) {
 	if (logl_epsilon == -1)
 		logl_epsilon = params->modeps;
-//    if (params->test_param)
-//        logl_epsilon = 1.0;
+//    if (params->opt_gammai)
+//        logl_epsilon = 0.1;
     cout << "Estimate model parameters (epsilon = " << logl_epsilon << ")" << endl;
 	double stime = getRealTime();
 	string newTree;
@@ -1746,13 +1759,9 @@ string IQTree::optimizeModelParameters(bool printInfo, double logl_epsilon) {
             cout << etime - stime << " seconds (logl: " << curScore << ")" << endl;
 	} else {
         double modOptScore;
-        if (params->test_param) { // DO RESTART ON ALPHA AND P_INVAR
-            double stime = getRealTime();
+        if (params->opt_gammai) { // DO RESTART ON ALPHA AND P_INVAR
             modOptScore = getModelFactory()->optimizeParametersGammaInvar(params->fixed_branch_length, printInfo, logl_epsilon);
-            double etime = getRealTime();
-            cout << "Testing param took: " << etime -stime << " CPU seconds" << endl;
-            cout << endl;
-            params->test_param = false;
+            params->opt_gammai = false;
         } else {
             modOptScore = getModelFactory()->optimizeParameters(params->fixed_branch_length, printInfo, logl_epsilon);
         }
@@ -1915,6 +1924,10 @@ double IQTree::doTreeSearch() {
     	 * Perturb the tree
     	 *---------------------------------------*/
         double perturbScore = 0.0;
+        int numStableBranches = aln->getNSeq() - 3 - candidateTrees.getStableSplits().size();
+        // Change from floor to ceil to make sure perturbing at least 1 branch
+        int numPerturb = ceil(searchinfo.curPerStrength * numStableBranches);
+        bool treechanged = false;
         if (iqp_assess_quartet == IQP_BOOTSTRAP) {
             // create bootstrap sample
             Alignment* bootstrap_alignment;
@@ -1929,8 +1942,6 @@ double IQTree::doTreeSearch() {
             curScore = optimizeAllBranches();
         } else {
             if (params->snni) {
-            	int numStableBranches = aln->getNSeq() - 3 - candidateTrees.getStableSplits().size();
-                int numNNI = floor(searchinfo.curPerStrength * numStableBranches);
 //                string candidateTree = candidateTrees.getRandCandTree();
 //                readTreeString(candidateTree);
                 readTreeString(candidateTrees.getRandCandTree());
@@ -1939,7 +1950,7 @@ double IQTree::doTreeSearch() {
                 if (params->iqp) {
                     doIQP();
                 } else {
-                    doRandomNNIs(numNNI);
+                    doRandomNNIs(numPerturb);
                 }
             } else {
             	readTreeString(candidateTrees.getBestTrees()[0]);
@@ -1957,8 +1968,11 @@ double IQTree::doTreeSearch() {
                 }
             }
 
+            double oldScore = curScore;
             computeLogL();
             perturbScore = curScore;
+            if (perturbScore < oldScore - 0.01)
+                treechanged = true;
         }
 
     	/*----------------------------------------
@@ -1968,6 +1982,10 @@ double IQTree::doTreeSearch() {
         int nni_steps = 0;
 
         imd_tree = doNNISearch(nni_count, nni_steps);
+        
+        if (nni_count == 0 && params->snni && numPerturb > 0 && treechanged) {
+            assert(0 && "BUG: NNI could not improved perturbed tree");
+        }
 
         if (iqp_assess_quartet == IQP_BOOTSTRAP) {
             // restore alignment
@@ -2274,8 +2292,9 @@ double IQTree::optimizeNNI(int &nni_count, int &nni_steps) {
             numNNIs = 1;
             curScore = oldScore;
         }
-        if (curScore - oldScore < 0.1)
-        	break;
+        // BUG in following line, causing premature break by rollBack! that's why commented out 
+//        if (curScore - oldScore < 0.1)
+//        	break;
     }
 
     if (nni_count == 0 && verbose_mode >= VB_MED) {
diff --git a/model/modelcodon.cpp b/model/modelcodon.cpp
index 205b38a..b3c0fa0 100644
--- a/model/modelcodon.cpp
+++ b/model/modelcodon.cpp
@@ -9,6 +9,9 @@
 #include <string>
 
 
+const double MIN_OMEGA_KAPPA = 0.001;
+const double MAX_OMEGA_KAPPA = 50.0;
+
 /* Empirical codon model restricted (Kosiol et al. 2007), source: http://www.ebi.ac.uk/goldman/ECM/ */
 string model_ECMrest1 =
 "11.192024 \
@@ -454,6 +457,7 @@ StateFreqType ModelCodon::initGY94(bool fix_kappa, CodonKappaStyle kappa_style)
     if (fix_kappa)
         kappa = 1.0;
     fix_kappa2 = true;
+    codon_freq_style = CF_TARGET_CODON;
     this->codon_kappa_style = kappa_style;
     if (kappa_style == CK_TWO_KAPPA)
         fix_kappa2 = false;
@@ -890,6 +894,77 @@ void ModelCodon::setVariables(double *variables) {
 	}
 }
 
+void ModelCodon::setBounds(double *lower_bound, double *upper_bound, bool *bound_check) {
+	int i, ndim = getNDim();
+
+	for (i = 1; i <= ndim; i++) {
+		//cout << variables[i] << endl;
+		lower_bound[i] = MIN_OMEGA_KAPPA;
+		upper_bound[i] = MAX_OMEGA_KAPPA;
+		bound_check[i] = false;
+	}
+
+	if (freq_type == FREQ_ESTIMATE) {
+		for (i = ndim-num_states+2; i <= ndim; i++) {
+//            lower_bound[i] = MIN_FREQUENCY/state_freq[highest_freq_state];
+//			upper_bound[i] = state_freq[highest_freq_state]/MIN_FREQUENCY;
+            lower_bound[i]  = MIN_FREQUENCY;
+//            upper_bound[i] = 100.0;
+            upper_bound[i] = 1.0;
+            bound_check[i] = false;
+        }
+	}
+}
+
+double ModelCodon::optimizeParameters(double gradient_epsilon) {
+	int ndim = getNDim();
+	
+	// return if nothing to be optimized
+	if (ndim == 0) return 0.0;
+    
+	if (verbose_mode >= VB_MAX)
+		cout << "Optimizing " << name << " model parameters..." << endl;
+
+
+	double *variables = new double[ndim+1];
+	double *upper_bound = new double[ndim+1];
+	double *lower_bound = new double[ndim+1];
+	bool *bound_check = new bool[ndim+1];
+	double score;
+
+    for (int i = 0; i < num_states; i++)
+        if (state_freq[i] > state_freq[highest_freq_state])
+            highest_freq_state = i;
+
+	// by BFGS algorithm
+	setVariables(variables);
+	setBounds(lower_bound, upper_bound, bound_check);
+//    if (phylo_tree->params->optimize_alg.find("BFGS-B") == string::npos)
+//        score = -minimizeMultiDimen(variables, ndim, lower_bound, upper_bound, bound_check, max(gradient_epsilon, TOL_RATE));
+//    else
+        score = -L_BFGS_B(ndim, variables+1, lower_bound+1, upper_bound+1, max(gradient_epsilon, TOL_RATE));
+
+	bool changed = getVariables(variables);
+    // BQM 2015-09-07: normalize state_freq
+	if (freq_type == FREQ_ESTIMATE) { 
+        scaleStateFreq(true);
+        changed = true;
+    }
+    if (changed) {
+        decomposeRateMatrix();
+        phylo_tree->clearAllPartialLH();
+        score = phylo_tree->computeLikelihood();
+    }
+	
+	delete [] bound_check;
+	delete [] lower_bound;
+	delete [] upper_bound;
+	delete [] variables;
+
+	return score;
+}
+
+
 void ModelCodon::writeInfo(ostream &out) {
     if (name.find('_') == string::npos)
         out << "Nonsynonymous/synonymous ratio (omega): " << omega << endl;
diff --git a/model/modelcodon.h b/model/modelcodon.h
index 134d72a..b36a6be 100644
--- a/model/modelcodon.h
+++ b/model/modelcodon.h
@@ -115,6 +115,16 @@ public:
     /** compute the corrected empirical omega (Kosiol et al 2007) */
     double computeEmpiricalOmega();
     
+	/**
+	 * setup the bounds for joint optimization with BFGS
+	 */
+	virtual void setBounds(double *lower_bound, double *upper_bound, bool *bound_check);
+
+	/**
+		optimize model parameters
+		@return the best likelihood
+	*/
+	virtual double optimizeParameters(double gradient_epsilon);
 
 	/** 3x4 matrix of nucleotide frequencies at 1st,2nd,3rd codon position */
 	double *ntfreq;
diff --git a/model/modeldna.cpp b/model/modeldna.cpp
index f3ce873..f881bbd 100644
--- a/model/modeldna.cpp
+++ b/model/modeldna.cpp
@@ -219,10 +219,14 @@ void ModelDNA::readRates(string str) throw(const char*) {
 		if (str[end_pos] == '?') {
 			param_fixed[i+1] = false;
 			end_pos++;
-			rate = i + 0.4;
+			rate = 1.0;
 			num_params++;
 		} else {
-			param_fixed[i+1] = true;
+            if (Params::getInstance().optimize_rate_matrix) {
+                num_params++;
+                param_fixed[i+1] = false;
+            } else
+                param_fixed[i+1] = true;
 			try {
 				rate = convert_double(str.substr(end_pos).c_str(), new_end_pos);
 			} catch (string str) {
@@ -233,7 +237,7 @@ void ModelDNA::readRates(string str) throw(const char*) {
 		if (rate < 0.0)
 			outError("Negative rates found");
 		if (i == nrates-1 && end_pos < str.length())
-			outError("String too long ", str);
+			outError("More than " + convertIntToString(nrates) + " rate parameters specified in " + str);
 		if (i < nrates-1 && end_pos >= str.length())
 			outError("Unexpected end of string ", str);
 		if (end_pos < str.length() && str[end_pos] != ',')
diff --git a/model/modelfactory.cpp b/model/modelfactory.cpp
index bb7efbd..ad015c6 100644
--- a/model/modelfactory.cpp
+++ b/model/modelfactory.cpp
@@ -102,8 +102,7 @@ ModelFactory::ModelFactory(Params &params, PhyloTree *tree, ModelsBlock *models_
 	is_storing = false;
 	joint_optimize = params.optimize_model_rate_joint;
 	fused_mix_rate = false;
-
-	string model_str = params.model_name;
+    string model_str = params.model_name;
 	string rate_str;
 
 	try {
@@ -472,57 +471,57 @@ ModelFactory::ModelFactory(Params &params, PhyloTree *tree, ModelsBlock *models_
 		//string rate_str = model_str.substr(pos);
 		if (posI != string::npos && posG != string::npos) {
 			site_rate = new RateGammaInvar(num_rate_cats, gamma_shape, params.gamma_median,
-					p_invar_sites, params.optimize_model_rate_joint, tree);
+					p_invar_sites, params.optimize_alg_gammai, tree, false);
 		} else if (posI != string::npos && posR != string::npos) {
-			site_rate = new RateFreeInvar(num_rate_cats, gamma_shape, freerate_params, p_invar_sites, !fused_mix_rate, params.optimize_alg, tree);
+			site_rate = new RateFreeInvar(num_rate_cats, gamma_shape, freerate_params, !fused_mix_rate, p_invar_sites, params.optimize_alg, tree);
 		} else if (posI != string::npos) {
 			site_rate = new RateInvar(p_invar_sites, tree);
 		} else if (posG != string::npos) {
 			site_rate = new RateGamma(num_rate_cats, gamma_shape, params.gamma_median, tree);
 		} else if (posR != string::npos) {
 			site_rate = new RateFree(num_rate_cats, gamma_shape, freerate_params, !fused_mix_rate, params.optimize_alg, tree);
-		} else if ((posX = rate_str.find("+M")) != string::npos) {
-			tree->setLikelihoodKernel(LK_NORMAL);
-			params.rate_mh_type = true;
-			if (rate_str.length() > posX+2 && isdigit(rate_str[posX+2])) {
-				num_rate_cats = convert_int(rate_str.substr(posX+2).c_str());
-				if (num_rate_cats < 0) outError("Wrong number of rate categories");
-			} else num_rate_cats = -1;
-			if (num_rate_cats >= 0)
-				site_rate = new RateMeyerDiscrete(num_rate_cats, params.mcat_type, 
-					params.rate_file, tree, params.rate_mh_type);
-			else
-				site_rate = new RateMeyerHaeseler(params.rate_file, tree, params.rate_mh_type);
-			site_rate->setTree(tree);
-		} else if ((posX = rate_str.find("+D")) != string::npos) {
-			tree->setLikelihoodKernel(LK_NORMAL);
-			params.rate_mh_type = false;
-			if (rate_str.length() > posX+2 && isdigit(rate_str[posX+2])) {
-				num_rate_cats = convert_int(rate_str.substr(posX+2).c_str());
-				if (num_rate_cats < 0) outError("Wrong number of rate categories");
-			} else num_rate_cats = -1;
-			if (num_rate_cats >= 0)
-				site_rate = new RateMeyerDiscrete(num_rate_cats, params.mcat_type, 
-					params.rate_file, tree, params.rate_mh_type);
-			else
-				site_rate = new RateMeyerHaeseler(params.rate_file, tree, params.rate_mh_type);
-			site_rate->setTree(tree);
-		} else if ((posX = rate_str.find("+NGS")) != string::npos) {
-			tree->setLikelihoodKernel(LK_NORMAL);
-			if (rate_str.length() > posX+4 && isdigit(rate_str[posX+4])) {
-				num_rate_cats = convert_int(rate_str.substr(posX+4).c_str());
-				if (num_rate_cats < 0) outError("Wrong number of rate categories");
-			} else num_rate_cats = -1;
-			site_rate = new NGSRateCat(tree, num_rate_cats);
-			site_rate->setTree(tree);
-		} else if ((posX = rate_str.find("+NGS")) != string::npos) {
-			tree->setLikelihoodKernel(LK_NORMAL);
-			if (rate_str.length() > posX+4 && isdigit(rate_str[posX+4])) {
-				num_rate_cats = convert_int(rate_str.substr(posX+4).c_str());
-				if (num_rate_cats < 0) outError("Wrong number of rate categories");
-			} else num_rate_cats = -1;
-			site_rate = new NGSRate(tree);
-			site_rate->setTree(tree);
+//		} else if ((posX = rate_str.find("+M")) != string::npos) {
+//			tree->setLikelihoodKernel(LK_NORMAL);
+//			params.rate_mh_type = true;
+//			if (rate_str.length() > posX+2 && isdigit(rate_str[posX+2])) {
+//				num_rate_cats = convert_int(rate_str.substr(posX+2).c_str());
+//				if (num_rate_cats < 0) outError("Wrong number of rate categories");
+//			} else num_rate_cats = -1;
+//			if (num_rate_cats >= 0)
+//				site_rate = new RateMeyerDiscrete(num_rate_cats, params.mcat_type, 
+//					params.rate_file, tree, params.rate_mh_type);
+//			else
+//				site_rate = new RateMeyerHaeseler(params.rate_file, tree, params.rate_mh_type);
+//			site_rate->setTree(tree);
+//		} else if ((posX = rate_str.find("+D")) != string::npos) {
+//			tree->setLikelihoodKernel(LK_NORMAL);
+//			params.rate_mh_type = false;
+//			if (rate_str.length() > posX+2 && isdigit(rate_str[posX+2])) {
+//				num_rate_cats = convert_int(rate_str.substr(posX+2).c_str());
+//				if (num_rate_cats < 0) outError("Wrong number of rate categories");
+//			} else num_rate_cats = -1;
+//			if (num_rate_cats >= 0)
+//				site_rate = new RateMeyerDiscrete(num_rate_cats, params.mcat_type, 
+//					params.rate_file, tree, params.rate_mh_type);
+//			else
+//				site_rate = new RateMeyerHaeseler(params.rate_file, tree, params.rate_mh_type);
+//			site_rate->setTree(tree);
+//		} else if ((posX = rate_str.find("+NGS")) != string::npos) {
+//			tree->setLikelihoodKernel(LK_NORMAL);
+//			if (rate_str.length() > posX+4 && isdigit(rate_str[posX+4])) {
+//				num_rate_cats = convert_int(rate_str.substr(posX+4).c_str());
+//				if (num_rate_cats < 0) outError("Wrong number of rate categories");
+//			} else num_rate_cats = -1;
+//			site_rate = new NGSRateCat(tree, num_rate_cats);
+//			site_rate->setTree(tree);
+//		} else if ((posX = rate_str.find("+NGS")) != string::npos) {
+//			tree->setLikelihoodKernel(LK_NORMAL);
+//			if (rate_str.length() > posX+4 && isdigit(rate_str[posX+4])) {
+//				num_rate_cats = convert_int(rate_str.substr(posX+4).c_str());
+//				if (num_rate_cats < 0) outError("Wrong number of rate categories");
+//			} else num_rate_cats = -1;
+//			site_rate = new NGSRate(tree);
+//			site_rate->setTree(tree);
 		} else if ((posX = rate_str.find("+K")) != string::npos) {
 			if (rate_str.length() > posX+2 && isdigit(rate_str[posX+2])) {
 				num_rate_cats = convert_int(rate_str.substr(posX+2).c_str());
@@ -705,80 +704,20 @@ double ModelFactory::initGTRGammaIParameters(RateHeterogeneity *rate, ModelSubst
 }
 
 double ModelFactory::optimizeParametersOnly(double gradient_epsilon) {
-    double logl;
-    if (Params::getInstance().fai && site_rate != NULL && model != NULL) {
-        cout << "Optimize substitutional and site rates with restart ..." << endl;
-        PhyloTree* tree = site_rate->phylo_tree;
-        double initAlpha = 0.1;
-        double maxInitAlpha = 1.0;
-        double alphaStep = 0.1;
-        double bestLogl = -DBL_MAX;
-        double bestAlpha = 0.0;
-        double bestPInvar = 0.0;
-        double initPInvar = site_rate->getPInvar();
-        int numRateEntries = model->getNumRateEntries();
-        double *initRates = new double[numRateEntries];
-        double *bestRates = new double[numRateEntries];
-        model->getRateMatrix(initRates);
-        int numStates = model->num_states;
-        double *initStateFreqs = new double[numStates];
-        model->getStateFrequency(initStateFreqs);
-        double *bestStateFreqs =  new double[numStates];
-        DoubleVector initBranchLengths;
-        DoubleVector bestBranchLengths;
-        tree->saveBranchLengths(initBranchLengths);
-
-        while (initAlpha <= maxInitAlpha) {
-            tree->restoreBranchLengths(initBranchLengths);
-            double initLogl = initGTRGammaIParameters(site_rate, model, initAlpha, initPInvar, initRates, initStateFreqs);
-            if (joint_optimize) {
-                logl = optimizeAllParameters(gradient_epsilon);
-            } else {
-                model->optimizeParameters(gradient_epsilon);
-                site_rate->optimizeParameters(gradient_epsilon);
-                logl = tree->optimizeAllBranches(1);
-            }
-            RateHeterogeneity* rateGammaInvar = site_rate;
-            ModelGTR* modelGTR = (ModelGTR*)(model);
-            double curAlpha = rateGammaInvar->getGammaShape();
-            double curPInvar = rateGammaInvar->getPInvar();
-            if (logl > bestLogl) {
-                bestLogl = logl;
-                bestAlpha = curAlpha;
-                bestPInvar = curPInvar;
-                modelGTR->getRateMatrix(bestRates);
-                modelGTR->getStateFrequency(bestStateFreqs);
-                tree->saveBranchLengths(bestBranchLengths);
-            }
-            if (verbose_mode >= VB_MED) {
-                cout << "Init. alpha = " << initAlpha << " / Init. PInvar = " << initPInvar << " / Init. Logl = " <<
-                initLogl << " / Est. alpha = " << curAlpha
-                << "/ Est. pinv = " << curPInvar << " / Final Logl = " << logl << endl;
-            }
-            initAlpha = initAlpha + alphaStep;
-        }
-        cout << "Best alpha = " << bestAlpha << " / best p_invar = " << bestPInvar << endl;
-        tree->restoreBranchLengths(bestBranchLengths);
-        logl = initGTRGammaIParameters(site_rate, model, bestAlpha, bestPInvar, bestRates, bestStateFreqs);
-        delete [] initRates;
-        delete [] bestRates;
-        delete [] initStateFreqs;
-        delete [] bestStateFreqs;
-    } else {
-        /* Optimize substitutional and heterogeneity rates independetly */
-        if (!joint_optimize) {
-            double model_lh = model->optimizeParameters(gradient_epsilon);
-            double rate_lh = site_rate->optimizeParameters(gradient_epsilon);
-            if (rate_lh == 0.0)
-                logl = model_lh;
-            else
-                logl = rate_lh;
-        } else {
-            /* Optimize substitutional and heterogeneity rates jointly using BFGS */
-            logl = optimizeAllParameters(gradient_epsilon);
-        }
-    }
-    return logl;
+	double logl;
+	/* Optimize substitution and heterogeneity rates independently */
+	if (!joint_optimize) {
+		double model_lh = model->optimizeParameters(gradient_epsilon);
+		double rate_lh = site_rate->optimizeParameters(gradient_epsilon);
+		if (rate_lh == 0.0)
+			logl = model_lh;
+		else
+			logl = rate_lh;
+	} else {
+		/* Optimize substitution and heterogeneity rates jointly using BFGS */
+		logl = optimizeAllParameters(gradient_epsilon);
+	}
+	return logl;
 }
 
 double ModelFactory::optimizeAllParameters(double gradient_epsilon) {
@@ -830,27 +769,20 @@ double ModelFactory::optimizeAllParameters(double gradient_epsilon) {
 }
 
 double ModelFactory::optimizeParametersGammaInvar(bool fixed_len, bool write_info, double logl_epsilon, double gradient_epsilon) {
-	PhyloTree *tree = site_rate->getTree();
-
-	RateHeterogeneity* site_rates = tree->getRate();
-	if (site_rates == NULL) {
-//		outError("The model must be +I+G");
-        // model is not +I+G, call conventional function instead
-		return optimizeParameters(fixed_len, write_info, logl_epsilon, gradient_epsilon);
-	}
-
+    if (!site_rate->isGammai())
+        return optimizeParameters(fixed_len, write_info, logl_epsilon, gradient_epsilon);
+        
+	double begin_time = getRealTime();
+        
+    PhyloTree *tree = site_rate->getTree();
 	double frac_const = tree->aln->frac_const_sites;
-	if (fixed_len) {
-		tree->setCurScore(tree->computeLikelihood());
-	} else {
-		tree->optimizeAllBranches(1);
-	}
-
+    tree->setCurScore(tree->computeLikelihood());
 
 	/* Back up branch lengths and substitutional rates */
-	DoubleVector lenvec;
+	DoubleVector initBranLens;
 	DoubleVector bestLens;
-	tree->saveBranchLengths(lenvec);
+	tree->saveBranchLengths(initBranLens);
+    bestLens = initBranLens;
 	int numRateEntries = tree->getModel()->getNumRateEntries();
 	double *rates = new double[numRateEntries];
 	double *bestRates = new double[numRateEntries];
@@ -861,64 +793,100 @@ double ModelFactory::optimizeParametersGammaInvar(bool fixed_len, bool write_inf
 
 	/* Best estimates found */
 	double *bestStateFreqs =  new double[numStates];
-	double bestLogl = tree->getCurScore();
+	double bestLogl = -DBL_MAX;
 	double bestAlpha = 0.0;
 	double bestPInvar = 0.0;
 
-	double testInterval = (frac_const - MIN_PINVAR*2) / 10;
+	double testInterval = (frac_const - MIN_PINVAR * 2) / 9;
 	double initPInv = MIN_PINVAR;
-	double initAlpha = site_rates->getGammaShape();
+	double initAlpha = site_rate->getGammaShape();
+
+    if (Params::getInstance().opt_gammai_fast) {
+        initPInv = frac_const/2;
+        bool stop = false;
+        while(!stop) {
+            if (write_info) {
+                cout << endl;
+                cout << "Testing with init. pinv = " << initPInv << " / init. alpha = "  << initAlpha << endl;
+            }
+
+            vector<double> estResults = optimizeGammaInvWithInitValue(fixed_len, logl_epsilon, gradient_epsilon, tree, site_rate, rates, state_freqs,
+                                                                   initPInv, initAlpha, initBranLens);
 
-    if (write_info)
-        cout << "testInterval: " << testInterval << endl;
 
-	// Now perform testing different inital p_inv values
-	while (initPInv <= frac_const) {
-        if (write_info) {
-            cout << endl;
-            cout << "Testing with init. pinv = " << initPInv << " / init. alpha = "  << initAlpha << endl;
+            if (write_info) {
+                cout << "Est. p_inv: " << estResults[0] << " / Est. gamma shape: " << estResults[1]
+                << " / Logl: " << estResults[2] << endl;
+            }
+
+            if (estResults[2] > bestLogl) {
+                bestLogl = estResults[2];
+                bestAlpha = estResults[1];
+                bestPInvar = estResults[0];
+                bestLens.clear();
+                tree->saveBranchLengths(bestLens);
+                tree->getModel()->getRateMatrix(bestRates);
+                tree->getModel()->getStateFrequency(bestStateFreqs);
+                if (estResults[0] < initPInv) {
+                    initPInv = estResults[0] - testInterval;
+                    if (initPInv < 0.0)
+                        initPInv = 0.0;
+                } else {
+                    initPInv = estResults[0] + testInterval;
+                    if (initPInv > frac_const)
+                        initPInv = frac_const;
+                }
+                //cout << "New initPInv = " << initPInv << endl;
+            }  else {
+                stop = true;
+            }
         }
-		tree->restoreBranchLengths(lenvec);
-		((ModelGTR*) tree->getModel())->setRateMatrix(rates);
-		((ModelGTR*) tree->getModel())->setStateFrequency(state_freqs);
-		tree->getModel()->decomposeRateMatrix();
-		site_rates->setPInvar(initPInv);
-		site_rates->setGammaShape(initAlpha);
-		tree->clearAllPartialLH();
-		optimizeParameters(fixed_len, write_info, logl_epsilon, gradient_epsilon);
-		double estAlpha = tree->getRate()->getGammaShape();
-		double estPInv = tree->getRate()->getPInvar();
-		double logl = tree->getCurScore();
-        if (write_info) {
-            cout << "Est. alpha: " << estAlpha << " / Est. pinv: " << estPInv
-            << " / Logl: " << logl << endl;
+    } else {
+        // Now perform testing different initial p_inv values
+        while (initPInv <= frac_const) {
+            if (write_info) {
+                cout << endl;
+                cout << "Testing with init. pinv = " << initPInv << " / init. alpha = "  << initAlpha << endl;
+            }
+            vector<double> estResults; // vector of p_inv, alpha and logl
+            if (Params::getInstance().opt_gammai_keep_bran)
+                estResults = optimizeGammaInvWithInitValue(fixed_len, logl_epsilon, gradient_epsilon, tree, site_rate, rates, state_freqs,
+                                                                          initPInv, initAlpha, bestLens);
+            else
+                estResults = optimizeGammaInvWithInitValue(fixed_len, logl_epsilon, gradient_epsilon, tree, site_rate, rates, state_freqs,
+                                                                      initPInv, initAlpha, initBranLens);
+            if (write_info) {
+                cout << "Est. p_inv: " << estResults[0] << " / Est. gamma shape: " << estResults[1]
+                << " / Logl: " << estResults[2] << endl;
+            }
+
+            initPInv = initPInv + testInterval;
+
+            if (estResults[2] > bestLogl + logl_epsilon) {
+                bestLogl = estResults[2];
+                bestAlpha = estResults[1];
+                bestPInvar = estResults[0];
+                bestLens.clear();
+                tree->saveBranchLengths(bestLens);
+                tree->getModel()->getRateMatrix(bestRates);
+                tree->getModel()->getStateFrequency(bestStateFreqs);
+            }
         }
-		initPInv = initPInv + testInterval;
-
-		if (tree->getCurScore() > bestLogl) {
-			bestLogl = logl;
-			bestAlpha = estAlpha;
-			bestPInvar = estPInv;
-			bestLens.clear();
-			tree->saveBranchLengths(bestLens);
-			tree->getModel()->getRateMatrix(bestRates);
-			tree->getModel()->getStateFrequency(bestStateFreqs);
-		}
-	}
+    }
 
-	site_rates->setGammaShape(bestAlpha);
-//	site_rates->setFixGammaShape(false);
-	site_rates->setPInvar(bestPInvar);
-//	site_rates->setFixPInvar(false);
+    site_rate->setGammaShape(bestAlpha);
+    site_rate->setPInvar(bestPInvar);
 	((ModelGTR*) tree->getModel())->setRateMatrix(bestRates);
 	((ModelGTR*) tree->getModel())->setStateFrequency(bestStateFreqs);
 	tree->restoreBranchLengths(bestLens);
 	tree->getModel()->decomposeRateMatrix();
+
 	tree->clearAllPartialLH();
 	tree->setCurScore(tree->computeLikelihood());
+    assert(fabs(tree->getCurScore() - bestLogl) < 1.0);
     if (write_info) {    
         cout << endl;
-        cout << "Best initial alpha: " << bestAlpha << " / initial pinv: " << bestPInvar << " / ";
+        cout << "Best p_inv: " << bestPInvar << " / best gamma shape: " << bestAlpha << " / ";
         cout << "Logl: " << tree->getCurScore() << endl;
     }
 
@@ -927,6 +895,10 @@ double ModelFactory::optimizeParametersGammaInvar(bool fixed_len, bool write_inf
 	delete [] bestRates;
 	delete [] bestStateFreqs;
     
+	double elapsed_secs = getRealTime() - begin_time;
+	if (write_info)
+		cout << "Parameters optimization took " << elapsed_secs << " sec" << endl;
+    
     // updating global variable is not safe!
 //	Params::getInstance().testAlpha = false;
     
@@ -934,6 +906,29 @@ double ModelFactory::optimizeParametersGammaInvar(bool fixed_len, bool write_inf
     return tree->getCurScore();
 }
 
+vector<double> ModelFactory::optimizeGammaInvWithInitValue(bool fixed_len, double logl_epsilon, double gradient_epsilon,
+                                                 PhyloTree *tree, RateHeterogeneity *site_rates, double *rates,
+                                                 double *state_freqs, double initPInv, double initAlpha,
+                                                 DoubleVector &lenvec) {
+    tree->restoreBranchLengths(lenvec);
+    ((ModelGTR*) tree->getModel())->setRateMatrix(rates);
+    ((ModelGTR*) tree->getModel())->setStateFrequency(state_freqs);
+    tree->getModel()->decomposeRateMatrix();
+    site_rates->setPInvar(initPInv);
+    site_rates->setGammaShape(initAlpha);
+    tree->clearAllPartialLH();
+    optimizeParameters(fixed_len, false, logl_epsilon, gradient_epsilon);
+
+    vector<double> estResults;
+    double estPInv = tree->getRate()->getPInvar();
+    double estAlpha = tree->getRate()->getGammaShape();
+    double logl = tree->getCurScore();
+    estResults.push_back(estPInv);
+    estResults.push_back(estAlpha);
+    estResults.push_back(logl);
+    return estResults;
+}
+
 
 double ModelFactory::optimizeParameters(bool fixed_len, bool write_info,
                                         double logl_epsilon, double gradient_epsilon) {
@@ -948,19 +943,9 @@ double ModelFactory::optimizeParameters(bool fixed_len, bool write_info,
 	assert(tree);
 
 	stopStoringTransMatrix();
-        // modified by Thomas Wong on Sept 11, 15
-        // no optimization of branch length in the first round
-        cur_lh = tree->computeLikelihood();
-        /*
-	if (fixed_len || tree->params->num_param_iterations == 0)
-		cur_lh = tree->computeLikelihood();
-	else {
-        if (!Params::getInstance().testAlpha && !Params::getInstance().fai)
-		    cur_lh = tree->optimizeAllBranches(1);
-        else
-            cur_lh = tree->computeLikelihood();
-	}
-        */
+    // modified by Thomas Wong on Sept 11, 15
+    // no optimization of branch length in the first round
+    cur_lh = tree->computeLikelihood();
     tree->setCurScore(cur_lh);
 	if (verbose_mode >= VB_MED || write_info) 
 		cout << "1. Initial log-likelihood: " << cur_lh << endl;
@@ -977,31 +962,11 @@ double ModelFactory::optimizeParameters(bool fixed_len, bool write_info,
 	//bool optimize_rate = true;
 //	double gradient_epsilon = min(logl_epsilon, 0.01); // epsilon for parameters starts at epsilon for logl
 	for (i = 2; i < tree->params->num_param_iterations; i++) {
-//        if (gradient_epsilon < 0.001)
-//            gradient_epsilon = 0.001;
-		/*
-		double model_lh = model->optimizeParameters(param_epsilon);
-		double rate_lh = 0.0;
-		if (optimize_rate) {
-			rate_lh = site_rate->optimizeParameters(param_epsilon);
-			if (rate_lh < model_lh+1e-6 && model_lh != 0.0) optimize_rate = false;
-		}
-		if (model_lh == 0.0 && rate_lh == 0.0) {
-			if (!fixed_len) cur_lh = tree->optimizeAllBranches(100, logl_epsilon);
-			break;
-		}
-		double new_lh = (rate_lh != 0.0) ? rate_lh : model_lh;
-		*/
         double new_lh;
 
-        if (Params::getInstance().fai && i > 2) {
-            Params::getInstance().fai = false;
-        }
-
-                // changed to opimise edge length first, and then Q,W,R inside the loop by Thomas on Sept 11, 15
+        // changed to opimise edge length first, and then Q,W,R inside the loop by Thomas on Sept 11, 15
 		if (!fixed_len)
 			new_lh = tree->optimizeAllBranches(min(i,3), logl_epsilon);  // loop only 3 times in total (previously in v0.9.6 5 times)
-
         new_lh = optimizeParametersOnly(gradient_epsilon);
 
 		if (new_lh == 0.0) {
@@ -1013,18 +978,6 @@ double ModelFactory::optimizeParameters(bool fixed_len, bool write_info,
 			site_rate->writeInfo(cout);
 		}
 		if (new_lh > cur_lh + logl_epsilon) {
-			if (Params::getInstance().testAlpha && Params::getInstance().testAlphaEpsAdaptive) {
-				if (i == 3) {
-					double newEpsilon = (new_lh - cur_lh) * 0.01;
-					if (newEpsilon > defaultEpsilon) {
-						logl_epsilon = newEpsilon;
-						cout << "Estimate model parameters with new epsilon = " << logl_epsilon << endl;
-					}
-				}
-			}
-
-//			if (gradient_epsilon > (new_lh - cur_lh) * logl_epsilon)
-//				gradient_epsilon = (new_lh - cur_lh) * logl_epsilon;
 			cur_lh = new_lh;
 			if (verbose_mode >= VB_MED || write_info)
 				cout << i << ". Current log-likelihood: " << cur_lh << endl;
@@ -1036,12 +989,17 @@ double ModelFactory::optimizeParameters(bool fixed_len, bool write_info,
 	}
 
 	// normalize rates s.t. branch lengths are #subst per site
-    double mean_rate = site_rate->rescaleRates();
-    if (mean_rate != 1.0) {
-		tree->scaleLength(mean_rate);
-		tree->clearAllPartialLH();
+//    if (Params::getInstance().optimize_alg_gammai != "EM") 
+    {
+        double mean_rate = site_rate->rescaleRates();
+        if (mean_rate != 1.0) {
+            tree->scaleLength(mean_rate);
+            tree->clearAllPartialLH();
+        }
     }
     
+
+    
 	if (verbose_mode >= VB_MED || write_info)
 		cout << "Optimal log-likelihood: " << cur_lh << endl;
 
diff --git a/model/modelfactory.h b/model/modelfactory.h
index baedb6e..842d69c 100644
--- a/model/modelfactory.h
+++ b/model/modelfactory.h
@@ -166,7 +166,7 @@ public:
 		@return the best likelihood 
 	*/
 	virtual double optimizeParameters(bool fixed_len = false, bool write_info = true,
-                                      double logl_epsilon = 0.1, double gradient_epsilon = 0.001);
+                                      double logl_epsilon = 0.1, double gradient_epsilon = 0.0001);
 
 	/**
 	 *  optimize model parameters and tree branch lengths for the +I+G model
@@ -174,7 +174,8 @@ public:
 	 * 	@param fixed_len TRUE to fix branch lengths, default is false
 	 *	@return the best likelihood
 	 */
-	virtual double optimizeParametersGammaInvar(bool fixed_len = false, bool write_info = true, double logl_epsilon = 0.1, double gradient_epsilon = 0.001);
+	virtual double optimizeParametersGammaInvar(bool fixed_len = false, bool write_info = true,
+												double logl_epsilon = 0.1, double gradient_epsilon = 0.001);
 
 	/**
 	 * @return TRUE if parameters are at the boundary that may cause numerical unstability
@@ -259,6 +260,9 @@ protected:
 	*/
 	virtual bool getVariables(double *variables);
 
+    vector<double> optimizeGammaInvWithInitValue(bool fixed_len, double logl_epsilon, double gradient_epsilon, PhyloTree *tree,
+                                       RateHeterogeneity *site_rates, double *rates, double *state_freqs,
+                                       double initPInv, double initAlpha, DoubleVector &lenvec);
 };
 
 #endif
diff --git a/model/modelgtr.cpp b/model/modelgtr.cpp
index 557a291..7a7b27d 100644
--- a/model/modelgtr.cpp
+++ b/model/modelgtr.cpp
@@ -55,9 +55,9 @@ ModelGTR::ModelGTR(PhyloTree *tree, bool count_rates)
 		
 	eigen_coeff = aligned_alloc<double>(ncoeff);
 
-	if (count_rates) 
-		phylo_tree->aln->computeEmpiricalRate(rates);
-	else
+//	if (count_rates) 
+//		computeEmpiricalRate();
+//	else
 		for (i=0; i < nrate; i++) rates[i] = 1.0;
 	//eigen_coeff_derv1 = new double[ncoeff];
 	//eigen_coeff_derv2 = new double[ncoeff];
@@ -558,7 +558,7 @@ double ModelGTR::optimizeParameters(double gradient_epsilon) {
 	
 	// return if nothing to be optimized
 	if (ndim == 0) return 0.0;
-
+    
 	if (verbose_mode >= VB_MAX)
 		cout << "Optimizing " << name << " model parameters..." << endl;
 
diff --git a/model/modelgtr.h b/model/modelgtr.h
index c26bc52..a237168 100644
--- a/model/modelgtr.h
+++ b/model/modelgtr.h
@@ -50,7 +50,7 @@ public:
 		@param tree associated tree for the model
 	*/
     ModelGTR(PhyloTree *tree, bool count_rates = true);
-	
+
 	/**
 		init the model and decompose the rate matrix. This function should always be called
 		after creating the class. Otherwise it will not work properly.
diff --git a/model/modelmixture.cpp b/model/modelmixture.cpp
index 2923b3a..0381513 100644
--- a/model/modelmixture.cpp
+++ b/model/modelmixture.cpp
@@ -721,7 +721,7 @@ frequency Fclass1 = 0.02549352 0.01296012 0.005545202 0.006005566 0.01002193 0.0
 frequency Fclass2 = 0.09596966 0.008786096 0.02805857 0.01880183 0.005026264 0.006454635 0.01582725 0.7215719 0.003379354 0.002257725 0.003013483 0.01343441 0.001511657 0.002107865 0.006751404 0.04798539 0.01141559 0.000523736 0.002188483 0.004934972;\n\
 frequency Fclass3 = 0.01726065 0.005467988 0.01092937 0.3627871 0.001046402 0.01984758 0.5149206 0.004145081 0.002563289 0.002955213 0.005286931 0.01558693 0.002693098 0.002075771 0.003006167 0.01263069 0.01082144 0.000253451 0.001144787 0.004573568;\n\
 frequency Fclass4 = 0.1263139 0.09564027 0.07050061 0.03316681 0.02095119 0.05473468 0.02790523 0.009007538 0.03441334 0.005855319 0.008061884 0.1078084 0.009019514 0.05018693 0.07948 0.09447839 0.09258897 0.01390669 0.05367769 0.01230413;\n\
-frequency CF4 = FMIX{empirical,Fclass1,Fclass2,Fclass3,Fclass4};\n\
+model CF4 = POISSON+FMIX{empirical,Fclass1,Fclass2,Fclass3,Fclass4}+G;\n\
 model JTTCF4G = JTT+FMIX{empirical,Fclass1,Fclass2,Fclass3,Fclass4}+G;\n\
 \n\
 [ ---------------------------------------------------------\n\
diff --git a/model/modelnonrev.cpp b/model/modelnonrev.cpp
index 315c43b..f63b155 100644
--- a/model/modelnonrev.cpp
+++ b/model/modelnonrev.cpp
@@ -27,9 +27,9 @@ ModelNonRev::ModelNonRev(PhyloTree *tree, bool count_rates)
     delete [] rates;
     rates = new double [num_params+1];
     memset(rates, 0, sizeof(double) * (num_params+1));
-	if (count_rates)
-		phylo_tree->aln->computeEmpiricalRateNonRev(rates);
-	else 
+//	if (count_rates)
+//		computeEmpiricalRate();
+//	else 
 		for (int i = 0; i <= num_params; i++) 
 			rates[i] = 1.0;
     name = "UNREST";
diff --git a/model/ratefree.cpp b/model/ratefree.cpp
index 3265a2b..4e84f81 100644
--- a/model/ratefree.cpp
+++ b/model/ratefree.cpp
@@ -87,7 +87,7 @@ void RateFree::setNCategory(int ncat) {
 
     int i;
 	for (i = 0; i < ncategory; i++)
-        prop[i] = 1.0/ncategory;
+        prop[i] = (1.0-getPInvar())/ncategory;
     
 //	double sum_prop = (ncategory)*(ncategory+1)/2.0;
 //	double sum = 0.0;
@@ -110,6 +110,7 @@ void RateFree::setNCategory(int ncat) {
 
 void RateFree::setRateAndProp(RateFree *input) {
     assert(input->ncategory == ncategory-1);
+    setPInvar(input->getPInvar());
     int k = 0, i;
     // get the category k with largest proportion
     for (i = 1; i < ncategory-1; i++)
@@ -272,7 +273,7 @@ void RateFree::setBounds(double *lower_bound, double *upper_bound, bool *bound_c
         }
     } else if (optimizing_params == 1){
         // rates
-        for (i = 1; i <= ncategory; i++) {
+        for (i = 1; i < ncategory; i++) {
             lower_bound[i] = MIN_FREE_RATE;
             upper_bound[i] = MAX_FREE_RATE;
             bound_check[i] = false;
@@ -471,7 +472,7 @@ double RateFree::optimizeWithEM() {
     model_fac->site_rate = site_rate;
     tree->model_factory = model_fac;
     tree->setParams(phylo_tree->params);
-        
+    double old_score = 0.0;
     // EM algorithm loop described in Wang, Li, Susko, and Roger (2008)
     for (int step = 0; step < ncategory; step++) {
         // first compute _pattern_lh_cat
@@ -482,13 +483,19 @@ double RateFree::optimizeWithEM() {
             writeInfo(cout);
         }
         assert(score < 0);
+        
+        if (step > 0)
+            assert(score > old_score-0.1);
+            
+        old_score = score;
+        
         memset(new_prop, 0, nmix*sizeof(double));
                 
         // E-step
         // decoupled weights (prop) from _pattern_lh_cat to obtain L_ci and compute pattern likelihood L_i
         for (ptn = 0; ptn < nptn; ptn++) {
             double *this_lk_cat = phylo_tree->_pattern_lh_cat + ptn*nmix;
-            double lk_ptn = 0.0;
+            double lk_ptn = phylo_tree->ptn_invar[ptn];
             for (c = 0; c < nmix; c++) {
                 lk_ptn += this_lk_cat[c];
             }
@@ -505,6 +512,7 @@ double RateFree::optimizeWithEM() {
         
         // M-step, update weights according to (*)        
         int maxpropid = 0;
+        double new_pinvar = 0.0;    
         for (c = 0; c < nmix; c++) {
             new_prop[c] = new_prop[c] / phylo_tree->getAlnNSite();
             if (new_prop[c] > new_prop[maxpropid])
@@ -530,9 +538,19 @@ double RateFree::optimizeWithEM() {
             sum_prop += new_prop[c];
             converged = converged && (fabs(prop[c]-new_prop[c]) < 1e-4);
             prop[c] = new_prop[c];
+            new_pinvar += new_prop[c];
         }
 
-        assert(fabs(sum_prop-1.0) < MIN_PROP);
+        new_pinvar = 1.0 - new_pinvar;
+
+        if (new_pinvar != 0.0) {
+            converged = converged && (fabs(getPInvar()-new_pinvar) < 1e-4);
+            setPInvar(new_pinvar);
+//            setOptimizePInvar(false);
+            phylo_tree->computePtnInvar();
+        }
+        
+        assert(fabs(sum_prop+new_pinvar-1.0) < MIN_PROP);
         
         // now optimize rates one by one
         double sum = 0.0;
diff --git a/model/ratefreeinvar.cpp b/model/ratefreeinvar.cpp
index 0ec731c..b70ebb4 100644
--- a/model/ratefreeinvar.cpp
+++ b/model/ratefreeinvar.cpp
@@ -13,6 +13,7 @@ RateFreeInvar::RateFreeInvar(int ncat, double start_alpha, string params, bool s
 	cur_optimize = 0;
 	name = "+I" + name;
 	full_name = "Invar+" + full_name;
+    setNCategory(ncat);
 }
 
 void RateFreeInvar::saveCheckpoint() {
diff --git a/model/ratefreeinvar.h b/model/ratefreeinvar.h
index cb1422c..fbd912c 100644
--- a/model/ratefreeinvar.h
+++ b/model/ratefreeinvar.h
@@ -42,7 +42,7 @@ public:
 		@param category category ID from 0 to #category-1
 		@return the proportion of the specified category
 	*/
-	virtual double getProp(int category) { return (1.0-p_invar)*prop[category]; }
+	virtual double getProp(int category) { return prop[category]; }
 
 	/**
 		get the rate of a specified category. Default returns 1.0 since it is homogeneous model
diff --git a/model/rategamma.cpp b/model/rategamma.cpp
index 48e0814..030abdc 100644
--- a/model/rategamma.cpp
+++ b/model/rategamma.cpp
@@ -1,6 +1,8 @@
 /***************************************************************************
- *   Copyright (C) 2009 by BUI Quang Minh   *
- *   minh.bui at univie.ac.at   *
+ *   Copyright (C) 2009-2015 by                                            *
+ *   BUI Quang Minh <minh.bui at univie.ac.at>                                *
+ *   Lam-Tung Nguyen <nltung at gmail.com>                                    *
+ *                                                                         *
  *                                                                         *
  *   This program is free software; you can redistribute it and/or modify  *
  *   it under the terms of the GNU General Public License as published by  *
@@ -68,6 +70,8 @@ void RateGamma::setNCategory(int ncat) {
 	ncategory = ncat;
 	if (rates) delete [] rates;
 	rates = new double[ncategory];
+    for (int cat = 0; cat < ncategory; cat++)
+        rates[cat] = 1.0;
 	name = "+G" + convertIntToString(ncategory);
 	full_name = "Gamma with " + convertIntToString(ncategory) + " categories";
 	computeRates();
@@ -95,6 +99,10 @@ void RateGamma::computeRates() {
 		return;
 	}
 
+    double curScale = 0.0;
+    for (cat = 0; cat < ncategory; cat++)
+        curScale += rates[cat];
+
 	if (!cut_median) {
 		computeRatesMean();
 	} else {
@@ -119,10 +127,21 @@ void RateGamma::computeRates() {
 	if (phylo_tree && phylo_tree->params && phylo_tree->params->no_rescale_gamma_invar)
 		return;
 
+    double newScale = 0.0;
+    for (cat = 0; cat < ncategory; cat++)
+        newScale += rates[cat];
+
+    if (newScale != curScale) {
+        for (cat = 0; cat < ncategory; cat++)
+            rates[cat] *= curScale/newScale;    
+    }
+
 	/* if invariable sites are present */
-	double p_inv = getPInvar();
-	for (cat = 0; cat < ncategory; cat++)
-		rates[cat] = rates[cat]/(1.0 - p_inv);
+//	if (Params::getInstance().optimize_alg_gammai != "EM") {
+//		double p_inv = getPInvar();
+//		for (cat = 0; cat < ncategory; cat++)
+//			rates[cat] = rates[cat]/(1.0 - p_inv);
+//	}
 
 	/* check for very small rates */
 //	for (cat = 0; cat < ncategory; cat ++)
diff --git a/model/rategammainvar.cpp b/model/rategammainvar.cpp
index 2b0641d..70ebaa7 100644
--- a/model/rategammainvar.cpp
+++ b/model/rategammainvar.cpp
@@ -1,6 +1,8 @@
 /***************************************************************************
- *   Copyright (C) 2009 by BUI Quang Minh   *
- *   minh.bui at univie.ac.at   *
+ *   Copyright (C) 2009-2015 by                                            *
+ *   BUI Quang Minh <minh.bui at univie.ac.at>                                *
+ *   Lam-Tung Nguyen <nltung at gmail.com>                                    *
+ *                                                                         *
  *                                                                         *
  *   This program is free software; you can redistribute it and/or modify  *
  *   it under the terms of the GNU General Public License as published by  *
@@ -20,15 +22,25 @@
 #include "rategammainvar.h"
 
 RateGammaInvar::RateGammaInvar(int ncat, double shape, bool median,
-		double p_invar_sites, bool simultaneous, PhyloTree *tree) :
+		double p_invar_sites, string optimize_alg, PhyloTree *tree, bool testParamDone) :
 		RateInvar(p_invar_sites, tree), RateGamma(ncat, shape, median, tree) {
 	name = "+I" + name;
 	full_name = "Invar+" + full_name;
-	joint_optimize = simultaneous;
+    this->optimize_alg = optimize_alg;
     cur_optimize = 0;
+    this->testParamDone = testParamDone;
+    for (int cat = 0; cat < ncategory; cat++)
+        rates[cat] = 1.0/(1.0-p_invar);
 	computeRates();
 }
 
+void RateGammaInvar::setPInvar(double pInvar) {
+    p_invar = pInvar;
+    for (int cat = 0; cat < ncategory; cat++)
+        rates[cat] = 1.0/(1.0-p_invar);
+    computeRates();
+}
+
 void RateGammaInvar::saveCheckpoint() {
     checkpoint->startStruct("RateGammaInvar");
 //    CKP_SAVE(joint_optimize);
@@ -48,6 +60,8 @@ void RateGammaInvar::restoreCheckpoint() {
 
 void RateGammaInvar::setNCategory(int ncat) {
 	RateGamma::setNCategory(ncat);
+    for (int cat = 0; cat < ncategory; cat++)
+        rates[cat] = 1.0/(1.0-p_invar);
 	name = "+I" + name;
 	full_name = "Invar+" + full_name;
 	computeRates();
@@ -60,8 +74,11 @@ string RateGammaInvar::getNameParams() {
 double RateGammaInvar::computeFunction(double value) {
 	if (cur_optimize == 0)
 		gamma_shape = value;
-	else
+	else {
 		p_invar = value;
+        for (int cat = 0; cat < ncategory; cat++)
+            rates[cat] = 1.0/(1.0-p_invar);
+    }
 	// need to compute rates again if p_inv or Gamma shape changes!
 	computeRates();
 	phylo_tree->clearAllPartialLH();
@@ -114,79 +131,53 @@ double RateGammaInvar::optimizeParameters(double gradient_epsilon) {
 	if (ndim == 0)
 		return phylo_tree->computeLikelihood();
 
+    if (verbose_mode >= VB_MED)
+        cout << "Optimizing " << name << " model parameters by " << optimize_alg << " algorithm..." << endl;
 
-	if (!joint_optimize) {
+	if (optimize_alg.find("EM_RR") != string::npos) {
+        return randomRestartOptimization(gradient_epsilon);
+    } else if (optimize_alg.find("Brent") != string::npos) {
 		double lh = phylo_tree->computeLikelihood();
 		cur_optimize = 0;
-		double gamma_lh;
-		if (Params::getInstance().testAlpha) {
-			gamma_lh = RateGamma::optimizeParameters(gradient_epsilon, 0.05, 10);
-		} else {
-            gamma_lh = RateGamma::optimizeParameters(gradient_epsilon);
-        }
+		double gamma_lh = RateGamma::optimizeParameters(gradient_epsilon);
 		assert(gamma_lh >= lh-0.1);
 		cur_optimize = 1;
 		double invar_lh = -DBL_MAX;
         invar_lh = RateInvar::optimizeParameters(gradient_epsilon);
 		assert(invar_lh >= gamma_lh-0.1);
-		//lh = tree_lh;
-
-		//assert(gamma_lh >= invar_lh - 0.1);
-//		phylo_tree->clearAllPartialLH();
-//		return gamma_lh;
         cur_optimize = 0;
         return invar_lh;
-	}
-
-/*
-	if (!joint_optimize) {
-//		double lh = phylo_tree->computeLikelihood();
-		cur_optimize = 1;
-		double invar_lh = -DBL_MAX;
-		invar_lh = RateInvar::optimizeParameters(gradient_epsilon);
-//		assert(tree_lh >= lh-0.1);
-//		lh = tree_lh;
-		cur_optimize = 0;
-		double gamma_lh;
-		if (Params::getInstance().testAlpha) {
-			gamma_lh = RateGamma::optimizeParameters(gradient_epsilon, 0.05, 10);
-		} else {
-			gamma_lh = RateGamma::optimizeParameters(gradient_epsilon);
-		}
-		//assert(gamma_lh >= invar_lh - 0.1);
-		phylo_tree->clearAllPartialLH();
-		return gamma_lh;
-	}
-*/
-
-	if (verbose_mode >= VB_MAX)
-		cout << "Optimizing " << name << " model parameters by BFGS..." << endl;
-
-	//if (freq_type == FREQ_ESTIMATE) scaleStateFreq(false);
-
-	double *variables = new double[ndim+1];
-	double *upper_bound = new double[ndim+1];
-	double *lower_bound = new double[ndim+1];
-	bool *bound_check = new bool[ndim+1];
-	double score;
-
-	// by BFGS algorithm
-	setVariables(variables);
-	setBounds(lower_bound, upper_bound, bound_check);
-
-	score = -minimizeMultiDimen(variables, ndim, lower_bound, upper_bound, bound_check, max(gradient_epsilon, TOL_GAMMA_SHAPE));
-
-	getVariables(variables);
-
-	phylo_tree->clearAllPartialLH();
-    score = phylo_tree->computeLikelihood();
-
-	delete [] bound_check;
-	delete [] lower_bound;
-	delete [] upper_bound;
-	delete [] variables;
-
-	return score;
+	} else if (optimize_alg.find("EM") != string::npos) {
+        return optimizeWithEM(gradient_epsilon);
+    } else if (optimize_alg.find("BFGS") != string::npos) {
+        //if (freq_type == FREQ_ESTIMATE) scaleStateFreq(false);
+        double *variables = new double[ndim+1];
+        double *upper_bound = new double[ndim+1];
+        double *lower_bound = new double[ndim+1];
+        bool *bound_check = new bool[ndim+1];
+        double score;
+
+        // by BFGS algorithm
+        setVariables(variables);
+        setBounds(lower_bound, upper_bound, bound_check);
+
+        score = -minimizeMultiDimen(variables, ndim, lower_bound, upper_bound, bound_check, max(gradient_epsilon, TOL_GAMMA_SHAPE));
+
+        getVariables(variables);
+
+        phylo_tree->clearAllPartialLH();
+        score = phylo_tree->computeLikelihood();
+
+        delete [] bound_check;
+        delete [] lower_bound;
+        delete [] upper_bound;
+        delete [] variables;
+
+        return score;
+    } else {
+        string errMsg = "Unknown optimization algorithm: " + optimize_alg;
+        outError(errMsg.c_str());
+    }
 }
 
 
@@ -219,3 +210,115 @@ int RateGammaInvar::computePatternRates(DoubleVector &pattern_rates, IntVector &
     return ncategory+1;
 }
 
+double RateGammaInvar::optimizeWithEM(double gradient_epsilon) {
+    double curlh = phylo_tree->computeLikelihood();
+
+    cur_optimize = 0;
+    double gamma_lh = RateGamma::optimizeParameters(gradient_epsilon);
+    assert(gamma_lh > curlh - 1.0);
+    curlh = gamma_lh;
+
+    size_t ncat = getNRate();
+    size_t nptn = phylo_tree->aln->getNPattern();
+    size_t nSites = phylo_tree->aln->getNSite();
+
+    // Compute the pattern likelihood for each category (invariable and variable category)
+    phylo_tree->computePatternLhCat(WSL_RATECAT);
+    phylo_tree->computePtnInvar();
+
+    double ppInvar = 0;
+    for (size_t ptn = 0; ptn < nptn; ptn++) {
+        double *this_lk_cat = phylo_tree->_pattern_lh_cat + ptn * ncat;
+        double lk_ptn = phylo_tree->ptn_invar[ptn];
+        for (size_t cat = 0; cat < ncat; cat++) {
+            lk_ptn += this_lk_cat[cat];
+        }
+        assert(lk_ptn != 0.0);
+        ppInvar += (phylo_tree->ptn_invar[ptn]) * phylo_tree->ptn_freq[ptn] / lk_ptn;
+    }
+
+    double newPInvar = ppInvar / nSites;
+    assert(newPInvar < 1.0);
+    //double curPInv = getPInvar();
+//    setPInvar(newPInvar);
+    p_invar = newPInvar;
+    phylo_tree->clearAllPartialLH();
+//    phylo_tree->scaleLength((1-newPInvar)/(1-curPInv));
+    double pinvLH = phylo_tree->computeLikelihood();
+    assert(pinvLH > curlh - 1.0);
+    return pinvLH;
+}
+
+double RateGammaInvar::randomRestartOptimization(double gradient_epsilon) {
+    if (testParamDone) {
+        return optimizeWithEM(gradient_epsilon);
+    }
+    double frac_const = phylo_tree->aln->frac_const_sites;
+    double bestLogl = phylo_tree->getCurScore();
+    double bestAlpha = 0.0;
+    double bestPInvar = 0.0;
+    double testInterval = (frac_const - MIN_PINVAR*2) / 10;
+    double initPInv = MIN_PINVAR;
+    double initAlpha = getGammaShape();
+
+    while (initPInv <= frac_const) {
+        if (verbose_mode >= VB_MED) {
+            cout << endl;
+            cout << "Testing with init. pinv = " << initPInv << " / init. alpha = " << initAlpha << endl;
+        }
+        setPInvar(initPInv);
+        setGammaShape(initAlpha);
+        phylo_tree->clearAllPartialLH();
+        double logl = optimizeWithEM(gradient_epsilon);
+        double estAlpha = getGammaShape();
+        double estPInv = getPInvar();
+
+        if (verbose_mode >= VB_MED) {
+            cout << "Est. alpha: " << estAlpha << " / Est. pinv: " << estPInv
+            << " / Logl: " << logl << endl;
+        }
+
+        initPInv = initPInv + testInterval;
+
+        if (logl > bestLogl) {
+            bestLogl = logl;
+            bestAlpha = estAlpha;
+            bestPInvar = estPInv;
+        }
+    }
+
+    if (verbose_mode >= VB_MED) {
+        cout << "Best gamma shape: " << bestAlpha << " / best p_inv: " << bestPInvar
+        << " / logl: " << bestLogl << endl;
+    }
+
+    setPInvar(bestPInvar);
+    setGammaShape(bestAlpha);
+    phylo_tree->clearAllPartialLH();
+    testParamDone = true;
+    return phylo_tree->computeLikelihood();
+}
+
+
+double RateGammaInvar::meanRates() {
+	double ret = 0.0;
+	for (int i = 0; i < ncategory; i++)
+		ret += rates[i];
+    ret *= (1.0-getPInvar())/ncategory;
+	return ret;
+}
+
+/**
+ * rescale rates s.t. mean rate is equal to 1, useful for FreeRate model
+ * @return rescaling factor
+ */
+double RateGammaInvar::rescaleRates() {
+	double norm = meanRates();
+	for (int i = 0; i < ncategory; i++)
+		rates[i] /= norm;
+	return norm;
+}
+
+
+
+
diff --git a/model/rategammainvar.h b/model/rategammainvar.h
index 6d7926c..3a7e3bc 100644
--- a/model/rategammainvar.h
+++ b/model/rategammainvar.h
@@ -37,7 +37,14 @@ public:
 		@param tree associated phylogenetic tree
 		@param testAlpha turn on option for doing random restart optimization of alpha and p_invar
 	*/
-    RateGammaInvar(int ncat, double shape, bool median, double p_invar_sites, bool simultaneous, PhyloTree *tree);
+    RateGammaInvar(int ncat, double shape, bool median, double p_invar_sites, string optimize_alg, PhyloTree *tree, bool testParamDone);
+
+    /**
+     *  check whether +I+G is used
+     */
+    virtual bool isGammai() const {
+        return true;
+    }
 
     /**
         save object into the checkpoint
@@ -67,10 +74,20 @@ public:
 		set the proportion of invariable sites. Default: do nothing
 		@param pinv the proportion of invariable sites
 	*/
-	virtual void setPInvar(double pInvar) {
-		p_invar = pInvar;
-        computeRates();
-	}
+	virtual void setPInvar(double pInvar);
+
+	/**
+	 * used to normal branch lengths if mean rate is not equal to 1 (e.g. FreeRate model)
+	 * @return mean rate, default = 1
+	 */
+	virtual double meanRates();
+
+	/**
+	 * rescale rates s.t. mean rate is equal to 1, useful for FreeRate model
+	 * @return rescaling factor
+	 */
+	virtual double rescaleRates();
+
 
 	/**
 	 * @return model name with parameters in form of e.g. GTR{a,b,c,d,e,f}
@@ -95,6 +112,11 @@ public:
 	*/
 	virtual double optimizeParameters(double gradient_epsilon);
 
+	/**
+        optimize rate parameters using EM algorithm
+        @return log-likelihood of optimized parameters
+    */
+	double optimizeWithEM();
 
 	/**
 		return the number of dimensions
@@ -121,7 +143,7 @@ public:
 	virtual void writeParameters(ostream &out);
 
 	/** TRUE to jointly optimize gamma shape and p_invar using BFGS, default: FALSE */
-	bool joint_optimize;
+	//bool joint_optimize;
 
 	virtual void setNCategory(int ncat);
 
@@ -150,16 +172,25 @@ protected:
 	virtual bool getVariables(double *variables);
 
 private:
-
-	/**
-	 *  TRUE to turn on random restart optimization for estimating alpha and p_invar
-	 */
-//	bool rr_ai;
+    /**
+     *  Determine which algorithm is used to optimized p_inv and alpha
+     */
+	string optimize_alg;
 
 	/**
 		current parameter to optimize. 0 if gamma shape or 1 if p_invar.
 	*/
 	int cur_optimize;
+
+    /**
+     *  Optimize p_inv and gamma shape using the EM algorithm
+     */
+	double optimizeWithEM(double gradient_epsilon);
+
+    /**
+     *  Start with different initial values of p_inv
+     */
+    double randomRestartOptimization(double gradient_epsilon);
 };
 
 #endif
diff --git a/model/rateheterogeneity.cpp b/model/rateheterogeneity.cpp
index fe20164..626ed00 100644
--- a/model/rateheterogeneity.cpp
+++ b/model/rateheterogeneity.cpp
@@ -70,9 +70,23 @@ void RateHeterogeneity::writeSiteRates(ostream &out, DoubleVector &pattern_rates
 		out << i+1 << "\t";
 		if (pattern_rates[ptn] >= MAX_SITE_RATE) out << "100.0"; else out << pattern_rates[ptn];
 		//cout << i << " "<< ptn << " " << pattern_cat[ptn] << endl;
-		if (!pattern_cat.empty()) out << "\t" << pattern_cat[ptn]+1 << "\t" << getRate(pattern_cat[ptn]);
+        if (!pattern_cat.empty()) {
+            int site_cat;
+            double cat_rate;
+            if (getPInvar() == 0.0) {
+                site_cat = pattern_cat[ptn]+1;
+                cat_rate = getRate(pattern_cat[ptn]);
+            } else {
+                site_cat = pattern_cat[ptn];
+                if (site_cat == 0)
+                    cat_rate = 0.0;
+                else
+                    cat_rate = getRate(pattern_cat[ptn]-1);
+            }
+            out << "\t" << site_cat << "\t" << cat_rate;
+            count[pattern_cat[ptn]]++;
+        }
 		out << endl;
-        count[pattern_cat[ptn]]++;
 	}
     cout << "Empirical proportions for each category:";
     for (i = 0; i < count.size(); i++)
diff --git a/model/rateheterogeneity.h b/model/rateheterogeneity.h
index 51bab81..c0c4ab5 100644
--- a/model/rateheterogeneity.h
+++ b/model/rateheterogeneity.h
@@ -179,6 +179,13 @@ public:
 	*/	
     virtual int isGammaRate() { return 0; }
 
+    /**
+     *  check whether +I+G is used
+     */
+    virtual bool isGammai() const {
+        return false;
+    }
+
 	/**
 		the target function which needs to be optimized
 		@param x the input vector x
@@ -255,6 +262,12 @@ public:
 	string name;
 
 
+    /**
+     *  Specify whether the initial starting value of the gamma shape and p_inv
+     *  has already been tested.
+     */
+    bool testParamDone;
+
 	/**
 		full name of the rate heterogeneity type
 	*/
diff --git a/ncl/nxscharactersblock.cpp b/ncl/nxscharactersblock.cpp
index 180d5c3..cb78f84 100644
--- a/ncl/nxscharactersblock.cpp
+++ b/ncl/nxscharactersblock.cpp
@@ -1419,6 +1419,13 @@ void NxsCharactersBlock::HandleFormat(
 			{
 			break;
 			}
+        else 
+            {
+				errormsg = "Expecting ';' but found ";
+                errormsg += token.GetToken();
+                errormsg += " at the end of FORMAT command";
+				throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());                
+            }
 		}
 
 	// Perform some last checks before leaving the FORMAT command
@@ -1819,7 +1826,7 @@ void NxsCharactersBlock::HandleStdMatrix(
 				{
 				// This should be the taxon label
 				//
-				token.SetLabileFlagBit(token.hyphenNotPunctuation);
+				token.SetLabileFlagBit(NxsToken::hyphenNotPunctuation + NxsToken::preserveUnderscores);
 				token.GetNextToken();
 
 				if (page == 0 && newtaxa)
diff --git a/ngs.cpp b/ngs.cpp
index b8be2af..6b22e1c 100644
--- a/ngs.cpp
+++ b/ngs.cpp
@@ -142,7 +142,7 @@ void NGSAlignment::computeSumPairFreq (double *sum_pair_freq) {
     }
 }
 
-void NGSAlignment::computeEmpiricalRate (double *rates) {
+void NGSAlignment::computeDivergenceMatrix (double *rates) {
     int i, j, k, cat, id;
     assert(rates);
     double **pair_rates = (double**) new double[num_states];
@@ -410,7 +410,7 @@ NGSTree::NGSTree(Params &params, NGSAlignment *alignment) {
     model_factory = NULL;
     optimize_by_newton = params.optimize_by_newton;
     //tree.sse = params.SSE;
-    setLikelihoodKernel(LK_NORMAL);
+    setLikelihoodKernel(LK_EIGEN);
 }
 
 double NGSTree::computeLikelihood(double *pattern_lh) {
diff --git a/ngs.h b/ngs.h
index 89ac89d..255f4bf 100644
--- a/ngs.h
+++ b/ngs.h
@@ -82,7 +82,7 @@ public:
 		compute empirical rates between state pairs
 		@param rates (OUT) vector of size num_states*(num_states-1)/2 for the rates
 	*/
-	virtual void computeEmpiricalRate (double *rates);
+	virtual void computeDivergenceMatrix (double *rates);
 
 	/**
 		compute the empirical distance for a category, used to initialize rate scaling factor
diff --git a/pda.cpp b/pda.cpp
index 9f9db7d..88b15f8 100644
--- a/pda.cpp
+++ b/pda.cpp
@@ -2304,8 +2304,6 @@ int main(int argc, char *argv[])
 #endif
 	} else {
 		switch (Params::getInstance().SSE) {
-		case LK_NORMAL: cout << "Slow"; break;
-		case LK_SSE: cout << "Slow SSE3"; break;
 		case LK_EIGEN: cout << "No SSE"; break;
 		case LK_EIGEN_SSE:
 			if (instruction_set >= 7) {
diff --git a/phyloanalysis.cpp b/phyloanalysis.cpp
index ebfffcf..1558228 100644
--- a/phyloanalysis.cpp
+++ b/phyloanalysis.cpp
@@ -61,17 +61,15 @@ void reportReferences(Params &params, ofstream &out, string &original_model) {
 		<< "maximum likelihood phylogenies. Mol. Biol. Evol., 32:268-274." << endl << endl;
 
 	if (params.gbo_replicates)
-	out << "Since you also used ultrafast bootstrap (UFBoot) please cite: " << endl << endl
+	out << "Since you used ultrafast bootstrap (UFBoot) please also cite: " << endl << endl
 		<< "Bui Quang Minh, Minh Anh Thi Nguyen, and Arndt von Haeseler (2013) Ultrafast" << endl
 		<< "approximation for phylogenetic bootstrap. Mol. Biol. Evol., 30:1188-1195." << endl << endl;
 
-	/*		"*** If you use the parallel version, please cite: " << endl << endl <<
-	 "Bui Quang Minh, Le Sy Vinh, Arndt von Haeseler, and Heiko A. Schmidt (2005)" << endl <<
-	 "pIQPNNI - parallel reconstruction of large maximum likelihood phylogenies." << endl <<
-	 "Bioinformatics, 21:3794-3796." << endl << endl;*/
+    if (params.partition_file) 
+    out << "Since you used partition models please also cite:" << endl << endl
+        << "Olga Chernomor, Arndt von Haeseler, and Bui Quang Minh (2016) Terrace aware data" << endl
+        << "structure for phylogenomic inference from supermatrices. Syst. Biol., in press." << endl << endl;
 
-// 	if (original_model == "TEST" || original_model == "TESTONLY")
-// 		out << "Since you used Modeltest please also cite Posada and Crandall (1998)" << endl << endl;
 }
 
 void reportAlignment(ofstream &out, Alignment &alignment, int nremoved_seqs) {
@@ -743,13 +741,21 @@ void reportPhyloAnalysis(Params &params, string &original_model,
 				<< "NNI log-likelihood cutoff: " << tree.getNNICutoff() << endl
 				<< endl;
 */
-		if (params.compute_ml_tree && (params.min_iterations > 0 || original_model.find("ONLY") != string::npos)) {
-			if (original_model.find("ONLY") != string::npos)
+		if (params.compute_ml_tree) {
+			if (original_model.find("ONLY") != string::npos) {
 				out << "TREE USED FOR MODEL SELECTION" << endl
 					<< "-----------------------------" << endl << endl;
-            else 
+            } else if (params.min_iterations == 0) {
+                if (params.user_file)
+                    out << "USER TREE" << endl
+                        << "---------" << endl << endl;
+                else
+                    out << "STARTING TREE" << endl
+                        << "-------------" << endl << endl;                
+            } else { 
 				out << "MAXIMUM LIKELIHOOD TREE" << endl
 					<< "-----------------------" << endl << endl;
+            }
 
 			tree.setRootNode(params.root);
             
@@ -1727,7 +1733,7 @@ void runTreeReconstruction(Params &params, string &original_model, IQTree &iqtre
 
     iqtree.initializeAllPartialLh();
 	double initEpsilon = params.min_iterations == 0 ? params.modeps : (params.modeps*10);
-	if (params.test_param)
+	if (params.opt_gammai)
 		initEpsilon = 0.1;
 
 	string initTree;
@@ -1743,14 +1749,6 @@ void runTreeReconstruction(Params &params, string &original_model, IQTree &iqtre
 			exit(0);
 		}
 
-		if (params.testAlpha) { // DO RESTART ON ALPHA AND P_INVAR
-			double stime = getRealTime();
-			searchGAMMAInvarByRestarting(iqtree);
-			double etime = getRealTime();
-            cout << "Testing alpha took: " << etime -stime << " CPU seconds" << endl;
-            cout << endl;
-		}
-
 	}
 
     // Optimize model parameters and branch lengths using ML for the initial tree
@@ -2166,13 +2164,12 @@ void searchGAMMAInvarByRestarting(IQTree &iqtree) {
     delete [] state_freqs;
     delete [] bestRates;
     delete [] bestStateFreqs;
-	Params::getInstance().testAlpha = false;
 }
 
 // Test alpha fom 0.1 to 15 and p_invar from 0.1 to 0.99, stepsize = 0.01
 void exhaustiveSearchGAMMAInvar(Params &params, IQTree &iqtree) {
 	double alphaMin = 0.01;
-	double alphaMax = 15.00;
+	double alphaMax = 2.00;
 	double p_invarMin = 0.01;
 	double p_invarMax = 1.00;
 	double stepSize = 0.01;
@@ -2225,9 +2222,16 @@ void runStandardBootstrap(Params &params, string &original_model, Alignment *ali
 	vector<ModelInfo> *model_info = new vector<ModelInfo>;
 	StrVector removed_seqs, twin_seqs;
 
-	// turn off aLRT test
+	// turn off all branch tests
 	int saved_aLRT_replicates = params.aLRT_replicates;
+    int saved_localbp_replicates = params.localbp_replicates;
+    bool saved_aLRT_test = params.aLRT_test;
+    bool saved_aBayes_test = params.aBayes_test;
 	params.aLRT_replicates = 0;
+    params.localbp_replicates = 0;
+    params.aLRT_test = false;
+    params.aBayes_test = false;
+    
 	string treefile_name = params.out_prefix;
 	treefile_name += ".treefile";
 	string boottrees_name = params.out_prefix;
@@ -2376,7 +2380,12 @@ void runStandardBootstrap(Params &params, string &original_model, Alignment *ali
 
 	if (params.compute_ml_tree) {
 		cout << endl << "===> START ANALYSIS ON THE ORIGINAL ALIGNMENT" << endl << endl;
+        // restore branch tests
 		params.aLRT_replicates = saved_aLRT_replicates;
+        params.localbp_replicates = saved_localbp_replicates;
+        params.aLRT_test = saved_aLRT_test;
+        params.aBayes_test = saved_aBayes_test;
+        
 		runTreeReconstruction(params, original_model, *tree, *model_info);
 
 		cout << endl << "===> ASSIGN BOOTSTRAP SUPPORTS TO THE TREE FROM ORIGINAL ALIGNMENT" << endl << endl;
diff --git a/phylokernel.h b/phylokernel.h
index 678393e..7521c72 100644
--- a/phylokernel.h
+++ b/phylokernel.h
@@ -87,12 +87,12 @@ void PhyloTree::computePartialLikelihoodEigenSIMD(PhyloNeighbor *dad_branch, Phy
     size_t nptn = aln->size() + model_factory->unobserved_ptns.size();
     PhyloNode *node = (PhyloNode*)(dad_branch->node);
 
+    if (!tip_partial_lh_computed)
+        computeTipPartialLikelihood();
+
 	if (node->isLeaf()) {
 	    dad_branch->lh_scale_factor = 0.0;
 	    //memset(dad_branch->scale_num, 0, nptn * sizeof(UBYTE));
-
-		if (!tip_partial_lh_computed)
-			computeTipPartialLikelihood();
 		return;
 	}
 
@@ -378,7 +378,7 @@ void PhyloTree::computePartialLikelihoodEigenSIMD(PhyloNeighbor *dad_branch, Phy
 			}
             // check if one should scale partial likelihoods
 			double lh_max = horizontal_max(vc_max);
-            if (lh_max < SCALING_THRESHOLD) {
+            if (lh_max < SCALING_THRESHOLD && ptn_invar[ptn] == 0.0) {
             	// now do the likelihood scaling
             	partial_lh -= block; // revert its pointer
             	VectorClass scale_thres(SCALING_THRESHOLD_INVER);
@@ -460,7 +460,7 @@ void PhyloTree::computePartialLikelihoodEigenSIMD(PhyloNeighbor *dad_branch, Phy
 
             // check if one should scale partial likelihoods
 			double lh_max = horizontal_max(vc_max);
-            if (lh_max < SCALING_THRESHOLD) {
+            if (lh_max < SCALING_THRESHOLD && ptn_invar[ptn] == 0.0) {
 				// now do the likelihood scaling
             	partial_lh -= block; // revert its pointer
             	VectorClass scale_thres(SCALING_THRESHOLD_INVER);
diff --git a/phylokernelmixrate.h b/phylokernelmixrate.h
index ecad5d1..a92559f 100644
--- a/phylokernelmixrate.h
+++ b/phylokernelmixrate.h
@@ -21,6 +21,12 @@
 
 template <class VectorClass, const int VCSIZE, const int nstates>
 void PhyloTree::computeMixratePartialLikelihoodEigenSIMD(PhyloNeighbor *dad_branch, PhyloNode *dad) {
+    if (dad_branch->node->degree() > 3) {
+        // TODO: SIMD version for multifurcating node
+        computeMixratePartialLikelihoodEigen(dad_branch, dad);
+        return;
+    }
+
     // don't recompute the likelihood
 	assert(dad);
     if (dad_branch->partial_lh_computed & 1)
diff --git a/phylokernelmixture.h b/phylokernelmixture.h
index 6e50635..6b08498 100644
--- a/phylokernelmixture.h
+++ b/phylokernelmixture.h
@@ -19,6 +19,12 @@
 
 template <class VectorClass, const int VCSIZE, const int nstates>
 void PhyloTree::computeMixturePartialLikelihoodEigenSIMD(PhyloNeighbor *dad_branch, PhyloNode *dad) {
+    if (dad_branch->node->degree() > 3) {
+        // TODO: SIMD version for multifurcating node
+        computeMixturePartialLikelihoodEigen(dad_branch, dad);
+        return;
+    }
+
     // don't recompute the likelihood
 	assert(dad);
     if (dad_branch->partial_lh_computed & 1)
diff --git a/phylosupertreeplen.cpp b/phylosupertreeplen.cpp
index bc93d40..c08c6e6 100644
--- a/phylosupertreeplen.cpp
+++ b/phylosupertreeplen.cpp
@@ -138,13 +138,12 @@ double PartitionModelPlen::optimizeParameters(bool fixed_len, bool write_info, d
                 tree->part_info[part].cur_score = tree->at(part)->computeLikelihood();
         	cur_lh += tree->part_info[part].cur_score;
             
-            
 
         	// normalize rates s.t. branch lengths are #subst per site
         	double mean_rate = tree->at(part)->getRate()->rescaleRates();
-        	if (mean_rate != 1.0) {
+        	if (fabs(mean_rate-1.0) > 1e-6) {
         		if (tree->fixed_rates) {
-        			outError("Partition " + tree->part_info[part].name + " follows FreeRate heterogeneity. Please use proportion edge-linked partition model (-spp)");
+        			outError("Unsupported -spj. Please use proportion edge-linked partition model (-spp)");
                 }
                 tree->at(part)->scaleLength(mean_rate);
         		tree->part_info[part].part_rate *= mean_rate;
@@ -1865,6 +1864,7 @@ void PhyloSuperTreePlen::initializeAllPartialLh() {
     for (it = begin(), part = 0; it != end(); it++, part++) {
         (*it)->tip_partial_lh = lh_addr;
         uint64_t tip_partial_lh_size = (*it)->aln->num_states * ((*it)->aln->STATE_UNKNOWN+1) * (*it)->model->getNMixtures();
+        tip_partial_lh_size = ((tip_partial_lh_size+3)/4)*4;
         lh_addr += tip_partial_lh_size;
     }
 }
diff --git a/phylotesting.cpp b/phylotesting.cpp
index 43e9a50..5a1b20f 100644
--- a/phylotesting.cpp
+++ b/phylotesting.cpp
@@ -24,6 +24,7 @@
 #include "model/rateinvar.h"
 #include "model/rategammainvar.h"
 #include "model/ratefree.h"
+#include "model/ratefreeinvar.h"
 //#include "modeltest_wrapper.h"
 #include "model/modelprotein.h"
 #include "model/modelbin.h"
@@ -559,7 +560,8 @@ int getModelList(Params &params, Alignment *aln, StrVector &models, bool separat
                     // disallow MG+F
                     if (freq_names[i] == "+F" && orig_model_names[j].find("MG") != string::npos)
                         continue;
-                    if (freq_names[i] != "" || model_type == 2) // empirical model also allow ""
+                    if (freq_names[i] != "" || (model_type == 2 && orig_model_names[j].find("MG") == string::npos)) 
+                        // empirical model also allow ""
                         model_names.push_back(orig_model_names[j] + freq_names[i]);
                 }
             } else {
@@ -883,7 +885,7 @@ void testPartitionModel(Params &params, PhyloSuperTree* in_tree, vector<ModelInf
 #ifdef _OPENMP
 //        for (i = 0; i < in_tree->size(); i++)
 //            cout << distID[i]+1 << "\t" << in_tree->part_info[distID[i]].name << "\t" << -dist[i] << endl;
-#pragma omp parallel for private(i) schedule(dynamic) reduction(+: lhsum, dfsum)
+#pragma omp parallel for private(i) schedule(dynamic) reduction(+: lhsum, dfsum) if(!params.model_test_and_tree)
 #endif
 	for (int j = 0; j < in_tree->size(); j++) {
         i = distID[j];
@@ -995,7 +997,7 @@ void testPartitionModel(Params &params, PhyloSuperTree* in_tree, vector<ModelInf
             quicksort(dist, 0, num_pairs-1, distID);
 
 #ifdef _OPENMP
-#pragma omp parallel for private(i) schedule(dynamic)
+#pragma omp parallel for private(i) schedule(dynamic) if(!params.model_test_and_tree)
 #endif
         for (int pair = 0; pair < num_pairs; pair++) {
             int part1 = distID[pair] >> 16;
@@ -1034,6 +1036,11 @@ void testPartitionModel(Params &params, PhyloSuperTree* in_tree, vector<ModelInf
                 PhyloTree *tree = in_tree->extractSubtree(merged_set);
                 tree->setAlignment(aln);
                 extractModelInfo(set_name, model_info, part_model_info);
+//                TODO
+                tree->num_precision = in_tree->num_precision;
+                if (params.model_test_and_tree) {
+                    tree->setCheckpoint(new Checkpoint());
+                }
 //#ifdef _OPENMP
                 model = testModel(params, tree, part_model_info, this_fmodel, models_block, set_name);
 //#else
@@ -1042,6 +1049,9 @@ void testPartitionModel(Params &params, PhyloSuperTree* in_tree, vector<ModelInf
                 logl = part_model_info[0].logl;
                 df = part_model_info[0].df;
                 treelen = part_model_info[0].tree_len;
+                if (params.model_test_and_tree) {
+                    delete tree->getCheckpoint();
+                }
                 delete tree;
                 delete aln;
             }
@@ -1143,6 +1153,20 @@ void testPartitionModel(Params &params, PhyloSuperTree* in_tree, vector<ModelInf
 	in_tree->printBestPartitionRaxml((string(params.out_prefix) + ".best_scheme").c_str());
 }
 
+bool isMixtureModel(ModelsBlock *models_block, string &model_str) {
+    size_t mix_pos;
+    for (mix_pos = 0; mix_pos < model_str.length(); mix_pos++) {
+        size_t next_mix_pos = model_str.find_first_of("+*", mix_pos);
+        string sub_model_str = model_str.substr(mix_pos, next_mix_pos-mix_pos);
+        if (models_block->findMixModel(sub_model_str))
+            return true;
+        if (next_mix_pos == string::npos)
+            break;
+        mix_pos = next_mix_pos;
+    }
+    return false;
+}
+
 string testModel(Params &params, PhyloTree* in_tree, vector<ModelInfo> &model_info, ostream &fmodel, ModelsBlock *models_block,
     string set_name, bool print_mem_usage) 
 {
@@ -1201,12 +1225,20 @@ string testModel(Params &params, PhyloTree* in_tree, vector<ModelInfo> &model_in
 	rate_class[0] = new RateHeterogeneity();
 	rate_class[1] = new RateInvar(-1, NULL);
 	rate_class[2] = new RateGamma(params.num_rate_cats, params.gamma_shape, params.gamma_median, NULL);
-	rate_class[3] = new RateGammaInvar(params.num_rate_cats, params.gamma_shape, params.gamma_median, -1, params.optimize_model_rate_joint, NULL);
+	rate_class[3] = new RateGammaInvar(params.num_rate_cats, params.gamma_shape, params.gamma_median, -1, params.optimize_alg_gammai, NULL, false);
     
     RateFree ** rate_class_free = new RateFree*[params.max_rate_cats-1];
     
     for (model = 0; model < params.max_rate_cats-1; model++)
         rate_class_free[model] = new RateFree(model+2, params.gamma_shape, "", false, params.optimize_alg, NULL);
+
+    RateFreeInvar ** rate_class_freeinvar = new RateFreeInvar*[params.max_rate_cats-1];
+    
+    for (model = 0; model < params.max_rate_cats-1; model++) {
+        rate_class_freeinvar[model] = new RateFreeInvar(model+2, params.gamma_shape, "", false, in_tree->aln->frac_const_sites/2.0, params.optimize_alg, NULL);
+        rate_class_freeinvar[model]->setFixPInvar(false);
+    }
+        
         
 	ModelGTR *subst_model = NULL;
 	if (seq_type == SEQ_BINARY)
@@ -1281,7 +1313,7 @@ string testModel(Params &params, PhyloTree* in_tree, vector<ModelInfo> &model_in
         int ncat = 0;
         string orig_name = params.model_name;
         
-        if (models_block->findMixModel(model_names[model])) {
+        if (isMixtureModel(models_block, model_names[model])) {
             // mixture model
             try {
                 mixture_model = true;
@@ -1302,7 +1334,7 @@ string testModel(Params &params, PhyloTree* in_tree, vector<ModelInfo> &model_in
             // normal model
             if (model_names[model].find("+ASC") != string::npos) {
                 model_fac->unobserved_ptns = in_tree->aln->getUnobservedConstPatterns();
-                if (model_fac->unobserved_ptns.size() == 0) {
+                if (model_fac->unobserved_ptns.size() < tree->aln->getNumNonstopCodons() || in_tree->aln->frac_const_sites > 0.0) {
                     cout.width(3);
                     cout << right << model+1 << "  ";
                     cout.width(13);
@@ -1342,7 +1374,17 @@ string testModel(Params &params, PhyloTree* in_tree, vector<ModelInfo> &model_in
             tree->setModel(subst_model);
             // initialize rate
             size_t pos;
-            if ((pos = model_names[model].find("+R")) != string::npos) {
+            if (model_names[model].find("+I") != string::npos && (pos = model_names[model].find("+R")) != string::npos) {
+                ncat = params.num_rate_cats;
+                if (model_names[model].length() > pos+2 && isdigit(model_names[model][pos+2])) {
+                    ncat = convert_int(model_names[model].c_str() + pos+2);
+    //                tree->getRate()->setNCategory(ncat);
+                }
+                if (ncat <= 1) outError("Number of rate categories for " + model_names[model] + " is <= 1");
+                if (ncat > params.max_rate_cats)
+                    outError("Number of rate categories for " + model_names[model] + " exceeds " + convertIntToString(params.max_rate_cats));
+                tree->setRate(rate_class_freeinvar[ncat-2]);
+            } else if ((pos = model_names[model].find("+R")) != string::npos) {
                 ncat = params.num_rate_cats;
                 if (model_names[model].length() > pos+2 && isdigit(model_names[model][pos+2])) {
                     ncat = convert_int(model_names[model].c_str() + pos+2);
@@ -1394,7 +1436,8 @@ string testModel(Params &params, PhyloTree* in_tree, vector<ModelInfo> &model_in
         if (skip_model) {
             assert(prev_model_id>=0);
             size_t pos_r = info.name.find("+R");
-            if (pos_r == string::npos || info.name.substr(0, pos_r) != model_info[prev_model_id].name.substr(0, pos_r))
+            size_t prev_pos_r = model_info[prev_model_id].name.find("+R");
+            if (pos_r == string::npos || prev_pos_r == string::npos || info.name.substr(0, pos_r) != model_info[prev_model_id].name.substr(0, prev_pos_r))
                 skip_model = 0;
         }
 		for (int i = 0; i < model_info.size(); i++)
@@ -1481,7 +1524,10 @@ string testModel(Params &params, PhyloTree* in_tree, vector<ModelInfo> &model_in
                         if (verbose_mode >= VB_MED)
                             cout << "reoptimizing from previous parameters of +R...." << endl;
                         assert(ncat >= 3);
-                        rate_class_free[ncat-2]->setRateAndProp(rate_class_free[ncat-3]);
+                        if (tree->getRate()->getPInvar() != 0.0)                        
+                            rate_class_freeinvar[ncat-2]->setRateAndProp(rate_class_freeinvar[ncat-3]);
+                        else
+                            rate_class_free[ncat-2]->setRateAndProp(rate_class_free[ncat-3]);
                         info.logl = tree->getModelFactory()->optimizeParameters(false, false, TOL_LIKELIHOOD_MODELTEST, TOL_GRADIENT_MODELTEST);
                         info.tree_len = tree->treeLength();                        
                     }
@@ -1716,6 +1762,11 @@ string testModel(Params &params, PhyloTree* in_tree, vector<ModelInfo> &model_in
 		delete rate_class_free[rate_type];
     }
     delete [] rate_class_free;
+
+	for (rate_type = params.max_rate_cats-2; rate_type >= 0; rate_type--) {
+		delete rate_class_freeinvar[rate_type];
+    }
+    delete [] rate_class_freeinvar;
     
 //	delete tree_hetero;
 //	delete tree_homo;
diff --git a/phylotree.cpp b/phylotree.cpp
index 2221943..3bbc1ac 100644
--- a/phylotree.cpp
+++ b/phylotree.cpp
@@ -89,7 +89,7 @@ void PhyloTree::init() {
     subTreeDistComputed = false;
     dist_matrix = NULL;
     var_matrix = NULL;
-    setLikelihoodKernel(LK_SSE);  // FOR TUNG: you forgot to initialize this variable!
+    setLikelihoodKernel(LK_EIGEN_SSE);  // FOR TUNG: you forgot to initialize this variable!
     save_all_trees = 0;
     nodeBranchDists = NULL;
     // FOR: upper bounds
@@ -630,130 +630,10 @@ size_t PhyloTree::getBitsBlockSize() {
     return len;
 }
 
-int PhyloTree::getBitsEntrySize() {
-    // reserve the last entry for parsimony score
-    return (aln->num_states + SIMD_BITS - 1) / UINT_BITS;
-}
-
 UINT *PhyloTree::newBitsBlock() {
     return aligned_alloc<UINT>(getBitsBlockSize());
 }
 
-void PhyloTree::getBitsBlock(UINT *bit_vec, int index, UINT* &bits_entry) {
-    int nstates = aln->num_states;
-    int myindex = (index * nstates);
-    int bit_pos_begin = myindex >> BITS_DIV;
-    int bit_off_begin = myindex & BITS_MODULO;
-    int bit_pos_end = (myindex + nstates) >> BITS_DIV;
-    int bit_off_end = (myindex + nstates) & BITS_MODULO;
-
-    if (bit_pos_begin == bit_pos_end) {
-        bits_entry[0] = (bit_vec[bit_pos_begin] >> bit_off_begin) & ((1 << nstates) - 1);
-        return;
-    }
-    UINT part1 = (bit_vec[bit_pos_begin] >> bit_off_begin);
-    int rest_bits = nstates;
-    int id;
-    for (id = 0; rest_bits >= UINT_BITS; id++, rest_bits -= UINT_BITS, bit_pos_begin++) {
-        bits_entry[id] = part1;
-        if (bit_off_begin > 0)
-            bits_entry[id] |= (bit_vec[bit_pos_begin + 1] << (UINT_BITS - bit_off_begin));
-        part1 = (bit_vec[bit_pos_begin + 1] >> bit_off_begin);
-    }
-    if (bit_pos_begin == bit_pos_end) {
-        bits_entry[id] = (bit_vec[bit_pos_begin] >> bit_off_begin) & ((1 << rest_bits) - 1);
-        return;
-    }
-    UINT part2 = bit_vec[bit_pos_end];
-    if (bit_off_end < UINT_BITS)
-        part2 &= ((1 << bit_off_end) - 1);
-    bits_entry[id] = part1;
-    if (bit_off_begin > 0)
-        bits_entry[id] |= (part2 << (UINT_BITS - bit_off_begin));
-}
-
-void PhyloTree::setBitsBlock(UINT* &bit_vec, int index, UINT *bits_entry) {
-    int nstates = aln->num_states;
-    int myindex = (index * nstates);
-    int bit_pos_begin = myindex >> BITS_DIV;
-    int bit_off_begin = myindex & BITS_MODULO;
-    int bit_pos_end = (myindex + nstates) >> BITS_DIV;
-    int bit_off_end = (myindex + nstates) & BITS_MODULO;
-
-    //assert(value <= allstates);
-
-    if (bit_pos_begin == bit_pos_end) {
-        // first clear the bit between bit_off_begin and bit_off_end
-        int allstates = (1 << nstates) - 1;
-        bit_vec[bit_pos_begin] &= ~(allstates << bit_off_begin);
-        // now set the bit
-        bit_vec[bit_pos_begin] |= bits_entry[0] << bit_off_begin;
-        return;
-    }
-    int len1 = UINT_BITS - bit_off_begin;
-    // clear bit from bit_off_begin to UINT_BITS
-    bit_vec[bit_pos_begin] &= (1 << bit_off_begin) - 1;
-    // set bit  from bit_off_begin to UINT_BITS
-    bit_vec[bit_pos_begin] |= (bits_entry[0] << bit_off_begin);
-    int rest_bits = nstates - len1;
-    int id;
-    for (id = 0; rest_bits >= UINT_BITS; bit_pos_begin++, id++, rest_bits -= UINT_BITS) {
-        bit_vec[bit_pos_begin + 1] = (bits_entry[id + 1] << bit_off_begin);
-        if (len1 < UINT_BITS)
-            bit_vec[bit_pos_begin + 1] |= (bits_entry[id] >> len1);
-    }
-
-    assert(bit_pos_begin == bit_pos_end - 1);
-    // clear bit from 0 to bit_off_end
-    bit_vec[bit_pos_end] &= ~((1 << bit_off_end) - 1);
-    // now set the bit the value
-    if (len1 < UINT_BITS)
-        bit_vec[bit_pos_end] |= (bits_entry[id] >> len1);
-    rest_bits -= bit_off_begin;
-    if (rest_bits > 0)
-        bit_vec[bit_pos_end] |= (bits_entry[id + 1] << bit_off_begin);
-}
-
-bool PhyloTree::isEmptyBitsEntry(UINT *bits_entry) {
-    int rest_bits = aln->num_states;
-    int i;
-    for (i = 0; rest_bits >= UINT_BITS; rest_bits -= UINT_BITS, i++)
-        if (bits_entry[i])
-            return false;
-    if (bits_entry[i] & ((1 << rest_bits) - 1))
-        return false;
-    return true;
-}
-
-void PhyloTree::unionBitsEntry(UINT *bits_entry1, UINT *bits_entry2, UINT* &bits_union) {
-    int rest_bits = aln->num_states;
-    int i;
-    for (i = 0; rest_bits > 0; rest_bits -= UINT_BITS, i++)
-        bits_union[i] = bits_entry1[i] | bits_entry2[i];
-}
-
-void PhyloTree::setBitsEntry(UINT* &bits_entry, int id) {
-    int bit_pos = id >> BITS_DIV;
-    int bit_off = id & BITS_MODULO;
-    bits_entry[bit_pos] |= (1 << bit_off);
-}
-
-bool PhyloTree::getBitsEntry(UINT* &bits_entry, int id) {
-    int bit_pos = id >> BITS_DIV;
-    int bit_off = id & BITS_MODULO;
-    if (bits_entry[bit_pos] & (1 << bit_off))
-        return true;
-    return false;
-}
-
-void setBitsAll(UINT* &bit_vec, int num) {
-    //int id;
-    int size = num / UINT_BITS;
-    memset(bit_vec, 255, size * sizeof(UINT));
-    num &= BITS_MODULO;
-    if (num)
-        bit_vec[size] = (1 << num) - 1;
-}
 
 void PhyloTree::computePartialParsimony(PhyloNeighbor *dad_branch, PhyloNode *dad) {
     (this->*computePartialParsimonyPointer)(dad_branch, dad);
@@ -769,440 +649,16 @@ void PhyloTree::computeReversePartialParsimony(PhyloNode *node, PhyloNode *dad)
 
 }
 
-void PhyloTree::computePartialParsimonyNaive(PhyloNeighbor *dad_branch, PhyloNode *dad) {
-    // don't recompute the parsimony
-    if (dad_branch->partial_lh_computed & 2)
-        return;
-    Node *node = dad_branch->node;
-    //assert(node->degree() <= 3);
-    int ptn;
-    int nstates = aln->num_states;
-    int pars_size = getBitsBlockSize();
-    int entry_size = getBitsEntrySize();
-    assert(dad_branch->partial_pars);
-    UINT *bits_entry = new UINT[entry_size];
-    UINT *bits_entry_child = new UINT[entry_size];
-    //UINT *bits_entry1 = new UINT[entry_size];
-    //UINT *bits_entry2 = new UINT[entry_size];
-
-    if (node->isLeaf() && dad) {
-        // external node
-    	int ambi_aa[] = {4+8, 32+64, 512+1024};
-
-        setBitsAll(dad_branch->partial_pars, nstates * aln->size());
-        dad_branch->partial_pars[pars_size - 1] = 0;
-        for (ptn = 0; ptn < aln->size(); ptn++)
-            if (!aln->at(ptn).is_const) {
-                char state;
-                if (node->name == ROOT_NAME) {
-                    state = aln->STATE_UNKNOWN;
-                } else {
-                    assert(node->id < aln->getNSeq());
-                    state = (aln->at(ptn))[node->id];
-                }
-                if (state == aln->STATE_UNKNOWN) {
-                    // fill all entries with bit 1
-                    //setBitsBlock(dad_branch->partial_pars, ptn, (1 << nstates) - 1);
-                } else if (state < nstates) {
-                    memset(bits_entry, 0, sizeof(UINT) * entry_size);
-                    setBitsEntry(bits_entry, state);
-                    setBitsBlock(dad_branch->partial_pars, ptn, bits_entry);
-                } else if (aln->seq_type == SEQ_DNA) {
-                    // ambiguous character, for DNA, RNA
-                    state = state - (nstates - 1);
-                    memset(bits_entry, 0, sizeof(UINT) * entry_size);
-                    bits_entry[0] = state;
-                    setBitsBlock(dad_branch->partial_pars, ptn, bits_entry);
-                }  else if (aln->seq_type == SEQ_PROTEIN) {
-            		if (state >= 23) return;
-            		state -= 20;
-                    memset(bits_entry, 0, sizeof(UINT) * entry_size);
-                    bits_entry[0] = ambi_aa[(int)state];
-                    setBitsBlock(dad_branch->partial_pars, ptn, bits_entry);
-                } else {
-                	assert(0);
-                }
-            }
-    } else {
-        // internal node
-        memset(dad_branch->partial_pars, 255, pars_size * sizeof(int));
-        UINT *partial_pars_dad = dad_branch->partial_pars;
-        int partial_pars = 0;
-        //UINT *partial_pars_child1 = NULL, *partial_pars_child2 = NULL;
-        // take the intersection of two child states (with &= bit operation)
-        FOR_NEIGHBOR_IT(node, dad, it)if ((*it)->node->name != ROOT_NAME) {
-            computePartialParsimonyNaive((PhyloNeighbor*) (*it), (PhyloNode*) node);
-            /*
-             if (!partial_pars_child1)
-             partial_pars_child1 = ((PhyloNeighbor*) (*it))->partial_pars;
-             else
-             partial_pars_child2 = ((PhyloNeighbor*) (*it))->partial_pars;
-             */
-            UINT *partial_pars_child = ((PhyloNeighbor*) (*it))->partial_pars;
-            for (int i = 0; i < pars_size - 1; i++)
-            partial_pars_dad[i] &= partial_pars_child[i];
-            partial_pars += partial_pars_child[pars_size - 1];
-        }
-        //assert(partial_pars_child1 && partial_pars_child2);
-        // take the intersection of two bits block
-        //for (int i = 0; i < pars_size - 1; i++)
-        //    partial_pars_dad[i] = partial_pars_child1[i] & partial_pars_child2[i];
-        //int partial_pars = partial_pars_child1[pars_size - 1] + partial_pars_child2[pars_size - 1];
-        // now check if some intersection is empty, change to union (Fitch algorithm) and increase the parsimony score
-        memset(bits_entry, 0, entry_size * sizeof(UINT));
-        for (ptn = 0; ptn < aln->size(); ptn++)
-            if (!aln->at(ptn).is_const) {
-                getBitsBlock(partial_pars_dad, ptn, bits_entry);
-                if (isEmptyBitsEntry(bits_entry)) {
-                    FOR_NEIGHBOR_IT(node, dad, it2)if ((*it2)->node->name != ROOT_NAME) {
-                        UINT *partial_pars_child = ((PhyloNeighbor*) (*it2))->partial_pars;
-                        getBitsBlock(partial_pars_child, ptn, bits_entry_child);
-                        unionBitsEntry(bits_entry, bits_entry_child, bits_entry);
-                    }
-                    //getBitsBlock(partial_pars_child2, ptn, bits_entry2);
-                    //unionBitsEntry(bits_entry1, bits_entry2, bits_entry);
-                    //cout << bits_entry[0] << " " << bits_entry[1] << endl;
-                    setBitsBlock(partial_pars_dad, ptn, bits_entry);
-                    partial_pars += aln->at(ptn).frequency;
-                }
-            }
-
-            /*
-             for (ptn = 0; ptn < aln->size(); ptn++)
-             if (!aln->at(ptn).is_const) {
-             getBitsBlock(partial_pars_dad, ptn, bits_entry);
-             if (isEmptyBitsEntry(bits_entry)) {
-             getBitsBlock(partial_pars_child1, ptn, bits_entry1);
-             getBitsBlock(partial_pars_child2, ptn, bits_entry2);
-             unionBitsEntry(bits_entry1, bits_entry2, bits_entry);
-             //cout << bits_entry[0] << " " << bits_entry[1] << endl;
-             setBitsBlock(partial_pars_dad, ptn, bits_entry);
-             partial_pars += aln->at(ptn).frequency;
-             }
-             }*/
-        partial_pars_dad[pars_size - 1] = partial_pars;
-    }
-    dad_branch->partial_lh_computed |= 2;
-    //delete[] bits_entry2;
-    //delete[] bits_entry1;
-    delete[] bits_entry_child;
-    delete[] bits_entry;
-}
 
 int PhyloTree::computeParsimonyBranch(PhyloNeighbor *dad_branch, PhyloNode *dad, int *branch_subst) {
     return (this->*computeParsimonyBranchPointer)(dad_branch, dad, branch_subst);
 }
 
-int PhyloTree::computeParsimonyBranchNaive(PhyloNeighbor *dad_branch, PhyloNode *dad, int *branch_subst) {
-        
-    PhyloNode *node = (PhyloNode*) dad_branch->node;
-    PhyloNeighbor *node_branch = (PhyloNeighbor*) node->findNeighbor(dad);
-    assert(node_branch);
-    if (!central_partial_pars)
-        initializeAllPartialPars();
-    // swap node and dad if dad is a leaf
-    if (node->isLeaf()) {
-        PhyloNode *tmp_node = dad;
-        dad = node;
-        node = tmp_node;
-        PhyloNeighbor *tmp_nei = dad_branch;
-        dad_branch = node_branch;
-        node_branch = tmp_nei;
-        //cout << "swapped\n";
-    }
-    if ((dad_branch->partial_lh_computed & 2) == 0)
-        computePartialParsimonyNaive(dad_branch, dad);
-    if ((node_branch->partial_lh_computed & 2) == 0)
-        computePartialParsimonyNaive(node_branch, node);
-    // now combine likelihood at the branch
-
-    int pars_size = getBitsBlockSize();
-    int entry_size = getBitsEntrySize();
-    //int nstates = aln->num_states;
-    int i, ptn;
-    int tree_pars = 0;
-    UINT *partial_pars = newBitsBlock();
-    UINT *bits_entry = new UINT[entry_size];
-    for (i = 0; i < pars_size - 1; i++)
-        partial_pars[i] = (node_branch->partial_pars[i] & dad_branch->partial_pars[i]);
-
-    for (ptn = 0; ptn < aln->size(); ptn++)
-        if (!aln->at(ptn).is_const) {
-            getBitsBlock(partial_pars, ptn, bits_entry);
-            if (isEmptyBitsEntry(bits_entry))
-                tree_pars += aln->at(ptn).frequency;
-        }
-    if (branch_subst)
-        *branch_subst = tree_pars;
-    tree_pars += node_branch->partial_pars[pars_size - 1] + dad_branch->partial_pars[pars_size - 1];
-    delete[] bits_entry;
-    aligned_free(partial_pars);
-    return tree_pars;
-}
 
 int PhyloTree::computeParsimony() {
     return computeParsimonyBranch((PhyloNeighbor*) root->neighbors[0], (PhyloNode*) root);
 }
 
-void PhyloTree::printParsimonyStates(PhyloNeighbor *dad_branch, PhyloNode *dad) {
-    if (!dad) {
-        dad = (PhyloNode*) root;
-        dad_branch = (PhyloNeighbor*) root->neighbors[0];
-        cout << "Parsimonious states for every node and site: " << endl;
-    }
-    int site;
-    cout << "States for node ";
-    int max_len = aln->getMaxSeqNameLength();
-    if (max_len < 3)
-        max_len = 3;
-    cout.width(max_len);
-    if (!dad_branch->node->name.empty())
-        cout << left << dad_branch->node->name;
-    else
-        cout << left << dad_branch->node->id;
-    cout << " are ";
-    UINT *bits_entry = new UINT[getBitsEntrySize()];
-    for (site = 0; site < aln->getNSite(); site++) {
-        int ptn = aln->getPatternID(site);
-        getBitsBlock(dad_branch->partial_pars, ptn, bits_entry);
-        if (aln->at(ptn).is_const) {
-            int state = aln->at(ptn)[0];
-            if (state < aln->num_states)
-                setBitsEntry(bits_entry, state);
-            else {
-                memset(bits_entry, 0, sizeof(UINT) * getBitsEntrySize());
-                bits_entry[0] = state - (aln->num_states - 1);
-                ;
-            }
-        }
-        cout << "{";
-        bool first = true;
-        for (int i = 0; i < aln->num_states; i++)
-            if (getBitsEntry(bits_entry, i)) {
-                cout << ((!first) ? "," : "") << i;
-                first = false;
-            }
-        cout << "}\t";
-    }
-    cout << endl;
-    delete[] bits_entry;
-    FOR_NEIGHBOR_IT(dad_branch->node, dad, it)printParsimonyStates((PhyloNeighbor*) (*it), (PhyloNode*) (dad_branch->node));
-}
-
-int PhyloTree::computeParsimonyScore(int ptn, int &states, PhyloNode *node, PhyloNode *dad) {
-    int score = 0;
-    states = 0;
-    if (!node)
-        node = (PhyloNode*) root;
-    if (node->degree() > 3)
-        outError("Does not work with multifurcating tree");
-    if (verbose_mode == VB_DEBUG)
-        cout << ptn << " " << node->id << "  " << node->name << endl;
-
-    if (node->isLeaf()) {
-        char state;
-        if (node->name == ROOT_NAME) {
-            state = aln->STATE_UNKNOWN;
-        } else {
-            assert(node->id < aln->getNSeq());
-            state = (*aln)[ptn][node->id];
-        }
-        if (state == aln->STATE_UNKNOWN) {
-            states = (1 << aln->num_states) - 1;
-        } else if (state < aln->num_states)
-            states = (1 << state);
-        else {
-            // ambiguous character, for DNA, RNA
-            states = state - 3;
-        }
-    }
-    if (!node->isLeaf() || node == root) {
-        int union_states = 0;
-        int intersect_states = (1 << aln->num_states) - 1;
-        if (states != 0) {
-            union_states = states;
-            intersect_states = states;
-        }
-
-        FOR_NEIGHBOR_IT(node, dad, it){
-        int states_child;
-        int score_child = computeParsimonyScore(ptn, states_child, (PhyloNode*) ((*it)->node), node);
-        union_states |= states_child;
-        intersect_states &= states_child;
-        score += score_child;
-    }
-        if (intersect_states)
-            states = intersect_states;
-        else {
-            states = union_states;
-            score++;
-        }
-    }
-    return score;
-}
-
-int PhyloTree::computeParsimonyScore() {
-    assert(root && root->isLeaf());
-
-    int score = 0;
-    for (int ptn = 0; ptn < aln->size(); ptn++)
-        if (!aln->at(ptn).is_const) {
-            int states;
-            score += computeParsimonyScore(ptn, states) * (*aln)[ptn].frequency;
-        }
-    return score;
-}
-
-/****************************************************************************
- Nearest Neighbor Interchange with parsimony
- ****************************************************************************/
-
-double PhyloTree::swapNNI(double cur_score, PhyloNode *node1, PhyloNode *node2) {
-    assert(node1->degree() == 3 && node2->degree() == 3);
-    FOR_NEIGHBOR_DECLARE(node1, node2, it1)
-        break;
-    Node *node1_nei = (*it1)->node;
-
-    FOR_NEIGHBOR_IT(node2, node1, it2){
-    // do the NNI swap
-    Node *node2_nei = (*it2)->node;
-    node1->updateNeighbor(node1_nei, node2_nei);
-    node1_nei->updateNeighbor(node1, node2);
-    node2->updateNeighbor(node2_nei, node1_nei);
-    node2_nei->updateNeighbor(node2, node1);
-
-    // compute the score of the swapped topology
-    double score = computeParsimonyScore();
-    // if better: return
-    if (score < cur_score) return score;
-    // else, swap back
-    node1->updateNeighbor(node2_nei, node1_nei);
-    node1_nei->updateNeighbor(node2, node1);
-    node2->updateNeighbor(node1_nei, node2_nei);
-    node2_nei->updateNeighbor(node1, node2);
-}
-    return cur_score;
-}
-
-double PhyloTree::searchNNI(double cur_score, PhyloNode *node, PhyloNode *dad) {
-    if (!node)
-        node = (PhyloNode*) root;
-    if (!node->isLeaf() && dad && !dad->isLeaf()) {
-        double score = swapNNI(cur_score, node, dad);
-        if (score < cur_score)
-            return score;
-    }
-
-    FOR_NEIGHBOR_IT(node, dad, it){
-    double score = searchNNI(cur_score, (PhyloNode*) (*it)->node, node);
-    if (score < cur_score) return score;
-}
-    return cur_score;
-}
-
-void PhyloTree::searchNNI() {
-    cout << "Search with Nearest Neighbor Interchange..." << endl;
-    double cur_score = computeParsimonyScore();
-    do {
-        double score = searchNNI(cur_score);
-        if (score >= cur_score)
-            break;
-        cout << "Better score found: " << score << endl;
-        cur_score = score;
-    } while (true);
-}
-
-
-int PhyloTree::addTaxonMP(Node *added_node, Node* &target_node, Node* &target_dad, Node *node, Node *dad) {
-    Neighbor *dad_nei = dad->findNeighbor(node);
-
-    // now insert the new node in the middle of the branch node-dad
-    double len = dad_nei->length;
-    node->updateNeighbor(dad, added_node, len / 2.0);
-    dad->updateNeighbor(node, added_node, len / 2.0);
-    added_node->updateNeighbor((Node*) 1, node, len / 2.0);
-    added_node->updateNeighbor((Node*) 2, dad, len / 2.0);
-    // compute the likelihood
-    //clearAllPartialLh();
-    int best_score = computeParsimonyScore();
-    target_node = node;
-    target_dad = dad;
-    // remove the added node
-    node->updateNeighbor(added_node, dad, len);
-    dad->updateNeighbor(added_node, node, len);
-    added_node->updateNeighbor(node, (Node*) 1, len);
-    added_node->updateNeighbor(dad, (Node*) 2, len);
-
-    // now tranverse the tree downwards
-
-    FOR_NEIGHBOR_IT(node, dad, it){
-    Node *target_node2;
-    Node *target_dad2;
-    double score = addTaxonMP(added_node, target_node2, target_dad2, (*it)->node, node);
-    if (score < best_score) {
-        best_score = score;
-        target_node = target_node2;
-        target_dad = target_dad2;
-    }
-}
-    return best_score;
-}
-
-void PhyloTree::growTreeMP(Alignment *alignment) {
-
-    cout << "Stepwise addition using maximum parsimony..." << endl;
-    aln = alignment;
-    int size = aln->getNSeq();
-    if (size < 3)
-        outError(ERR_FEW_TAXA);
-
-    root = newNode();
-    Node *new_taxon;
-
-    // create initial tree with 3 taxa
-    for (leafNum = 0; leafNum < 3; leafNum++) {
-        if (verbose_mode >= VB_MAX)
-            cout << "Add " << aln->getSeqName(leafNum) << " to the tree" << endl;
-        new_taxon = newNode(leafNum, aln->getSeqName(leafNum).c_str());
-        root->addNeighbor(new_taxon, 1.0);
-        new_taxon->addNeighbor(root, 1.0);
-    }
-    root = findNodeID(0);
-    //optimizeAllBranches();
-
-    // stepwise adding the next taxon
-    for (leafNum = 3; leafNum < size; leafNum++) {
-        if (verbose_mode >= VB_MAX)
-            cout << "Add " << aln->getSeqName(leafNum) << " to the tree";
-        // allocate a new taxon and a new ajedcent internal node
-        new_taxon = newNode(leafNum, aln->getSeqName(leafNum).c_str());
-        Node *added_node = newNode();
-        added_node->addNeighbor(new_taxon, 1.0);
-        new_taxon->addNeighbor(added_node, 1.0);
-
-        // preserve two neighbors
-        added_node->addNeighbor((Node*) 1, 1.0);
-        added_node->addNeighbor((Node*) 2, 1.0);
-
-        Node *target_node = NULL;
-        Node *target_dad = NULL;
-        int score = addTaxonMP(added_node, target_node, target_dad, root->neighbors[0]->node, root);
-        if (verbose_mode >= VB_MAX)
-            cout << ", score = " << score << endl;
-        // now insert the new node in the middle of the branch node-dad
-        double len = target_dad->findNeighbor(target_node)->length;
-        target_node->updateNeighbor(target_dad, added_node, len / 2.0);
-        target_dad->updateNeighbor(target_node, added_node, len / 2.0);
-        added_node->updateNeighbor((Node*) 1, target_node, len / 2.0);
-        added_node->updateNeighbor((Node*) 2, target_dad, len / 2.0);
-        // compute the likelihood
-        //clearAllPartialLh();
-        //optimizeAllBranches();
-        //optimizeNNI();
-    }
-
-    nodeNum = 2 * leafNum - 2;
-}
 
 /****************************************************************************
  likelihood function
@@ -1356,11 +812,11 @@ void PhyloTree::getMemoryRequired(uint64_t &partial_lh_entries, uint64_t &scale_
 	uint64_t tip_partial_lh_size = aln->num_states * (aln->STATE_UNKNOWN+1) * model->getNMixtures();
     if (sse == LK_EIGEN || sse == LK_EIGEN_SSE) {
         if (params->lh_mem_save == LM_PER_NODE)
-            partial_lh_entries = ((uint64_t)leafNum - 2) * (uint64_t) block_size + 2 + tip_partial_lh_size;
+            partial_lh_entries = ((uint64_t)leafNum - 2) * (uint64_t) block_size + 4 + tip_partial_lh_size;
         else
-            partial_lh_entries = ((uint64_t)leafNum * 3 - 6) * (uint64_t) block_size + 2 + tip_partial_lh_size;
+            partial_lh_entries = ((uint64_t)leafNum * 3 - 6) * (uint64_t) block_size + 4 + tip_partial_lh_size;
     } else
-    	partial_lh_entries = ((uint64_t)leafNum * 4 - 6) * (uint64_t) block_size + 2 + tip_partial_lh_size;
+    	partial_lh_entries = ((uint64_t)leafNum * 4 - 6) * (uint64_t) block_size + 4 + tip_partial_lh_size;
 
 
 	if (sse == LK_EIGEN || sse == LK_EIGEN_SSE) {
@@ -1587,15 +1043,15 @@ double PhyloTree::computeLikelihood(double *pattern_lh) {
 //        assert(current_it_back);
     }
     double score;
-    string root_name = ROOT_NAME;
-    Node *vroot = findLeafName(root_name);
-    if (root_state != aln->STATE_UNKNOWN && vroot) {
-        if (verbose_mode >= VB_DEBUG)
-            cout << __func__ << " HIT ROOT STATE " << endl;
-        score = computeLikelihoodRooted((PhyloNeighbor*) vroot->neighbors[0], (PhyloNode*) vroot);
-    } else {
+//    string root_name = ROOT_NAME;
+//    Node *vroot = findLeafName(root_name);
+//    if (root_state != aln->STATE_UNKNOWN && vroot) {
+//        if (verbose_mode >= VB_DEBUG)
+//            cout << __func__ << " HIT ROOT STATE " << endl;
+//        score = computeLikelihoodRooted((PhyloNeighbor*) vroot->neighbors[0], (PhyloNode*) vroot);
+//    } else {
         score = computeLikelihoodBranch(current_it, (PhyloNode*) current_it_back->node);
-    }
+//    }
     if (pattern_lh)
         memmove(pattern_lh, _pattern_lh, aln->size() * sizeof(double));
 
@@ -1611,26 +1067,27 @@ double PhyloTree::computeLikelihood(double *pattern_lh) {
          outError("Scaling error ", __func__);
          }*/
     }
+    curScore = score;
     return score;
 }
 
-double PhyloTree::computeLikelihoodRooted(PhyloNeighbor *dad_branch, PhyloNode *dad) {
-    double score = computeLikelihoodBranchNaive(dad_branch, dad);
-    if (verbose_mode >= VB_DEBUG) {
-        printTransMatrices(dad_branch->node, dad);
-        /*
-         FOR_NEIGHBOR_IT(dad_branch->node, dad, it) {
-         PhyloNeighbor *pit = (PhyloNeighbor*)(*it);
-         cout << pit->node->name << "\t" << pit->partial_lh[0] << endl;
-
-         }*/
-    }
-    double* state_freq = new double[aln->num_states];
-    model->getStateFrequency(state_freq);
-    score -= log(state_freq[(int) root_state]);
-    delete[] state_freq;
-    return score;
-}
+//double PhyloTree::computeLikelihoodRooted(PhyloNeighbor *dad_branch, PhyloNode *dad) {
+//    double score = computeLikelihoodBranchNaive(dad_branch, dad);
+//    if (verbose_mode >= VB_DEBUG) {
+//        printTransMatrices(dad_branch->node, dad);
+//        /*
+//         FOR_NEIGHBOR_IT(dad_branch->node, dad, it) {
+//         PhyloNeighbor *pit = (PhyloNeighbor*)(*it);
+//         cout << pit->node->name << "\t" << pit->partial_lh[0] << endl;
+//
+//         }*/
+//    }
+//    double* state_freq = new double[aln->num_states];
+//    model->getStateFrequency(state_freq);
+//    score -= log(state_freq[(int) root_state]);
+//    delete[] state_freq;
+//    return score;
+//}
 
 int PhyloTree::getNumLhCat(SiteLoglType wsl) {
     int ncat = 0;
@@ -1655,11 +1112,12 @@ double PhyloTree::computePatternLhCat(SiteLoglType wsl) {
         current_it = (PhyloNeighbor*)leaf->neighbors[0];
         current_it_back = (PhyloNeighbor*)current_it->node->findNeighbor(leaf);
     }
-    if (sse == LK_NORMAL || sse == LK_SSE) {
-        if (getModel()->isMixture())
-            outError("Naive kernel does not support mixture models, contact author if you really need this feature");
-        return computeLikelihoodBranchNaive(current_it, (PhyloNode*)current_it_back->node);
-    } else if (!getModel()->isMixture())
+//    if (sse == LK_NORMAL || sse == LK_SSE) {
+//        if (getModel()->isMixture())
+//            outError("Naive kernel does not support mixture models, contact author if you really need this feature");
+//        return computeLikelihoodBranchNaive(current_it, (PhyloNode*)current_it_back->node);
+//    } else 
+    if (!getModel()->isMixture())
         return computeLikelihoodBranchEigen(current_it, (PhyloNode*)current_it_back->node);
     else if (getModelFactory()->fused_mix_rate)
         return computeMixrateLikelihoodBranchEigen(current_it, (PhyloNode*)current_it_back->node);
@@ -2621,149 +2079,6 @@ void PhyloTree::computeAllBayesianBranchLengths(Node *node, Node *dad) {
     }
 }
 
-//double PhyloTree::computeLikelihoodBranchNaive(PhyloNeighbor *dad_branch, PhyloNode *dad, double *pattern_lh, double *pattern_rate) {
-double PhyloTree::computeLikelihoodBranchNaive(PhyloNeighbor *dad_branch, PhyloNode *dad) {
-    PhyloNode *node = (PhyloNode*) dad_branch->node;
-    PhyloNeighbor *node_branch = (PhyloNeighbor*) node->findNeighbor(dad);
-    //assert(node_branch);
-    //assert(!site_rate->isSiteSpecificRate() || !model->isSiteSpecificModel());
-    if (!central_partial_lh)
-        initializeAllPartialLh();
-    // swap node and dad if dad is a leaf
-    // NEW: swap if root_state is given
-    if (node->isLeaf() || (node->name == ROOT_NAME && root_state != aln->STATE_UNKNOWN)) {
-        PhyloNode *tmp_node = dad;
-        dad = node;
-        node = tmp_node;
-        PhyloNeighbor *tmp_nei = dad_branch;
-        dad_branch = node_branch;
-        node_branch = tmp_nei;
-        //cout << "swapped\n";
-    }
-
-    if ((dad_branch->partial_lh_computed & 1) == 0)
-        computePartialLikelihood(dad_branch, dad);
-    if ((node_branch->partial_lh_computed & 1) == 0)
-        computePartialLikelihood(node_branch, node);
-    // now combine likelihood at the branch
-
-    double tree_lh = node_branch->lh_scale_factor + dad_branch->lh_scale_factor;
-    int ncat = site_rate->getNRate();
-    double p_invar = site_rate->getPInvar();
-    double p_var_cat = (1.0 - p_invar) / (double) ncat;
-    int nstates = aln->num_states;
-    size_t block = ncat * nstates;
-    int trans_size = model->getTransMatrixSize();
-    size_t ptn; // for big data size > 4GB memory required
-    int cat, state1, state2;
-    size_t nptn = aln->size() + model_factory->unobserved_ptns.size();
-    size_t orig_nptn = aln->size();
-    int discrete_cat = site_rate->getNDiscreteRate();
-    double *trans_mat = new double[discrete_cat * trans_size];
-    double *state_freq = new double[nstates];
-    model->getStateFrequency(state_freq);
-
-    if (!site_rate->isSiteSpecificRate())
-        for (cat = 0; cat < discrete_cat; cat++) {
-            //trans_mat[cat] = model->newTransMatrix();
-            double *trans_cat = trans_mat + (cat * trans_size);
-            model_factory->computeTransMatrixFreq(dad_branch->length * site_rate->getRate(cat), state_freq, trans_cat);
-        }
-
-    bool not_ptn_cat = (site_rate->getPtnCat(0) < 0);
-    double prob_const = 0.0; // probability of unobserved const patterns
-#ifdef _OPENMP
-#pragma omp parallel for reduction(+: tree_lh, prob_const) private(ptn, cat, state1, state2)
-#endif
-    for (ptn = 0; ptn < nptn; ptn++) {
-        double lh_ptn = 0.0; // likelihood of the pattern
-        int dad_state = 1000; // just something big enough
-        int ptn_cat = site_rate->getPtnCat(ptn);
-        if (dad->name == ROOT_NAME && root_state != aln->STATE_UNKNOWN) {
-            dad_state = root_state;
-        } else if (dad->isLeaf()) {
-        	if (ptn < orig_nptn)
-        		dad_state = (*aln)[ptn][dad->id];
-        	else
-        		dad_state = model_factory->unobserved_ptns[ptn-orig_nptn];
-        }
-        int dad_offset = dad_state * nstates;
-        if (site_rate->isSiteSpecificRate()) {
-        	if (ptn < orig_nptn)
-        		model_factory->computeTransMatrixFreq(dad_branch->length * site_rate->getPtnRate(ptn), state_freq, trans_mat);
-        	else
-        		model_factory->computeTransMatrixFreq(dad_branch->length, state_freq, trans_mat);
-        }
-        for (cat = 0; cat < ncat; cat++) {
-            double lh_cat = 0.0; // likelihood of the pattern's category
-            size_t lh_offset = cat * nstates + ptn * block;
-            double *partial_lh_site = node_branch->partial_lh + lh_offset;
-            double *partial_lh_child = dad_branch->partial_lh + lh_offset;
-            if (dad_state < nstates) { // single state
-                // external node
-                double *trans_state = trans_mat + ((not_ptn_cat ? cat : ptn_cat) * trans_size + dad_offset);
-                if (model->isSiteSpecificModel() && ptn < nptn)
-                    trans_state += (nstates * nstates * model->getPtnModelID(ptn));
-                for (state2 = 0; state2 < nstates; state2++)
-                    lh_cat += partial_lh_child[state2] * trans_state[state2];
-            } else {
-                // internal node, or external node but ambiguous character
-                for (state1 = 0; state1 < nstates; state1++) {
-                    double lh_state = 0.0; // likelihood of state1
-                    double *trans_state = trans_mat + ((not_ptn_cat ? cat : ptn_cat) * trans_size + state1 * nstates);
-                    if (model->isSiteSpecificModel() && ptn < nptn)
-                        trans_state += (nstates * nstates * model->getPtnModelID(ptn));
-                    for (state2 = 0; state2 < nstates; state2++)
-                        lh_state += partial_lh_child[state2] * trans_state[state2];
-                    lh_cat += lh_state * partial_lh_site[state1];
-                }
-            }
-            lh_ptn += lh_cat;
-            _pattern_lh_cat[ptn * ncat + cat] = lh_cat;
-//            if (pattern_rate)
-//                rate_ptn += lh_cat * site_rate->getRate(cat);
-        }
-        if (ptn < orig_nptn) {
-//			if (pattern_rate)
-//				pattern_rate[ptn] = rate_ptn / lh_ptn;
-			lh_ptn *= p_var_cat;
-			if ((*aln)[ptn].const_char == nstates)
-				lh_ptn += p_invar;
-			else if ((*aln)[ptn].const_char < nstates) {
-				lh_ptn += p_invar * state_freq[(int) (*aln)[ptn].const_char];
-			}
-			//#ifdef DEBUG
-			if (lh_ptn <= 0.0)
-				cout << "Negative likelihood: " << lh_ptn << " " << site_rate->getPtnRate(ptn) << endl;
-			//#endif
-        	lh_ptn = log(lh_ptn);
-			_pattern_lh[ptn] = lh_ptn;
-			if (discard_saturated_site && site_rate->isSiteSpecificRate() && site_rate->getPtnRate(ptn) >= MAX_SITE_RATE)
-				continue;
-			tree_lh += lh_ptn * aln->at(ptn).frequency;
-        } else {
-        	lh_ptn = lh_ptn*p_var_cat + p_invar*state_freq[(int)model_factory->unobserved_ptns[ptn-orig_nptn]];
-        	prob_const += lh_ptn;
-        }
-
-    }
-    if (orig_nptn < nptn) {
-    	// ascertainment bias correction
-    	prob_const = log(1.0 - prob_const);
-    	for (ptn = 0; ptn < orig_nptn; ptn++)
-    		_pattern_lh[ptn] -= prob_const;
-    	tree_lh -= aln->getNSite()*prob_const;
-    }
-//    if (pattern_lh)
-//        memmove(pattern_lh, _pattern_lh, aln->size() * sizeof(double));
-    delete[] state_freq;
-    delete[] trans_mat;
-    //for (cat = ncat-1; cat >= 0; cat--)
-    //delete trans_mat[cat];
-    //delete state_freq;
-
-    return tree_lh;
-}
 
 double PhyloTree::computeLikelihoodZeroBranch(PhyloNeighbor *dad_branch, PhyloNode *dad) {
     double lh_zero_branch;
@@ -2779,406 +2094,6 @@ double PhyloTree::computeLikelihoodZeroBranch(PhyloNeighbor *dad_branch, PhyloNo
     return lh_zero_branch;
 }
 
-void PhyloTree::computePartialLikelihoodNaive(PhyloNeighbor *dad_branch, PhyloNode *dad) {
-    // don't recompute the likelihood
-    if (dad_branch->partial_lh_computed & 1)
-        return;
-    Node * node = dad_branch->node;
-    size_t ptn, cat;
-    int ncat = site_rate->getNRate();
-    int nstates = aln->num_states;
-    size_t block = nstates * site_rate->getNRate();
-    int trans_size = model->getTransMatrixSize();
-    size_t lh_size = (aln->size()+model_factory->unobserved_ptns.size()) * block;
-    double *partial_lh_site;
-    size_t nptn = aln->size()+model_factory->unobserved_ptns.size();
-    size_t orig_nptn = aln->size();
-
-    dad_branch->lh_scale_factor = 0.0;
-    memset(dad_branch->scale_num, 0, nptn * sizeof(UBYTE));
-
-    assert(dad_branch->partial_lh);
-    //if (!dad_branch->partial_lh)
-    //	dad_branch->partial_lh = newPartialLh();
-    if (node->isLeaf() && dad) {
-        /* external node */
-        memset(dad_branch->partial_lh, 0, lh_size * sizeof(double));
-        for (ptn = 0; ptn < nptn; ptn++) {
-            char state;
-            partial_lh_site = dad_branch->partial_lh + (ptn * block);
-            if (node->name == ROOT_NAME) {
-                state = aln->STATE_UNKNOWN;
-            } else {
-                assert(node->id < aln->getNSeq());
-                if (ptn < orig_nptn)
-                	state = (aln->at(ptn))[node->id];
-                else // ascertainment bias correction
-                	state = model_factory->unobserved_ptns[ptn-orig_nptn];
-            }
-            if (state < nstates) {
-				for (cat = 0; cat < ncat; cat++)
-					partial_lh_site[cat * nstates + state] = 1.0;
-			} else if (state == aln->STATE_UNKNOWN) {
-                // fill all entries (also over rate category) with 1.0
-                dad_branch->scale_num[ptn] = -1;
-                for (int state2 = 0; state2 < block; state2++) {
-                    partial_lh_site[state2] = 1.0;
-                }
-            } else if (aln->seq_type == SEQ_DNA) {
-                // ambiguous character, for DNA, RNA
-                state = state - (nstates - 1);
-                for (int state2 = 0; state2 < nstates && state2 <= 6; state2++)
-                    if (state & (1 << state2)) {
-                        for (cat = 0; cat < ncat; cat++)
-                            partial_lh_site[cat * nstates + state2] = 1.0;
-                    }
-            } else if (aln->seq_type == SEQ_PROTEIN) {
-                // ambiguous character, for DNA, RNA
-                state = state - (nstates);
-                assert(state < 3);
-                int state_map[] = {4+8,32+64,512+1024};
-                for (int state2 = 0; state2 < 11; state2++)
-                    if (state_map[(int)state] & (1 << state2)) {
-                        for (cat = 0; cat < ncat; cat++)
-                            partial_lh_site[cat * nstates + state2] = 1.0;
-                    }
-            } else {
-            	outError("Internal error ", __func__);
-            }
-        }
-    } else {
-        /* internal node */
-        int discrete_cat = site_rate->getNDiscreteRate();
-        double *trans_mat = new double[discrete_cat * trans_size];
-        //for (cat = 0; cat < discrete_cat; cat++) trans_mat[cat] = model->newTransMatrix();
-        for (ptn = 0; ptn < lh_size; ptn++) {
-            dad_branch->partial_lh[ptn] = 1.0;
-        }
-        for (ptn = 0; ptn < nptn; ptn++)
-            dad_branch->scale_num[ptn] = -1;
-
-        FOR_NEIGHBOR_IT(node, dad, it)if ((*it)->node->name != ROOT_NAME) {
-            computePartialLikelihoodNaive((PhyloNeighbor*) (*it), (PhyloNode*) node);
-
-            dad_branch->lh_scale_factor += ((PhyloNeighbor*) (*it))->lh_scale_factor;
-
-            if (!site_rate->isSiteSpecificRate())
-            for (cat = 0; cat < discrete_cat; cat++)
-            model_factory->computeTransMatrix((*it)->length * site_rate->getRate(cat), trans_mat + (cat * trans_size));
-
-            bool not_ptn_cat = (site_rate->getPtnCat(0) < 0);
-
-            double sum_scale = 0.0;
-#ifdef _OPENMP
-#pragma omp parallel for reduction(+: sum_scale) private(ptn, cat, partial_lh_site)
-#endif
-            for (ptn = 0; ptn < nptn; ptn++)
-            if (((PhyloNeighbor*) (*it))->scale_num[ptn] >= 0) {
-                // avoid the case that all child partial likelihoods equal 1.0
-                //double *partial_lh_child = ((PhyloNeighbor*) (*it))->partial_lh + (ptn*block);
-                //if (partial_lh_child[0] < 0.0) continue;
-                //
-                if (dad_branch->scale_num[ptn] < 0) dad_branch->scale_num[ptn] = 0;
-                dad_branch->scale_num[ptn] += ((PhyloNeighbor*) (*it))->scale_num[ptn];
-                int ptn_cat = 0;
-                if (ptn < orig_nptn) {
-                	ptn_cat = site_rate->getPtnCat(ptn);
-					if (site_rate->isSiteSpecificRate())
-						model_factory->computeTransMatrix((*it)->length * site_rate->getPtnRate(ptn), trans_mat);
-                } else {
-					if (site_rate->isSiteSpecificRate())
-						model_factory->computeTransMatrix((*it)->length, trans_mat);
-                }
-                for (cat = 0; cat < ncat; cat++) {
-                    size_t lh_offset = cat * nstates + ptn*block;
-                    partial_lh_site = dad_branch->partial_lh + lh_offset;
-                    double *partial_lh_child = ((PhyloNeighbor*) (*it))->partial_lh + lh_offset;
-                    for (int state = 0; state < nstates; state++) {
-                        double lh_child = 0.0;
-                        double *trans_state = trans_mat + ((not_ptn_cat ? cat : ptn_cat) * trans_size + state * nstates);
-                        if (model->isSiteSpecificModel() && ptn < orig_nptn)
-                        	trans_state += (nstates * nstates * model->getPtnModelID(ptn));
-                        for (int state2 = 0; state2 < nstates; state2++)
-                        lh_child += trans_state[state2] * partial_lh_child[state2];
-
-                        if (!isfinite(lh_child))
-                        	outError("Numerical error with ", __func__);
-                        partial_lh_site[state] *= lh_child;
-                    }
-                }
-                // check if one should scale partial likelihoods
-                bool do_scale = true;
-                partial_lh_site = dad_branch->partial_lh + (ptn * block);
-                for (cat = 0; cat < block; cat++)
-                if (partial_lh_site[cat] > SCALING_THRESHOLD) {
-                    do_scale = false;
-                    break;
-                }
-                if (!do_scale) continue;
-                // now do the likelihood scaling
-                /*
-                 double lh_max = partial_lh_site[0];
-                 for (cat = 1; cat < block; cat++)
-                 if (lh_max < partial_lh_site[cat]) lh_max = partial_lh_site[cat];
-                 for (cat = 0; cat < block; cat++)
-                 partial_lh_site[cat] /= lh_max;
-                 dad_branch->lh_scale_factor += log(lh_max) * (*aln)[ptn].frequency;
-
-                 */
-                for (cat = 0; cat < block; cat++)
-                partial_lh_site[cat] /= SCALING_THRESHOLD;
-                // unobserved const pattern will never have underflow
-                sum_scale += LOG_SCALING_THRESHOLD * (*aln)[ptn].frequency;
-                dad_branch->scale_num[ptn] += 1;
-
-//                if (pattern_scale)
-//                pattern_scale[ptn] += LOG_SCALING_THRESHOLD;
-            }
-            dad_branch->lh_scale_factor += sum_scale;
-        }
-        delete[] trans_mat;
-        //for (cat = ncat - 1; cat >= 0; cat--)
-        //  delete [] trans_mat[cat];
-    }
-    dad_branch->partial_lh_computed |= 1;
-
-}
-
-void PhyloTree::computeLikelihoodDervNaive(PhyloNeighbor *dad_branch, PhyloNode *dad, double &df, double &ddf) {
-    PhyloNode *node = (PhyloNode*) dad_branch->node;
-    PhyloNeighbor *node_branch = (PhyloNeighbor*) node->findNeighbor(dad);
-    assert(node_branch);
-    // swap node and dad if dad is a leaf
-    // NEW: swap if root_state is given
-    if (node->isLeaf() || (node->name == ROOT_NAME && root_state != aln->STATE_UNKNOWN)) {
-        PhyloNode *tmp_node = dad;
-        dad = node;
-        node = tmp_node;
-        PhyloNeighbor *tmp_nei = dad_branch;
-        dad_branch = node_branch;
-        node_branch = tmp_nei;
-        //cout << "swapped\n";
-    }
-    if ((dad_branch->partial_lh_computed & 1) == 0)
-        computePartialLikelihoodNaive(dad_branch, dad);
-    if ((node_branch->partial_lh_computed & 1) == 0)
-        computePartialLikelihoodNaive(node_branch, node);
-
-    // now combine likelihood at the branch
-//    double tree_lh = node_branch->lh_scale_factor + dad_branch->lh_scale_factor;
-    df = ddf = 0.0;
-    int ncat = site_rate->getNRate();
-    double p_invar = site_rate->getPInvar();
-    double p_var_cat = (1.0 - p_invar) / (double) ncat;
-    int nstates = aln->num_states;
-    size_t block = ncat * nstates;
-    int trans_size = model->getTransMatrixSize();
-    size_t nptn = aln->size() + model_factory->unobserved_ptns.size();
-    size_t orig_nptn = aln->size();
-    size_t ptn, cat, state1, state2;
-
-    int discrete_cat = site_rate->getNDiscreteRate();
-
-    double *trans_mat = new double[discrete_cat * trans_size];
-    double *trans_derv1 = new double[discrete_cat * trans_size];
-    double *trans_derv2 = new double[discrete_cat * trans_size];
-    double *state_freq = new double[nstates];
-    model->getStateFrequency(state_freq);
-
-    if (!site_rate->isSiteSpecificRate())
-        for (cat = 0; cat < discrete_cat; cat++) {
-            //trans_mat[cat] = model->newTransMatrix();
-            double *trans_cat = trans_mat + (cat * trans_size);
-            double *derv1_cat = trans_derv1 + (cat * trans_size);
-            double *derv2_cat = trans_derv2 + (cat * trans_size);
-            double rate_val = site_rate->getRate(cat);
-            //double rate_sqr = rate_val * rate_val;
-            model_factory->computeTransDervFreq(dad_branch->length, rate_val, state_freq, trans_cat, derv1_cat,
-                    derv2_cat);
-            /*
-             for (state1 = 0; state1 < nstates; state1++) {
-             double *trans_mat_state = trans_cat + (state1 * nstates);
-             double *trans_derv1_state = derv1_cat + (state1 * nstates);
-             double *trans_derv2_state = derv2_cat + (state1 * nstates);
-
-             for (state2 = 0; state2 < nstates; state2++) {
-             trans_mat_state[state2] *= state_freq[state1];
-             trans_derv1_state[state2] *= (state_freq[state1] * rate_val);
-             trans_derv2_state[state2] *= (state_freq[state1] * rate_sqr);
-             }
-             }*/
-        }
-
-    bool not_ptn_cat = (site_rate->getPtnCat(0) < 0);
-    double derv1_frac;
-    double derv2_frac;
-
-    double my_df = 0.0;
-    double my_ddf = 0.0;
-    double prob_const = 0.0;
-    double prob_const_derv1 = 0.0, prob_const_derv2 = 0.0;
-
-#ifdef _OPENMP
-#pragma omp parallel for reduction(+: my_df, my_ddf, prob_const, prob_const_derv1, prob_const_derv2) private(ptn, cat, state1, state2, derv1_frac, derv2_frac)
-#endif
-    for (ptn = 0; ptn < nptn; ptn++) {
-        int ptn_cat = site_rate->getPtnCat(ptn);
-        if (discard_saturated_site && site_rate->isSiteSpecificRate() && nptn<orig_nptn &&site_rate->getPtnRate(ptn) >= MAX_SITE_RATE)
-            continue;
-        double lh_ptn = 0.0; // likelihood of the pattern
-        double lh_ptn_derv1 = 0.0;
-        double lh_ptn_derv2 = 0.0;
-        int dad_state = aln->STATE_UNKNOWN;
-
-        if (dad->name == ROOT_NAME && root_state != aln->STATE_UNKNOWN)
-            dad_state = root_state;
-        else if (dad->isLeaf()) {
-        	if (ptn < orig_nptn)
-        		dad_state = (*aln)[ptn][dad->id];
-        	else
-        		dad_state = model_factory->unobserved_ptns[ptn-orig_nptn];
-        }
-        int dad_offset = dad_state * nstates;
-        if (site_rate->isSiteSpecificRate()) {
-        	if (ptn < orig_nptn)
-            model_factory->computeTransDervFreq(dad_branch->length, site_rate->getPtnRate(ptn), state_freq, trans_mat,
-                    trans_derv1, trans_derv2);
-        	else
-                model_factory->computeTransDervFreq(dad_branch->length, 1.0, state_freq, trans_mat,
-                        trans_derv1, trans_derv2);
-        }
-        for (cat = 0; cat < ncat; cat++) {
-            size_t lh_offset = cat * nstates + ptn * block;
-            double *partial_lh_site = node_branch->partial_lh + lh_offset;
-            double *partial_lh_child = dad_branch->partial_lh + lh_offset;
-            if (dad_state < nstates) {
-                // external node
-                int cat2 = (not_ptn_cat ? cat : ptn_cat) * trans_size + dad_offset;
-                if (model->isSiteSpecificModel() && ptn < orig_nptn)
-                    cat2 += (nstates * nstates * model->getPtnModelID(ptn));
-                double *trans_state = trans_mat + cat2;
-                double *derv1_state = trans_derv1 + cat2;
-                double *derv2_state = trans_derv2 + cat2;
-                for (state2 = 0; state2 < nstates; state2++) {
-                    lh_ptn += partial_lh_child[state2] * trans_state[state2];
-                    lh_ptn_derv1 += partial_lh_child[state2] * derv1_state[state2];
-                    lh_ptn_derv2 += partial_lh_child[state2] * derv2_state[state2];
-                }
-            } else {
-                // internal node, or external node but ambiguous character
-                for (state1 = 0; state1 < nstates; state1++) {
-                    double lh_state = 0.0; // likelihood of state1
-                    double lh_state_derv1 = 0.0;
-                    double lh_state_derv2 = 0.0;
-                    int cat2 = (not_ptn_cat ? cat : ptn_cat) * trans_size + state1 * nstates;
-                    if (model->isSiteSpecificModel() && ptn < orig_nptn)
-                        cat2 += (nstates * nstates * model->getPtnModelID(ptn));
-                    double *trans_state = trans_mat + cat2;
-                    double *derv1_state = trans_derv1 + cat2;
-                    double *derv2_state = trans_derv2 + cat2;
-                    for (state2 = 0; state2 < nstates; state2++) {
-                        lh_state += partial_lh_child[state2] * trans_state[state2];
-                        lh_state_derv1 += partial_lh_child[state2] * derv1_state[state2];
-                        lh_state_derv2 += partial_lh_child[state2] * derv2_state[state2];
-                    }
-                    lh_ptn += lh_state * partial_lh_site[state1];
-                    lh_ptn_derv1 += lh_state_derv1 * partial_lh_site[state1];
-                    lh_ptn_derv2 += lh_state_derv2 * partial_lh_site[state1];
-                }
-            }
-        }
-        /*		if (p_invar > 0.0) {
-         lh_ptn *= p_var_cat;
-         lh_ptn_derv1 *= p_var_cat;
-         lh_ptn_derv2 *= p_var_cat;
-         if ((*aln)[ptn].is_const && (*aln)[ptn].const_char < nstates) {
-         lh_ptn += p_invar * state_freq[(int) (*aln)[ptn].const_char];
-         }
-         assert(lh_ptn > 0);
-         double derv1_frac = lh_ptn_derv1 / lh_ptn;
-         double derv2_frac = lh_ptn_derv2 / lh_ptn;
-         tree_lh += log(lh_ptn) * (*aln)[ptn].frequency;
-         df += derv1_frac * (*aln)[ptn].frequency;
-         ddf += (derv2_frac - derv1_frac * derv1_frac) * (*aln)[ptn].frequency;
-         } else {
-         double derv1_frac = lh_ptn_derv1 / lh_ptn;
-         double derv2_frac = lh_ptn_derv2 / lh_ptn;
-         lh_ptn *= p_var_cat;
-         assert(lh_ptn > 0);
-         tree_lh += log(lh_ptn) * (*aln)[ptn].frequency;
-         df += derv1_frac * (*aln)[ptn].frequency;
-         ddf += (derv2_frac - derv1_frac * derv1_frac) * (*aln)[ptn].frequency;
-
-         }
-         */
-        // Tung beo's trick
-        if (lh_ptn<=0) {
-            cout << "Abnormal " << __func__;
-            abort();
-        }
-
-        if (ptn < orig_nptn) {
-            lh_ptn = lh_ptn * p_var_cat;
-			if ((*aln)[ptn].const_char == nstates)
-				lh_ptn += p_invar;
-			else if ((*aln)[ptn].const_char < nstates) {
-				lh_ptn += p_invar * state_freq[(int) (*aln)[ptn].const_char];
-			}
-	        double pad = p_var_cat / lh_ptn;
-	        if (std::isinf(pad)) {
-	            lh_ptn_derv1 *= p_var_cat;
-	            lh_ptn_derv2 *= p_var_cat;
-	            derv1_frac = lh_ptn_derv1 / lh_ptn;
-	            derv2_frac = lh_ptn_derv2 / lh_ptn;
-	        } else {
-	            derv1_frac = lh_ptn_derv1 * pad;
-	            derv2_frac = lh_ptn_derv2 * pad;
-	        }
-	        double freq = (*aln)[ptn].frequency;
-	        double tmp1 = derv1_frac * freq;
-	        double tmp2 = derv2_frac * freq;
-	        my_df += tmp1;
-	        my_ddf += tmp2 - tmp1 * derv1_frac;
-//	        lh_ptn = log(lh_ptn);
-//	        tree_lh += lh_ptn * freq;
-//	        _pattern_lh[ptn] = lh_ptn;
-	        if (!isfinite(lh_ptn) || !isfinite(my_df) || !isfinite(my_ddf)) {
-	            cout << "Abnormal " << __func__;
-	            abort();
-	        }
-        } else {
-        	lh_ptn = lh_ptn*p_var_cat + p_invar*state_freq[(int)model_factory->unobserved_ptns[ptn-orig_nptn]];
-        	prob_const += lh_ptn;
-        	prob_const_derv1 += lh_ptn_derv1 * p_var_cat;
-        	prob_const_derv2 += lh_ptn_derv2 * p_var_cat;
-        }
-
-    }
-    if (orig_nptn < nptn) {
-    	// ascertainment bias correction
-    	prob_const = 1.0 - prob_const;
-    	derv1_frac = prob_const_derv1 / prob_const;
-    	derv2_frac = prob_const_derv2 / prob_const;
-    	int nsites = aln->getNSite();
-    	my_df += nsites * derv1_frac;
-    	my_ddf += nsites *(derv2_frac + derv1_frac*derv1_frac);
-//    	prob_const = log(prob_const);
-//    	tree_lh -= nsites * prob_const;
-//    	for (ptn = 0; ptn < orig_nptn; ptn++)
-//    		_pattern_lh[ptn] -= prob_const;
-    }
-    delete[] state_freq;
-    delete[] trans_derv2;
-    delete[] trans_derv1;
-    delete[] trans_mat;
-    //for (cat = ncat-1; cat >= 0; cat--)
-    //delete trans_mat[cat];
-    //delete state_freq;
-    df = my_df;
-    ddf = my_ddf;
-
-//    return tree_lh;
-}
 
 /****************************************************************************
  Branch length optimization by maximum likelihood
@@ -3260,7 +2175,7 @@ void PhyloTree::optimizeOneBranch(PhyloNode *node1, PhyloNode *node2, bool clear
 	}	else {
         // Brent method
         optx = minimizeOneDimen(params->min_branch_length, current_len, params->max_branch_length, params->min_branch_length, &negative_lh, &ferror);
-        if (verbose_mode >= VB_DEBUG) {
+        if (verbose_mode >= VB_MAX) {
             cout << "minimizeBrent logl: " << -negative_lh << endl;
         }
 	}
@@ -3391,7 +2306,7 @@ double PhyloTree::optimizeAllBranches(int my_iterations, double tolerance, int m
 //        }
         
 
-        if (new_tree_lh < tree_lh) {
+        if (new_tree_lh < tree_lh - tolerance*0.1) {
         	// IN RARE CASE: tree log-likelihood decreases, revert the branch length and stop
         	if (verbose_mode >= VB_MED)
         		cout << "NOTE: Restoring branch lengths as tree log-likelihood decreases after branch length optimization: "
@@ -3414,6 +2329,7 @@ double PhyloTree::optimizeAllBranches(int my_iterations, double tolerance, int m
         	return new_tree_lh;
         tree_lh = new_tree_lh;
     }
+    curScore = tree_lh;
     return tree_lh;
 }
 
diff --git a/phylotree.h b/phylotree.h
index 68b33dd..259f649 100644
--- a/phylotree.h
+++ b/phylotree.h
@@ -22,7 +22,7 @@
 
 //#define EIGEN_TUNE_FOR_CPU_CACHE_SIZE (512*256)
 //#define EIGEN_TUNE_FOR_CPU_CACHE_SIZE (8*512*512)
-#include <Eigen/Core>
+//#include <Eigen/Core>
 #include "mtree.h"
 #include "alignment.h"
 #include "model/modelsubst.h"
@@ -55,7 +55,7 @@ const double TOL_LIKELIHOOD_PARAMOPT = 0.001; // BQM: newly introduced for Model
 
 const int SPR_DEPTH = 2;
 
-using namespace Eigen;
+//using namespace Eigen;
 
 inline size_t get_safe_upper_limit(size_t cur_limit) {
 	if (instruction_set >= 7)
@@ -132,7 +132,7 @@ inline void aligned_free(void *mem) {
 /**
  *  Row Major Array For Eigen
  */
-typedef Array<double, Dynamic, Dynamic, RowMajor> RowMajorArrayXXd;
+//typedef Array<double, Dynamic, Dynamic, RowMajor> RowMajorArrayXXd;
 
 
 typedef std::map< string, double > StringDoubleMap;
@@ -514,7 +514,7 @@ public:
      * 		Return the approximated branch length estimation using corrected parsimony branch length
      * 		This is usually used as the starting point before using Newton-Raphson
      */
-    double computeCorrectedParsimonyBranch(PhyloNeighbor *dad_branch, PhyloNode *dad);
+//    double computeCorrectedParsimonyBranch(PhyloNeighbor *dad_branch, PhyloNode *dad);
 
     /**
             initialize partial_pars vector of all PhyloNeighbors, allocating central_partial_pars
@@ -544,7 +544,7 @@ public:
             @param dad its dad, used to direct the tranversal
      */
     virtual void computePartialParsimony(PhyloNeighbor *dad_branch, PhyloNode *dad);
-    void computePartialParsimonyNaive(PhyloNeighbor *dad_branch, PhyloNode *dad);
+//    void computePartialParsimonyNaive(PhyloNeighbor *dad_branch, PhyloNode *dad);
     void computePartialParsimonyFast(PhyloNeighbor *dad_branch, PhyloNode *dad);
     template<class VectorClass>
     void computePartialParsimonyFastSIMD(PhyloNeighbor *dad_branch, PhyloNode *dad);
@@ -562,13 +562,13 @@ public:
             @return parsimony score of the tree
      */
     virtual int computeParsimonyBranch(PhyloNeighbor *dad_branch, PhyloNode *dad, int *branch_subst = NULL);
-    int computeParsimonyBranchNaive(PhyloNeighbor *dad_branch, PhyloNode *dad, int *branch_subst = NULL);
+//    int computeParsimonyBranchNaive(PhyloNeighbor *dad_branch, PhyloNode *dad, int *branch_subst = NULL);
     int computeParsimonyBranchFast(PhyloNeighbor *dad_branch, PhyloNode *dad, int *branch_subst = NULL);
     template<class VectorClass>
     int computeParsimonyBranchFastSIMD(PhyloNeighbor *dad_branch, PhyloNode *dad, int *branch_subst = NULL);
 
 
-    void printParsimonyStates(PhyloNeighbor *dad_branch = NULL, PhyloNode *dad = NULL);
+//    void printParsimonyStates(PhyloNeighbor *dad_branch = NULL, PhyloNode *dad = NULL);
 
     virtual void setParsimonyKernel(LikelihoodKernel lk);
 #if defined(BINARY32) || defined(__NOAVX__)
@@ -576,23 +576,6 @@ public:
 #else
     virtual void setParsimonyKernelAVX();
 #endif
-    /**
-            SLOW VERSION: compute the parsimony score of the tree, given the alignment
-            @return the parsimony score
-     */
-    int computeParsimonyScore();
-
-
-    /**
-            SLOW VERSION: compute the parsimony score of the tree, given the alignment
-            @return the parsimony score
-            @param node the current node
-            @param dad dad of the node, used to direct the search
-            @param ptn pattern ID
-            @param states set of admissible states at the current node (in binary code)
-     */
-    int computeParsimonyScore(int ptn, int &states, PhyloNode *node = NULL, PhyloNode *dad = NULL);
-
 
     /****************************************************************************
             likelihood function
@@ -675,16 +658,6 @@ public:
     typedef void (PhyloTree::*ComputePartialLikelihoodType)(PhyloNeighbor *, PhyloNode *);
     ComputePartialLikelihoodType computePartialLikelihoodPointer;
 
-    /**
-     * original naive version in IQ-TREE
-     */
-    void computePartialLikelihoodNaive(PhyloNeighbor *dad_branch, PhyloNode *dad = NULL);
-
-    /**
-     * this implements the SSE version using Eigen library
-     */
-    template<int NSTATES>
-    void computePartialLikelihoodSSE(PhyloNeighbor *dad_branch, PhyloNode *dad = NULL);
 
     //template <const int nstates>
     void computePartialLikelihoodEigen(PhyloNeighbor *dad_branch, PhyloNode *dad = NULL);
@@ -725,12 +698,6 @@ public:
     ComputeLikelihoodBranchType computeLikelihoodBranchPointer;
 
     /**
-     * this implements the SSE version using Eigen library
-     */
-    template<int NSTATES>
-    double computeLikelihoodBranchSSE(PhyloNeighbor *dad_branch, PhyloNode *dad);
-
-    /**
      * MINH: this implements the fast alternative strategy for reversible model (March 2013)
      * where partial likelihoods at nodes store real partial likelihoods times eigenvectors
      */
@@ -760,8 +727,6 @@ public:
     template <class VectorClass, const int VCSIZE, const int nstates>
     double computeSitemodelLikelihoodBranchEigenSIMD(PhyloNeighbor *dad_branch, PhyloNode *dad);
 
-    double computeLikelihoodBranchNaive(PhyloNeighbor *dad_branch, PhyloNode *dad);
-
     /****************************************************************************
             computing likelihood on a branch using buffer
      ****************************************************************************/
@@ -805,7 +770,7 @@ public:
         @param dad its dad, used to direct the tranversal
         @return tree likelihood
      */
-    virtual double computeLikelihoodRooted(PhyloNeighbor *dad_branch, PhyloNode *dad);
+//    virtual double computeLikelihoodRooted(PhyloNeighbor *dad_branch, PhyloNode *dad);
 
     /**
             compute the tree likelihood
@@ -978,14 +943,6 @@ public:
             computing derivatives of likelihood function
      ****************************************************************************/
 
-    void computeLikelihoodDervNaive(PhyloNeighbor *dad_branch, PhyloNode *dad, double &df, double &ddf);
-
-    /**
-     * this implements the SSE version using Eigen library
-     */
-    template<int NSTATES>
-    void computeLikelihoodDervSSE(PhyloNeighbor *dad_branch, PhyloNode *dad, double &df, double &ddf);
-
     //template <const int nstates>
     void computeLikelihoodDervEigen(PhyloNeighbor *dad_branch, PhyloNode *dad, double &df, double &ddf);
 
@@ -1047,50 +1004,6 @@ public:
      */
     int computeParsimonyTree(const char *out_prefix, Alignment *alignment);
 
-    /**
-            SLOW VERSION: grow the tree by step-wise addition
-            @param alignment input alignment
-     */
-    void growTreeMP(Alignment *alignment);
-
-    /**
-            used internally by growTreeMP() to find the best target branch to add into the tree
-            @param added_node node to add
-            @param target_node (OUT) one end of the best branch found
-            @param target_dad (OUT) the other end of the best branch found
-            @param node the current node
-            @param dad dad of the node, used to direct the search
-            @return the parsimony score of the tree
-     */
-    int addTaxonMP(Node *added_node, Node* &target_node, Node* &target_dad, Node *node, Node *dad);
-
-
-    /****************************************************************************
-            Nearest Neighbor Interchange with parsimony
-     ****************************************************************************/
-    /**
-            search by a nearest neigbor interchange with parsimony
-     */
-    void searchNNI();
-
-    /**
-            search by a nearest neigbor interchange with parsimony
-            @param node the current node
-            @param dad dad of the node, used to direct the search
-            @param cur_score current score
-            @return best score
-     */
-    double searchNNI(double cur_score, PhyloNode *node = NULL, PhyloNode *dad = NULL);
-
-    /**
-            try to swap the tree with nearest neigbor interchange at the branch connecting node1-node2.
-            If a swap shows better score, return the swapped tree and the score.
-            @param cur_score current score
-            @param node1 1st end node of the branch
-            @param node2 2nd end node of the branch
-            @return best score
-     */
-    double swapNNI(double cur_score, PhyloNode *node1, PhyloNode *node2);
 
     /****************************************************************************
             Branch length optimization by maximum likelihood
@@ -1877,55 +1790,6 @@ protected:
      */
     UINT *newBitsBlock();
 
-    /**
-            @return size of the bits entry (for storing num_states bits)
-     */
-    int getBitsEntrySize();
-
-    /**
-            @param bits_entry
-            @return TRUE if bits_entry contains all 0s, FALSE otherwise
-     */
-    bool isEmptyBitsEntry(UINT *bits_entry);
-
-    /**
-            @param bits_entry1
-            @param bits_entry1
-            @param bits_union (OUT) union of bits_entry1 and bits_entry2
-     */
-    void unionBitsEntry(UINT *bits_entry1, UINT *bits_entry2, UINT* &bits_union);
-
-    /**
-            set a single bit to 1
-            @param bits_entry
-            @param id index of the bit in the entry to set to 1
-     */
-    void setBitsEntry(UINT* &bits_entry, int id);
-
-    /**
-            get a single bit content
-            @param bits_entry
-            @param id index of the bit in the entry
-            @return TRUE if bit ID is 1, FALSE otherwise
-     */
-    bool getBitsEntry(UINT* &bits_entry, int id);
-
-    /**
-            get bit blocks, each block span num_state bits
-            @param bit_vec bit block vector
-            @param index block index
-            @param bits_entry (OUT) content of the block at index
-     */
-    void getBitsBlock(UINT *bit_vec, int index, UINT* &bits_entry);
-
-    /**
-            set bit blocks, each block span num_state bits
-            @param bit_vec (OUT) bit block vector
-            @param index block index
-            @param bits_entry the content of the block at index
-     */
-    void setBitsBlock(UINT* &bit_vec, int index, UINT *bits_entry);
-
     virtual void saveCurrentTree(double logl) {
     } // save current tree
 
diff --git a/phylotreesse.cpp b/phylotreesse.cpp
index 3bc4054..7782612 100644
--- a/phylotreesse.cpp
+++ b/phylotreesse.cpp
@@ -34,10 +34,10 @@
 void PhyloTree::setParsimonyKernel(LikelihoodKernel lk) {
     // set parsimony kernel
     switch (lk) {
-    case LK_SSE:
-        computeParsimonyBranchPointer = &PhyloTree::computeParsimonyBranchNaive;
-        computePartialParsimonyPointer = &PhyloTree::computePartialParsimonyNaive;
-    	break;
+//    case LK_SSE:
+//        computeParsimonyBranchPointer = &PhyloTree::computeParsimonyBranchNaive;
+//        computePartialParsimonyPointer = &PhyloTree::computePartialParsimonyNaive;
+//    	break;
     case LK_EIGEN:
         computeParsimonyBranchPointer = &PhyloTree::computeParsimonyBranchFast;
         computePartialParsimonyPointer = &PhyloTree::computePartialParsimonyFast;
@@ -50,10 +50,10 @@ void PhyloTree::setParsimonyKernel(LikelihoodKernel lk) {
             computePartialParsimonyPointer = &PhyloTree::computePartialParsimonyFastSIMD<Vec4ui>;
         }
     	break;
-    default:
-        computeParsimonyBranchPointer = &PhyloTree::computeParsimonyBranchNaive;
-        computePartialParsimonyPointer = &PhyloTree::computePartialParsimonyNaive;
-    	break;
+//    default:
+//        computeParsimonyBranchPointer = &PhyloTree::computeParsimonyBranchNaive;
+//        computePartialParsimonyPointer = &PhyloTree::computePartialParsimonyNaive;
+//    	break;
     }
 }
 
@@ -71,12 +71,12 @@ void PhyloTree::setLikelihoodKernel(LikelihoodKernel lk) {
 		dotProductDouble = &PhyloTree::dotProductSIMD<double, Vec2d, 2>;
 	}
 	sse = lk;
-    if (!aln || lk == LK_NORMAL) {
-        computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchNaive;
-        computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervNaive;
-        computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodNaive;
+    if (!aln) {
+        computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchEigen;
+        computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervEigen;
+        computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodEigen;
         computeLikelihoodFromBufferPointer = NULL;
-        sse = LK_NORMAL;
+        sse = LK_EIGEN;
         return;
     }
     
@@ -145,12 +145,12 @@ void PhyloTree::setLikelihoodKernel(LikelihoodKernel lk) {
 	switch(aln->num_states) {
 	case 4:
 		switch(sse) {
-		case LK_SSE:
-			computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchSSE<4>;
-			computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervSSE<4>;
-			computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodSSE<4>;
-	        computeLikelihoodFromBufferPointer = NULL;
-			break;
+//		case LK_SSE:
+//			computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchSSE<4>;
+//			computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervSSE<4>;
+//			computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodSSE<4>;
+//	        computeLikelihoodFromBufferPointer = NULL;
+//			break;
 		case LK_EIGEN_SSE:
 			if (instruction_set >= 7) {
 				// CPU supports AVX
@@ -183,12 +183,12 @@ void PhyloTree::setLikelihoodKernel(LikelihoodKernel lk) {
 		break;
 	case 20:
 		switch(sse) {
-		case LK_SSE:
-			computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchSSE<20>;
-			computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervSSE<20>;
-			computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodSSE<20>;
-	        computeLikelihoodFromBufferPointer = NULL;
-			break;
+//		case LK_SSE:
+//			computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchSSE<20>;
+//			computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervSSE<20>;
+//			computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodSSE<20>;
+//	        computeLikelihoodFromBufferPointer = NULL;
+//			break;
 		case LK_EIGEN_SSE:
 			if (instruction_set >= 7) {
 				setLikelihoodKernelAVX();
@@ -220,12 +220,12 @@ void PhyloTree::setLikelihoodKernel(LikelihoodKernel lk) {
 
 	case 64: // CODON
 		switch(sse) {
-		case LK_SSE:
-			computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchSSE<64>;
-			computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervSSE<64>;
-			computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodSSE<64>;
-			computeLikelihoodFromBufferPointer = NULL;
-			break;
+//		case LK_SSE:
+//			computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchSSE<64>;
+//			computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervSSE<64>;
+//			computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodSSE<64>;
+//			computeLikelihoodFromBufferPointer = NULL;
+//			break;
 		case LK_EIGEN_SSE:
 			if (instruction_set >= 7) {
 				setLikelihoodKernelAVX();
@@ -261,12 +261,12 @@ void PhyloTree::setLikelihoodKernel(LikelihoodKernel lk) {
 
 	case 2:
 		switch(sse) {
-		case LK_SSE:
-			computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchSSE<2>;
-			computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervSSE<2>;
-			computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodSSE<2>;
-	        computeLikelihoodFromBufferPointer = NULL;
-			break;
+//		case LK_SSE:
+//			computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchSSE<2>;
+//			computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervSSE<2>;
+//			computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodSSE<2>;
+//	        computeLikelihoodFromBufferPointer = NULL;
+//			break;
 		case LK_EIGEN_SSE:
 			computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchEigenSIMD<Vec2d, 2, 2>;
 			computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervEigenSIMD<Vec2d, 2, 2>;
@@ -299,12 +299,12 @@ void PhyloTree::setLikelihoodKernel(LikelihoodKernel lk) {
                 computeLikelihoodFromBufferPointer = NULL;
             }
             sse = LK_EIGEN;
-        } else {
-            computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchNaive;
-            computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervNaive;
-            computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodNaive;
-            computeLikelihoodFromBufferPointer = NULL;
-            sse = LK_NORMAL;
+//        } else {
+//            computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchNaive;
+//            computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervNaive;
+//            computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodNaive;
+//            computeLikelihoodFromBufferPointer = NULL;
+//            sse = LK_NORMAL;
         }
 		break;
 	}
@@ -312,18 +312,18 @@ void PhyloTree::setLikelihoodKernel(LikelihoodKernel lk) {
 
 void PhyloTree::changeLikelihoodKernel(LikelihoodKernel lk) {
 	if (sse == lk) return;
-	if ((sse == LK_EIGEN || sse == LK_EIGEN_SSE) && (lk == LK_NORMAL || lk == LK_SSE)) {
-		// need to increase the memory usage when changing from new kernel to old kernel
-        if (params->lh_mem_save == LM_PER_NODE)
-            params->lh_mem_save = LM_ALL_BRANCH;
-		setLikelihoodKernel(lk);
-		deleteAllPartialLh();
-		initializeAllPartialLh();
-		clearAllPartialLH();
-	} else {
+//	if ((sse == LK_EIGEN || sse == LK_EIGEN_SSE) && (lk == LK_NORMAL || lk == LK_SSE)) {
+//		// need to increase the memory usage when changing from new kernel to old kernel
+//        if (params->lh_mem_save == LM_PER_NODE)
+//            params->lh_mem_save = LM_ALL_BRANCH;
+//		setLikelihoodKernel(lk);
+//		deleteAllPartialLh();
+//		initializeAllPartialLh();
+//		clearAllPartialLH();
+//	} else {
 		// otherwise simply assign variable sse
 		setLikelihoodKernel(lk);
-	}
+//	}
 }
 
 /*******************************************************
@@ -591,11 +591,11 @@ void PhyloTree::computePartialLikelihoodEigen(PhyloNeighbor *dad_branch, PhyloNo
     size_t nstates = aln->num_states;
     size_t nptn = aln->size()+model_factory->unobserved_ptns.size();
 
+    if (!tip_partial_lh_computed)
+        computeTipPartialLikelihood();
+
 	if (node->isLeaf()) {
 	    dad_branch->lh_scale_factor = 0.0;
-
-		if (!tip_partial_lh_computed)
-			computeTipPartialLikelihood();
 		return;
 	}
     
@@ -787,7 +787,7 @@ void PhyloTree::computePartialLikelihoodEigen(PhyloNeighbor *dad_branch, PhyloNo
                             outWarning((string)"Numerical underflow for site " + convertIntToString(i+1));
                             x++;
                         }
-                } else {
+                } else if (ptn_invar[ptn] == 0.0) {
                     // now do the likelihood scaling
                     for (i = 0; i < block; i++) {
                         partial_lh[i] *= SCALING_THRESHOLD_INVER;
@@ -905,7 +905,7 @@ void PhyloTree::computePartialLikelihoodEigen(PhyloNeighbor *dad_branch, PhyloNo
 							outWarning((string)"Numerical underflow for site " + convertIntToString(i+1));
 							x++;
 						}
-            	} else {
+            	} else if (ptn_invar[ptn] == 0.0) {
 					// now do the likelihood scaling
 					for (i = 0; i < block; i++) {
 						partial_lh[i] *= SCALING_THRESHOLD_INVER;
@@ -986,7 +986,8 @@ void PhyloTree::computePartialLikelihoodEigen(PhyloNeighbor *dad_branch, PhyloNo
 							outWarning((string)"Numerical underflow for site " + convertIntToString(i+1));
 							x++;
 						}
-            	} else {
+            	} else if (ptn_invar[ptn] == 0.0) {
+                    // BQM 2016-05-03: only scale for non-constant sites
 					// now do the likelihood scaling
 					for (i = 0; i < block; i++) {
 						partial_lh[i] *= SCALING_THRESHOLD_INVER;
@@ -1221,19 +1222,19 @@ double PhyloTree::computeLikelihoodBranchEigen(PhyloNeighbor *dad_branch, PhyloN
 #endif
     	for (ptn = 0; ptn < nptn; ptn++) {
 			double lh_ptn = ptn_invar[ptn];
-			double *lh_cat = _pattern_lh_cat + ptn*ncat;
-			double *partial_lh_dad = dad_branch->partial_lh + ptn*block;
-			int state_dad = (ptn < orig_nptn) ? (aln->at(ptn))[dad->id] : model_factory->unobserved_ptns[ptn-orig_nptn];
-			double *lh_node = partial_lh_node + state_dad*block;
-			for (c = 0; c < ncat; c++) {
-				for (i = 0; i < nstates; i++) {
-					*lh_cat += lh_node[i] * partial_lh_dad[i];
-				}
-				lh_node += nstates;
-				partial_lh_dad += nstates;
-				lh_ptn += *lh_cat;
-				lh_cat++;
-			}
+            double *lh_cat = _pattern_lh_cat + ptn*ncat;
+            double *partial_lh_dad = dad_branch->partial_lh + ptn*block;
+            int state_dad = (ptn < orig_nptn) ? (aln->at(ptn))[dad->id] : model_factory->unobserved_ptns[ptn-orig_nptn];
+            double *lh_node = partial_lh_node + state_dad*block;
+            for (c = 0; c < ncat; c++) {
+                for (i = 0; i < nstates; i++) {
+                    *lh_cat += lh_node[i] * partial_lh_dad[i];
+                }
+                lh_node += nstates;
+                partial_lh_dad += nstates;
+                lh_ptn += *lh_cat;
+                lh_cat++;
+            }
 //			assert(lh_ptn > -1e-10);
 			if (ptn < orig_nptn) {
 				lh_ptn = log(fabs(lh_ptn));
@@ -1255,20 +1256,20 @@ double PhyloTree::computeLikelihoodBranchEigen(PhyloNeighbor *dad_branch, PhyloN
 #endif
     	for (ptn = 0; ptn < nptn; ptn++) {
 			double lh_ptn = ptn_invar[ptn];
-			double *lh_cat = _pattern_lh_cat + ptn*ncat;
-			double *partial_lh_dad = dad_branch->partial_lh + ptn*block;
-			double *partial_lh_node = node_branch->partial_lh + ptn*block;
-			double *val_tmp = val;
-			for (c = 0; c < ncat; c++) {
-				for (i = 0; i < nstates; i++) {
-					*lh_cat +=  val_tmp[i] * partial_lh_node[i] * partial_lh_dad[i];
-				}
-				lh_ptn += *lh_cat;
-				partial_lh_node += nstates;
-				partial_lh_dad += nstates;
-				val_tmp += nstates;
-				lh_cat++;
-			}
+            double *lh_cat = _pattern_lh_cat + ptn*ncat;
+            double *partial_lh_dad = dad_branch->partial_lh + ptn*block;
+            double *partial_lh_node = node_branch->partial_lh + ptn*block;
+            double *val_tmp = val;
+            for (c = 0; c < ncat; c++) {
+                for (i = 0; i < nstates; i++) {
+                    *lh_cat +=  val_tmp[i] * partial_lh_node[i] * partial_lh_dad[i];
+                }
+                lh_ptn += *lh_cat;
+                partial_lh_node += nstates;
+                partial_lh_dad += nstates;
+                val_tmp += nstates;
+                lh_cat++;
+            }
 
 //			assert(lh_ptn > 0.0);
             if (ptn < orig_nptn) {
@@ -1338,439 +1339,6 @@ double PhyloTree::computeLikelihoodBranchEigen(PhyloNeighbor *dad_branch, PhyloN
 }
 
 
-/************************************************************************************************
- *
- *   SSE vectorized functions of the Naive implementation
- *
- *************************************************************************************************/
-
-template<const int NSTATES>
-inline double PhyloTree::computeLikelihoodBranchSSE(PhyloNeighbor *dad_branch, PhyloNode *dad) {
-    PhyloNode *node = (PhyloNode*) dad_branch->node; // Node A
-    PhyloNeighbor *node_branch = (PhyloNeighbor*) node->findNeighbor(dad); // Node B
-    assert(node_branch);
-    if (!central_partial_lh)
-        initializeAllPartialLh();
-    // swap node and dad if dad is a leaf
-    if (node->isLeaf()) {
-        PhyloNode *tmp_node = dad;
-        dad = node;
-        node = tmp_node;
-        PhyloNeighbor *tmp_nei = dad_branch;
-        dad_branch = node_branch;
-        node_branch = tmp_nei;
-    }
-    if ((dad_branch->partial_lh_computed & 1) == 0)
-        computePartialLikelihoodSSE<NSTATES>(dad_branch, dad);
-    if ((node_branch->partial_lh_computed & 1) == 0)
-        computePartialLikelihoodSSE<NSTATES>(node_branch, node);
-
-    // now combine likelihood at the branch
-    double tree_lh = node_branch->lh_scale_factor + dad_branch->lh_scale_factor;
-    int ptn, cat, state1, state2;
-    double *partial_lh_site;
-    double *partial_lh_child;
-    double *trans_state;
-    double p_invar = site_rate->getPInvar();
-    int numCat = site_rate->getNRate();
-    int numStates = model->num_states;
-    int tranSize = numStates * numStates;
-    int alnSize = aln->size() + model_factory->unobserved_ptns.size();
-    int orig_alnSize = aln->size();
-    int block = numStates * numCat;
-
-    double p_var_cat = (1.0 - p_invar) / (double) numCat;
-
-    EIGEN_ALIGN16 double *trans_mat_orig = new double[numCat * tranSize + 1];
-    double *trans_mat = trans_mat_orig;
-    if (((intptr_t) trans_mat) % 16 != 0)
-        trans_mat = trans_mat + 1;
-    EIGEN_ALIGN16 double state_freq[NSTATES];
-    model->getStateFrequency(state_freq);
-    for (cat = 0; cat < numCat; cat++) {
-        double *trans_cat = trans_mat + (cat * tranSize);
-        model_factory->computeTransMatrix(dad_branch->length * site_rate->getRate(cat), trans_cat);
-        for (state1 = 0; state1 < NSTATES; state1++) {
-            double *trans_mat_state = trans_cat + (state1 * NSTATES);
-            for (state2 = 0; state2 < NSTATES; state2++)
-                trans_mat_state[state2] *= state_freq[state1];
-        }
-    }
-
-    double prob_const = 0.0; // probability of unobserved const patterns
-
-#ifdef _OPENMP
-#pragma omp parallel for reduction(+: tree_lh, prob_const) private(ptn, cat) schedule(static)
-#endif
-    for (ptn = 0; ptn < alnSize; ++ptn) {
-        double lh_ptn = 0.0; // likelihood of the pattern
-        for (cat = 0; cat < numCat; cat++) {
-            partial_lh_site = node_branch->partial_lh + (ptn * block + cat * NSTATES);
-            partial_lh_child = dad_branch->partial_lh + (ptn * block + cat * NSTATES);
-            trans_state = trans_mat + cat * tranSize;
-            Map<Matrix<double, 1, NSTATES>, Aligned> eigen_partial_lh_child(&partial_lh_child[0]);
-            Map<Matrix<double, 1, NSTATES>, Aligned> eigen_partial_lh_site(&partial_lh_site[0]);
-            Map<Matrix<double, NSTATES, NSTATES>, Aligned> eigen_trans_state(&trans_state[0]);
-            lh_ptn += (eigen_partial_lh_child * eigen_trans_state).dot(eigen_partial_lh_site);
-        }
-        if (ptn < orig_alnSize) {
-			lh_ptn *= p_var_cat;
-			if ((*aln)[ptn].const_char == NSTATES)
-				lh_ptn += p_invar;
-			else if ((*aln)[ptn].const_char < NSTATES) {
-				lh_ptn += p_invar * state_freq[(int) (*aln)[ptn].const_char];
-			}
-			lh_ptn = log(lh_ptn);
-			tree_lh += lh_ptn * (aln->at(ptn).frequency);
-			_pattern_lh[ptn] = lh_ptn;
-			// BQM: pattern_lh contains the LOG-likelihood, not likelihood
-        } else {
-			lh_ptn = lh_ptn*p_var_cat + p_invar*state_freq[(int)model_factory->unobserved_ptns[ptn-orig_alnSize]];
-			prob_const += lh_ptn;
-
-        }
-    }
-    if (orig_alnSize < alnSize) {
-    	// ascertainment bias correction
-    	prob_const = log(1.0 - prob_const);
-    	for (ptn = 0; ptn < orig_alnSize; ptn++)
-    		_pattern_lh[ptn] -= prob_const;
-    	tree_lh -= aln->getNSite()*prob_const;
-    }
-
-    delete[] trans_mat_orig;
-    return tree_lh;
-}
-
-template<int NSTATES>
-void PhyloTree::computePartialLikelihoodSSE(PhyloNeighbor *dad_branch, PhyloNode *dad) {
-    // don't recompute the likelihood
-    if (dad_branch->partial_lh_computed & 1)
-        return;
-    Node *node = dad_branch->node;
-    int ptn, cat;
-    //double *trans_state;
-    double *partial_lh_site;
-    double *partial_lh_child;
-    dad_branch->lh_scale_factor = 0.0;
-
-    int numCat = site_rate->getNRate();
-    int numStates = model->num_states;
-    int tranSize = numStates * numStates;
-    int alnSize = aln->size() + model_factory->unobserved_ptns.size();
-    int orig_alnSize = aln->size();
-    int block = numStates * numCat;
-    size_t lh_size = alnSize * block;
-    memset(dad_branch->scale_num, 0, alnSize * sizeof(UBYTE));
-
-    if (node->isLeaf() && dad) {
-        // external node
-        memset(dad_branch->partial_lh, 0, lh_size * sizeof(double));
-        for (ptn = 0; ptn < alnSize; ++ptn) {
-            char state;
-            partial_lh_site = dad_branch->partial_lh + (ptn * block);
-
-            if (node->name == ROOT_NAME) {
-                state = aln->STATE_UNKNOWN;
-            } else if (ptn < orig_alnSize){
-                state = (aln->at(ptn))[node->id];
-            } else {
-            	state = model_factory->unobserved_ptns[ptn-orig_alnSize];
-            }
-
-            if (state == aln->STATE_UNKNOWN) {
-#ifndef KEEP_GAP_LH
-                dad_branch->scale_num[ptn] = -1;
-#endif
-                for (int state2 = 0; state2 < block; state2++) {
-                    partial_lh_site[state2] = 1.0;
-                }
-            } else if (state < NSTATES) {
-                double *_par_lh_site = partial_lh_site + state;
-                for (cat = 0; cat < numCat; cat++) {
-                    *_par_lh_site = 1.0;
-                    _par_lh_site += NSTATES;
-                }
-            } else if (aln->seq_type == SEQ_DNA) {
-                // ambiguous character, for DNA, RNA
-                state = state - (NSTATES - 1);
-                for (int state2 = 0; state2 < NSTATES; state2++)
-                    if (state & (1 << state2)) {
-                        for (cat = 0; cat < numCat; cat++)
-                            partial_lh_site[cat * NSTATES + state2] = 1.0;
-                    }
-            } else if (aln->seq_type == SEQ_PROTEIN) {
-                // ambiguous character, for DNA, RNA
-                state = state - (NSTATES);
-                assert(state < 3);
-                int state_map[] = {4+8,32+64,512+1024};
-                for (int state2 = 0; state2 < 11; state2++)
-                    if (state_map[(int)state] & (1 << state2)) {
-                        for (cat = 0; cat < numCat; cat++)
-                            partial_lh_site[cat * NSTATES + state2] = 1.0;
-                    }
-            } else {
-            	outError("Internal error ", __func__);
-            }
-        }
-    } else {
-        // internal node
-        EIGEN_ALIGN16 double *trans_mat_orig = new double[numCat * tranSize + 2];
-        double *trans_mat = trans_mat_orig;
-        if (((intptr_t) trans_mat) % 16 != 0)
-            trans_mat = trans_mat + 1;
-        for (ptn = 0; ptn < lh_size; ++ptn)
-            dad_branch->partial_lh[ptn] = 1.0;
-#ifndef KEEP_GAP_LH
-        for (ptn = 0; ptn < alnSize; ptn++)
-            dad_branch->scale_num[ptn] = -1;
-#endif
-        FOR_NEIGHBOR_IT(node, dad, it)if ((*it)->node->name != ROOT_NAME) {
-            computePartialLikelihoodSSE<NSTATES > ((PhyloNeighbor*) (*it), (PhyloNode*) node);
-            dad_branch->lh_scale_factor += ((PhyloNeighbor*) (*it))->lh_scale_factor;
-            for (cat = 0; cat < numCat; cat++) {
-                model_factory->computeTransMatrix((*it)->length * site_rate->getRate(cat), &trans_mat[cat * tranSize]);
-            }
-            partial_lh_site = dad_branch->partial_lh;
-            partial_lh_child = ((PhyloNeighbor*) (*it))->partial_lh;
-            double sum_scale = 0.0;
-#ifdef _OPENMP
-#pragma omp parallel for reduction(+: sum_scale) private(ptn, cat, partial_lh_site, partial_lh_child) schedule(static)
-#endif
-            for (ptn = 0; ptn < alnSize; ++ptn)
-#ifndef KEEP_GAP_LH
-            if (((PhyloNeighbor*) (*it))->scale_num[ptn] < 0) {
-#ifndef _OPENMP
-                partial_lh_site += NSTATES * numCat;
-                partial_lh_child += NSTATES * numCat;
-#endif
-            } else
-#endif
-            {
-#ifndef KEEP_GAP_LH
-                if (dad_branch->scale_num[ptn] < 0)
-                dad_branch->scale_num[ptn] = 0;
-#endif
-#ifdef _OPENMP
-                int lh_offset = ptn*block;
-                partial_lh_site = dad_branch->partial_lh + lh_offset;
-                partial_lh_child = ((PhyloNeighbor*) (*it))->partial_lh + lh_offset;
-#endif
-                dad_branch->scale_num[ptn] += ((PhyloNeighbor*) (*it))->scale_num[ptn];
-                double *partial_lh_block = partial_lh_site;
-                double *trans_state = trans_mat;
-                bool do_scale = true;
-                for (cat = 0; cat < numCat; cat++)
-                {
-                    MappedRowVec(NSTATES) ei_partial_lh_child(partial_lh_child);
-                    MappedRowVec(NSTATES) ei_partial_lh_site(partial_lh_site);
-                    MappedMat(NSTATES) ei_trans_state(trans_state);
-                    ei_partial_lh_site.array() *= (ei_partial_lh_child * ei_trans_state).array();
-                    partial_lh_site += NSTATES;
-                    partial_lh_child += NSTATES;
-                    trans_state += tranSize;
-                }
-                for (cat = 0; cat < block; cat++)
-                if (partial_lh_block[cat] > SCALING_THRESHOLD) {
-                    do_scale = false;
-                    break;
-                }
-                if (do_scale) {
-                    // unobserved const pattern will never have underflow
-                    Map<VectorXd, Aligned> ei_lh_block(partial_lh_block, block);
-                    ei_lh_block *= SCALING_THRESHOLD_INVER;
-                    sum_scale += LOG_SCALING_THRESHOLD *  (*aln)[ptn].frequency;
-                    dad_branch->scale_num[ptn] += 1;
-                }
-            }
-            dad_branch->lh_scale_factor += sum_scale;
-        }
-        delete[] trans_mat_orig;
-    }
-
-    dad_branch->partial_lh_computed |= 1;
-}
-
-/****************************************************************************
- computing derivatives of likelihood function
- ****************************************************************************/
-template<int NSTATES>
-inline void PhyloTree::computeLikelihoodDervSSE(PhyloNeighbor *dad_branch, PhyloNode *dad, double &df, double &ddf) {
-    PhyloNode *node = (PhyloNode*) dad_branch->node;
-    PhyloNeighbor *node_branch = (PhyloNeighbor*) node->findNeighbor(dad);
-    //assert(node_branch);
-    // swap node and dad if node is a leaf
-    if (node->isLeaf()) {
-        PhyloNode *tmp_node = dad;
-        dad = node;
-        node = tmp_node;
-        PhyloNeighbor *tmp_nei = dad_branch;
-        dad_branch = node_branch;
-        node_branch = tmp_nei;
-    }
-    if ((dad_branch->partial_lh_computed & 1) == 0)
-        computePartialLikelihoodSSE<NSTATES>(dad_branch, dad);
-    if ((node_branch->partial_lh_computed & 1) == 0)
-        computePartialLikelihoodSSE<NSTATES>(node_branch, node);
-    df = ddf = 0.0;
-    int cat = 0;
-    double *partial_lh_site = node_branch->partial_lh;
-    double *partial_lh_child = dad_branch->partial_lh;
-    double lh_ptn; // likelihood of the pattern
-    double lh_ptn_derv1;
-    double lh_ptn_derv2;
-    double derv1_frac;
-    double derv2_frac;
-    double *trans_state;
-    double *derv1_state;
-    double *derv2_state;
-    double p_invar = site_rate->getPInvar();
-
-    int numCat = site_rate->getNRate();
-    int numStates = model->num_states;
-    int tranSize = numStates * numStates;
-    int alnSize = aln->size() + model_factory->unobserved_ptns.size();
-    int orig_alnSize = aln->size();
-
-    double p_var_cat = (1.0 - p_invar) / (double) numCat;
-    double state_freq[NSTATES];
-    model->getStateFrequency(state_freq);
-    double *trans_mat_orig  = new double[numCat * tranSize + 1];
-    double *trans_derv1_orig  = new double[numCat * tranSize + 1];
-    double *trans_derv2_orig  = new double[numCat * tranSize + 1];
-    // make alignment 16
-    double *trans_mat = trans_mat_orig, *trans_derv1 = trans_derv1_orig, *trans_derv2 = trans_derv2_orig;
-    if (((intptr_t) trans_mat) % 16 != 0)
-        trans_mat = trans_mat + 1;
-    if (((intptr_t) trans_derv1) % 16 != 0)
-        trans_derv1 = trans_derv1 + 1;
-    if (((intptr_t) trans_derv2) % 16 != 0)
-        trans_derv2 = trans_derv2 + 1;
-
-    int discrete_cat = site_rate->getNDiscreteRate();
-    if (!site_rate->isSiteSpecificRate())
-        for (cat = 0; cat < discrete_cat; cat++) {
-            double *trans_cat = trans_mat + (cat * tranSize);
-            double *derv1_cat = trans_derv1 + (cat * tranSize);
-            double *derv2_cat = trans_derv2 + (cat * tranSize);
-            double rate_val = site_rate->getRate(cat);
-            model_factory->computeTransDervFreq(dad_branch->length, rate_val, state_freq, trans_cat, derv1_cat,
-                    derv2_cat);
-        }
-    int dad_state = aln->STATE_UNKNOWN;
-    double my_df = 0.0;
-    double my_ddf = 0.0;
-    double prob_const = 0.0, prob_const_derv1 = 0.0, prob_const_derv2 = 0.0;
-
-#ifdef _OPENMP
-#pragma omp parallel for reduction(+: my_df, my_ddf,prob_const, prob_const_derv1, prob_const_derv2) \
-	private(cat, partial_lh_child, partial_lh_site,\
-	lh_ptn, lh_ptn_derv1, lh_ptn_derv2, derv1_frac, derv2_frac, dad_state, trans_state, derv1_state, derv2_state) schedule(static)
-#endif
-    for (int ptn = 0; ptn < alnSize; ++ptn) {
-#ifdef _OPENMP
-        int lh_offset = ptn*numCat*numStates;
-        partial_lh_site = node_branch->partial_lh + lh_offset;
-        partial_lh_child = dad_branch->partial_lh + lh_offset;
-#endif
-        lh_ptn = 0.0;
-        lh_ptn_derv1 = 0.0;
-        lh_ptn_derv2 = 0.0;
-        int padding = 0;
-        dad_state = aln->STATE_UNKNOWN; // FOR TUNG: This is missing in your codes!
-        if (dad->isLeaf()) {
-        	if (ptn < orig_alnSize)
-        		dad_state = (*aln)[ptn][dad->id];
-        	else
-        		dad_state = model_factory->unobserved_ptns[ptn-orig_alnSize];
-        }
-        padding = dad_state * NSTATES;
-        if (dad_state < NSTATES) {
-            //external node
-            trans_state = trans_mat + padding;
-            derv1_state = trans_derv1 + padding;
-            derv2_state = trans_derv2 + padding;
-            for (cat = 0; cat < numCat; cat++) {
-                MappedVec(NSTATES)ei_partial_lh_child(partial_lh_child);
-                MappedVec(NSTATES) ei_trans_state(trans_state);
-                MappedVec(NSTATES) ei_derv1_state(derv1_state);
-                MappedVec(NSTATES) ei_derv2_state(derv2_state);
-                lh_ptn += ei_partial_lh_child.dot(ei_trans_state);
-                lh_ptn_derv1 += ei_partial_lh_child.dot(ei_derv1_state);
-                lh_ptn_derv2 += ei_partial_lh_child.dot(ei_derv2_state);
-                partial_lh_child += NSTATES;
-                partial_lh_site += NSTATES;
-                trans_state += tranSize;
-                derv1_state += tranSize;
-                derv2_state += tranSize;
-            }
-        } else {
-            // internal node, or external node but ambiguous character
-            trans_state = trans_mat;
-            derv1_state = trans_derv1;
-            derv2_state = trans_derv2;
-            for (cat = 0; cat < numCat; cat++) {
-                MappedRowVec(NSTATES) ei_partial_lh_site(partial_lh_site);
-                MappedRowVec(NSTATES) ei_partial_lh_child(partial_lh_child);
-                MappedMat(NSTATES) ei_trans_state(trans_state);
-                MappedMat(NSTATES) ei_derv1_state(derv1_state);
-                MappedMat(NSTATES) ei_derv2_state(derv2_state);
-                lh_ptn += (ei_partial_lh_child * ei_trans_state).dot(ei_partial_lh_site);
-                lh_ptn_derv1 += (ei_partial_lh_child * ei_derv1_state).dot(ei_partial_lh_site);
-                lh_ptn_derv2 += (ei_partial_lh_child * ei_derv2_state).dot(ei_partial_lh_site);
-                partial_lh_site += NSTATES;
-                partial_lh_child += NSTATES;
-                trans_state += tranSize;
-                derv1_state += tranSize;
-                derv2_state += tranSize;
-            }
-        }
-        if (ptn < orig_alnSize) {
-			lh_ptn = lh_ptn * p_var_cat;
-			if ((*aln)[ptn].const_char == NSTATES)
-				lh_ptn += p_invar;
-			else if ((*aln)[ptn].const_char < NSTATES) {
-				lh_ptn += p_invar * state_freq[(int) (*aln)[ptn].const_char];
-			}
-			double pad = p_var_cat / lh_ptn;
-			if (std::isinf(pad)) {
-				lh_ptn_derv1 *= p_var_cat;
-				lh_ptn_derv2 *= p_var_cat;
-				derv1_frac = lh_ptn_derv1 / lh_ptn;
-				derv2_frac = lh_ptn_derv2 / lh_ptn;
-			} else {
-				derv1_frac = lh_ptn_derv1 * pad;
-				derv2_frac = lh_ptn_derv2 * pad;
-			}
-	        double freq = aln->at(ptn).frequency;
-			double tmp1 = derv1_frac * freq;
-			double tmp2 = derv2_frac * freq;
-			my_df += tmp1;
-			my_ddf += tmp2 - tmp1 * derv1_frac;
-        } else {
-        	lh_ptn = lh_ptn*p_var_cat + p_invar*state_freq[(int)model_factory->unobserved_ptns[ptn-orig_alnSize]];
-        	prob_const += lh_ptn;
-        	prob_const_derv1 += lh_ptn_derv1 * p_var_cat;
-        	prob_const_derv2 += lh_ptn_derv2 * p_var_cat;
-        }
-    }
-    if (orig_alnSize < alnSize) {
-    	// ascertainment bias correction
-    	prob_const = 1.0 - prob_const;
-    	derv1_frac = prob_const_derv1 / prob_const;
-    	derv2_frac = prob_const_derv2 / prob_const;
-    	int nsites = aln->getNSite();
-    	my_df += nsites * derv1_frac;
-    	my_ddf += nsites *(derv2_frac + derv1_frac*derv1_frac);
-    }
-
-    delete[] trans_derv2_orig;
-    delete[] trans_derv1_orig;
-    delete[] trans_mat_orig;
-    df = my_df;
-    ddf = my_ddf;
-}
-
 
 /************************************************************************************************
  *
@@ -1789,11 +1357,11 @@ void PhyloTree::computeMixratePartialLikelihoodEigen(PhyloNeighbor *dad_branch,
     size_t nptn = aln->size()+model_factory->unobserved_ptns.size();
     PhyloNode *node = (PhyloNode*)(dad_branch->node);
 
+    if (!tip_partial_lh_computed)
+        computeTipPartialLikelihood();
+
 	if (node->isLeaf()) {
 	    dad_branch->lh_scale_factor = 0.0;
-
-		if (!tip_partial_lh_computed)
-			computeTipPartialLikelihood();
 		return;
 	}
 
@@ -1814,21 +1382,15 @@ void PhyloTree::computeMixratePartialLikelihoodEigen(PhyloNeighbor *dad_branch,
     dad_branch->lh_scale_factor = 0.0;
 
 	// internal node
-	assert(node->degree() == 3); // it works only for strictly bifurcating tree
+//	assert(node->degree() == 3); // it works only for strictly bifurcating tree
 	PhyloNeighbor *left = NULL, *right = NULL; // left & right are two neighbors leading to 2 subtrees
 	FOR_NEIGHBOR_IT(node, dad, it) {
+        PhyloNeighbor *nei = (PhyloNeighbor*)*it;
 		if (!left) left = (PhyloNeighbor*)(*it); else right = (PhyloNeighbor*)(*it);
+        if ((nei->partial_lh_computed & 1) == 0)
+            computePartialLikelihood(nei, node);
+        dad_branch->lh_scale_factor += nei->lh_scale_factor;
 	}
-
-	if (!left->node->isLeaf() && right->node->isLeaf()) {
-		PhyloNeighbor *tmp = left;
-		left = right;
-		right = tmp;
-	}
-	if ((left->partial_lh_computed & 1) == 0)
-		computeMixratePartialLikelihoodEigen(left, node);
-	if ((right->partial_lh_computed & 1) == 0)
-		computeMixratePartialLikelihoodEigen(right, node);
         
     if (params->lh_mem_save == LM_PER_NODE && !dad_branch->partial_lh) {
         // re-orient partial_lh
@@ -1848,66 +1410,186 @@ void PhyloTree::computeMixratePartialLikelihoodEigen(PhyloNeighbor *dad_branch,
         assert(done && "partial_lh is not re-oriented");
     }        
         
-	dad_branch->lh_scale_factor = left->lh_scale_factor + right->lh_scale_factor;
-	double *eleft = new double[block*nstates], *eright = new double[block*nstates];
+    // precompute buffer to save times
+    double *echildren = new double[block*nstates*(node->degree()-1)];
+    double *partial_lh_leaves = new double[(aln->STATE_UNKNOWN+1)*block*(node->degree()-1)];
+    double *echild = echildren;
+    double *partial_lh_leaf = partial_lh_leaves;
 
-	// precompute information buffer
-	for (c = 0; c < ncat; c++) {
-		double *expleft = new double[nstates];
-		double *expright = new double[nstates];
-		double len_left = site_rate->getRate(c) * left->length;
-		double len_right = site_rate->getRate(c) * right->length;
-		for (i = 0; i < nstates; i++) {
-			expleft[i] = exp(eval[c*nstates+i]*len_left);
-			expright[i] = exp(eval[c*nstates+i]*len_right);
-		}
-		for (x = 0; x < nstates; x++)
-			for (i = 0; i < nstates; i++) {
-				eleft[c*nstatesqr+x*nstates+i] = evec[c*nstatesqr+x*nstates+i] * expleft[i];
-				eright[c*nstatesqr+x*nstates+i] = evec[c*nstatesqr+x*nstates+i] * expright[i];
-			}
-		delete [] expright;
-		delete [] expleft;
+    FOR_NEIGHBOR_IT(node, dad, it) {
+        double expchild[nstates];
+        PhyloNeighbor *child = (PhyloNeighbor*)*it;
+        // precompute information buffer
+        for (c = 0; c < ncat; c++) {
+            double len_child = site_rate->getRate(c) * child->length;
+            for (i = 0; i < nstates; i++) {
+                expchild[i] = exp(eval[c*nstates+i]*len_child);
+            }
+            for (x = 0; x < nstates; x++)
+                for (i = 0; i < nstates; i++) {
+                    echild[c*nstatesqr+x*nstates+i] = evec[c*nstatesqr+x*nstates+i] * expchild[i];
+                }
+        }
+        // pre compute information for tip
+        if (child->node->isLeaf()) {
+            vector<int>::iterator it;
+            for (it = aln->seq_states[child->node->id].begin(); it != aln->seq_states[child->node->id].end(); it++) {
+                int state = (*it);
+                for (c = 0; c < ncat; c++)
+                for (x = 0; x < nstates; x++) {
+                    double vchild = 0.0;
+                    for (i = 0; i < nstates; i++) {
+                        vchild += echild[c*nstatesqr+x*nstates+i] * tip_partial_lh[state*block+c*nstates+i];
+                    }
+                    partial_lh_leaf[state*block+c*nstates+x] = vchild;
+                }
+            }
+            size_t addr = aln->STATE_UNKNOWN * block;
+            for (x = 0; x < block; x++) {
+                partial_lh_leaf[addr+x] = 1.0;
+            }
+            partial_lh_leaf += (aln->STATE_UNKNOWN+1)*block;
+        }
+        echild += block*nstates;
+    }
+
+    double *eleft = echildren, *eright = echildren + block*nstates;
+    
+	if (!left->node->isLeaf() && right->node->isLeaf()) {
+		PhyloNeighbor *tmp = left;
+		left = right;
+		right = tmp;
+        double *etmp = eleft;
+        eleft = eright;
+        eright = etmp;
 	}
+    
+    if (node->degree() > 3) {
+        /*--------------------- multifurcating node ------------------*/
+        double sum_scale = 0.0;
+    
+        // now for-loop computing partial_lh over all site-patterns
+#ifdef _OPENMP
+#pragma omp parallel for reduction(+: sum_scale) private(ptn, c, x, i) schedule(static)
+#endif
+        for (ptn = 0; ptn < nptn; ptn++) {
+            double partial_lh_all[block];
+            for (i = 0; i < block; i++)
+                partial_lh_all[i] = 1.0;
+            dad_branch->scale_num[ptn] = 0;
+                
+            double *partial_lh_leaf = partial_lh_leaves;
+            double *echild = echildren;
 
-	if (left->node->isLeaf() && right->node->isLeaf()) {
-		// special treatment for TIP-TIP (cherry) case
+            FOR_NEIGHBOR_IT(node, dad, it) {
+                PhyloNeighbor *child = (PhyloNeighbor*)*it;
+                if (child->node->isLeaf()) {
+                    // external node
+                    int state_child = (ptn < orig_ntn) ? (aln->at(ptn))[child->node->id] : model_factory->unobserved_ptns[ptn-orig_ntn];
+                    double *child_lh = partial_lh_leaf + state_child*block;
+                    for (c = 0; c < block; c++) {
+                        partial_lh_all[c] *= child_lh[c];
+                    }
+                    partial_lh_leaf += (aln->STATE_UNKNOWN+1)*block;
+                } else {
+                    // internal node
+                    double *partial_lh = partial_lh_all;
+                    double *partial_lh_child = child->partial_lh + ptn*block;
+                    dad_branch->scale_num[ptn] += child->scale_num[ptn];
 
-		// pre compute information for both tips
-		double *partial_lh_left = new double[(aln->STATE_UNKNOWN+1)*block];
-		double *partial_lh_right = new double[(aln->STATE_UNKNOWN+1)*block];
-
-		vector<int>::iterator it;
-		for (it = aln->seq_states[left->node->id].begin(); it != aln->seq_states[left->node->id].end(); it++) {
-			int state = (*it);
-			for (c = 0; c < ncat; c++)
-			for (x = 0; x < nstates; x++) {
-				double vleft = 0.0;
+                    double *echild_ptr = echild;
+                    for (c = 0; c < ncat; c++) {
+                        // compute real partial likelihood vector
+                        for (x = 0; x < nstates; x++) {
+                            double vchild = 0.0;
+                            for (i = 0; i < nstates; i++) {
+                                vchild += echild_ptr[i] * partial_lh_child[i];
+                            }
+                            echild_ptr += nstates;
+                            partial_lh[x] *= vchild;
+                        }
+                        partial_lh += nstates;
+                        partial_lh_child += nstates;
+                    }
+                } // if
+                echild += block*nstates;
+            } // FOR_NEIGHBOR
+
+
+            // compute dot-product with inv_eigenvector
+            double lh_max = 0.0;
+            double *partial_lh_tmp = partial_lh_all;
+            double *partial_lh = dad_branch->partial_lh + ptn*block;
+            double *inv_evec_ptr = inv_evec;
+			for (c = 0; c < ncat; c++) {
+				// compute dot-product with inv_eigenvector
 				for (i = 0; i < nstates; i++) {
-					vleft += eleft[c*nstatesqr+x*nstates+i] * tip_partial_lh[state*block+c*nstates+i];
+					double res = 0.0;
+					for (x = 0; x < nstates; x++) {
+						res += partial_lh_tmp[x]*inv_evec_ptr[x];
+					}
+                    inv_evec_ptr += nstates;
+					partial_lh[i] = res;
+                    lh_max = max(fabs(res), lh_max);
 				}
-				partial_lh_left[state*block+c*nstates+x] = vleft;
+                partial_lh += nstates;
+                partial_lh_tmp += nstates;
 			}
-		}
 
-		for (it = aln->seq_states[right->node->id].begin(); it != aln->seq_states[right->node->id].end(); it++) {
-			int state = (*it);
-			for (c = 0; c < ncat; c++)
-			for (x = 0; x < nstates; x++) {
-				double vright = 0.0;
-				for (i = 0; i < nstates; i++) {
-					vright += eright[c*nstatesqr+x*nstates+i] * tip_partial_lh[state*block+c*nstates+i];
+            if (lh_max < SCALING_THRESHOLD) {
+				// now do the likelihood scaling
+                partial_lh = dad_branch->partial_lh + ptn*block;
+				for (i = 0; i < block; i++) {
+					partial_lh[i] *= SCALING_THRESHOLD_INVER;
 				}
-				partial_lh_right[state*block+c*nstates+x] = vright;
-			}
-		}
+				// unobserved const pattern will never have underflow
+				sum_scale += LOG_SCALING_THRESHOLD * ptn_freq[ptn];
+				dad_branch->scale_num[ptn] += 1;
+            }
+            
+        } // for ptn
+        dad_branch->lh_scale_factor += sum_scale;
+                
+        // end multifurcating treatment
 
-		for (x = 0; x < block; x++) {
-			size_t addr = aln->STATE_UNKNOWN * block;
-			partial_lh_left[addr+x] = 1.0;
-			partial_lh_right[addr+x] = 1.0;
-		}
+	} else if (left->node->isLeaf() && right->node->isLeaf()) {
+		// special treatment for TIP-TIP (cherry) case
 
+		// pre compute information for both tips
+		double *partial_lh_left = partial_lh_leaves;
+		double *partial_lh_right = partial_lh_leaves + (aln->STATE_UNKNOWN+1)*block;
+
+//		vector<int>::iterator it;
+//		for (it = aln->seq_states[left->node->id].begin(); it != aln->seq_states[left->node->id].end(); it++) {
+//			int state = (*it);
+//			for (c = 0; c < ncat; c++)
+//			for (x = 0; x < nstates; x++) {
+//				double vleft = 0.0;
+//				for (i = 0; i < nstates; i++) {
+//					vleft += eleft[c*nstatesqr+x*nstates+i] * tip_partial_lh[state*block+c*nstates+i];
+//				}
+//				partial_lh_left[state*block+c*nstates+x] = vleft;
+//			}
+//		}
+//
+//		for (it = aln->seq_states[right->node->id].begin(); it != aln->seq_states[right->node->id].end(); it++) {
+//			int state = (*it);
+//			for (c = 0; c < ncat; c++)
+//			for (x = 0; x < nstates; x++) {
+//				double vright = 0.0;
+//				for (i = 0; i < nstates; i++) {
+//					vright += eright[c*nstatesqr+x*nstates+i] * tip_partial_lh[state*block+c*nstates+i];
+//				}
+//				partial_lh_right[state*block+c*nstates+x] = vright;
+//			}
+//		}
+//
+//		for (x = 0; x < block; x++) {
+//			size_t addr = aln->STATE_UNKNOWN * block;
+//			partial_lh_left[addr+x] = 1.0;
+//			partial_lh_right[addr+x] = 1.0;
+//		}
+//
 
 		// scale number must be ZERO
 	    memset(dad_branch->scale_num, 0, nptn * sizeof(UBYTE));
@@ -1938,33 +1620,33 @@ void PhyloTree::computeMixratePartialLikelihoodEigen(PhyloNeighbor *dad_branch,
 				}
 			}
 		}
-		delete [] partial_lh_right;
-		delete [] partial_lh_left;
+//		delete [] partial_lh_right;
+//		delete [] partial_lh_left;
 	} else if (left->node->isLeaf() && !right->node->isLeaf()) {
 		// special treatment to TIP-INTERNAL NODE case
 		// only take scale_num from the right subtree
 		memcpy(dad_branch->scale_num, right->scale_num, nptn * sizeof(UBYTE));
 
 		// pre compute information for left tip
-		double *partial_lh_left = new double[(aln->STATE_UNKNOWN+1)*block];
-
-		vector<int>::iterator it;
-		for (it = aln->seq_states[left->node->id].begin(); it != aln->seq_states[left->node->id].end(); it++) {
-			int state = (*it);
-			for (c = 0; c < ncat; c++)
-			for (x = 0; x < nstates; x++) {
-				double vleft = 0.0;
-				for (i = 0; i < nstates; i++) {
-					vleft += eleft[c*nstatesqr+x*nstates+i] * tip_partial_lh[state*block+c*nstates+i];
-				}
-				partial_lh_left[state*block+c*nstates+x] = vleft;
-			}
-		}
-		for (x = 0; x < block; x++) {
-			size_t addr = aln->STATE_UNKNOWN * block;
-			partial_lh_left[addr+x] = 1.0;
-		}
-
+		double *partial_lh_left = partial_lh_leaves;
+
+//		vector<int>::iterator it;
+//		for (it = aln->seq_states[left->node->id].begin(); it != aln->seq_states[left->node->id].end(); it++) {
+//			int state = (*it);
+//			for (c = 0; c < ncat; c++)
+//			for (x = 0; x < nstates; x++) {
+//				double vleft = 0.0;
+//				for (i = 0; i < nstates; i++) {
+//					vleft += eleft[c*nstatesqr+x*nstates+i] * tip_partial_lh[state*block+c*nstates+i];
+//				}
+//				partial_lh_left[state*block+c*nstates+x] = vleft;
+//			}
+//		}
+//		for (x = 0; x < block; x++) {
+//			size_t addr = aln->STATE_UNKNOWN * block;
+//			partial_lh_left[addr+x] = 1.0;
+//		}
+//
 
 		double sum_scale = 0.0;
 #ifdef _OPENMP
@@ -2012,7 +1694,7 @@ void PhyloTree::computeMixratePartialLikelihoodEigen(PhyloNeighbor *dad_branch,
 
 		}
 		dad_branch->lh_scale_factor += sum_scale;
-		delete [] partial_lh_left;
+//		delete [] partial_lh_left;
 
 	} else {
 		// both left and right are internal node
@@ -2066,8 +1748,8 @@ void PhyloTree::computeMixratePartialLikelihoodEigen(PhyloNeighbor *dad_branch,
 
 	}
 
-	delete [] eright;
-	delete [] eleft;
+    delete [] partial_lh_leaves;
+    delete [] echildren;
 }
 
 //template <const int nstates>
@@ -2260,14 +1942,6 @@ double PhyloTree::computeMixrateLikelihoodBranchEigen(PhyloNeighbor *dad_branch,
     		double *lh_tip = tip_partial_lh + (*it)*block;
     		for (i = 0; i < block; i++)
     			lh_node[i] = val[i]*lh_tip[i];
-//    		double *val_tmp = val;
-//			for (c = 0; c < ncat; c++) {
-//				for (i = 0; i < nstates; i++) {
-//					  lh_node[i] = val_tmp[i] * lh_tip[i];
-//				}
-//				lh_node += nstates;
-//				val_tmp += nstates;
-//			}
     	}
 
     	// now do the real computation
@@ -2373,11 +2047,11 @@ void PhyloTree::computeMixturePartialLikelihoodEigen(PhyloNeighbor *dad_branch,
     size_t nptn = aln->size()+model_factory->unobserved_ptns.size();
     PhyloNode *node = (PhyloNode*)(dad_branch->node);
 
+    if (!tip_partial_lh_computed)
+        computeTipPartialLikelihood();
+
 	if (node->isLeaf()) {
 	    dad_branch->lh_scale_factor = 0.0;
-
-		if (!tip_partial_lh_computed)
-			computeTipPartialLikelihood();
 		return;
 	}
 
@@ -2398,22 +2072,15 @@ void PhyloTree::computeMixturePartialLikelihoodEigen(PhyloNeighbor *dad_branch,
     dad_branch->lh_scale_factor = 0.0;
 
 	// internal node
-	assert(node->degree() == 3); // it works only for strictly bifurcating tree
 	PhyloNeighbor *left = NULL, *right = NULL; // left & right are two neighbors leading to 2 subtrees
 	FOR_NEIGHBOR_IT(node, dad, it) {
+        PhyloNeighbor *nei = (PhyloNeighbor*)*it;
 		if (!left) left = (PhyloNeighbor*)(*it); else right = (PhyloNeighbor*)(*it);
+        if ((nei->partial_lh_computed & 1) == 0)
+            computePartialLikelihood(nei, node);
+        dad_branch->lh_scale_factor += nei->lh_scale_factor;
 	}
 
-	if (!left->node->isLeaf() && right->node->isLeaf()) {
-		PhyloNeighbor *tmp = left;
-		left = right;
-		right = tmp;
-	}
-	if ((left->partial_lh_computed & 1) == 0)
-		computeMixturePartialLikelihoodEigen(left, node);
-	if ((right->partial_lh_computed & 1) == 0)
-		computeMixturePartialLikelihoodEigen(right, node);
-        
     if (params->lh_mem_save == LM_PER_NODE && !dad_branch->partial_lh) {
         // re-orient partial_lh
         bool done = false;
@@ -2433,92 +2100,166 @@ void PhyloTree::computeMixturePartialLikelihoodEigen(PhyloNeighbor *dad_branch,
     }
 
         
-	dad_branch->lh_scale_factor = left->lh_scale_factor + right->lh_scale_factor;
-//	double partial_lh_tmp[nstates];
-	double *eleft = new double[block*nstates], *eright = new double[block*nstates];
+    double *echildren = new double[block*nstates*(node->degree()-1)];
+    double *partial_lh_leaves = new double[(aln->STATE_UNKNOWN+1)*block*(node->degree()-1)];
+    double *echild = echildren;
+    double *partial_lh_leaf = partial_lh_leaves;
 
-	// precompute information buffer
-	for (c = 0; c < ncat; c++) {
-		double *expleft = new double[nstates];
-		double *expright = new double[nstates];
-		double len_left = site_rate->getRate(c) * left->length;
-		double len_right = site_rate->getRate(c) * right->length;
-		for (m = 0; m < nmixture; m++) {
-			for (i = 0; i < nstates; i++) {
-				expleft[i] = exp(eval[m*nstates+i]*len_left);
-				expright[i] = exp(eval[m*nstates+i]*len_right);
-			}
-			for (x = 0; x < nstates; x++)
-				for (i = 0; i < nstates; i++) {
-					eleft[(m*ncat+c)*nstatesqr+x*nstates+i] = evec[m*nstatesqr+x*nstates+i] * expleft[i];
-					eright[(m*ncat+c)*nstatesqr+x*nstates+i] = evec[m*nstatesqr+x*nstates+i] * expright[i];
-				}
-		}
-		delete [] expright;
-		delete [] expleft;
+    FOR_NEIGHBOR_IT(node, dad, it) {
+        // precompute information buffer
+        double expchild[nstates];
+        PhyloNeighbor *child = (PhyloNeighbor*)*it;
+        for (c = 0; c < ncat; c++) {
+            double len_child = site_rate->getRate(c) * child->length;
+            for (m = 0; m < nmixture; m++) {
+                for (i = 0; i < nstates; i++) {
+                    expchild[i] = exp(eval[m*nstates+i]*len_child);
+                }
+                for (x = 0; x < nstates; x++)
+                    for (i = 0; i < nstates; i++) {
+                        echild[(m*ncat+c)*nstatesqr+x*nstates+i] = evec[m*nstatesqr+x*nstates+i] * expchild[i];
+                    }
+            }
+        }
+        if (child->node->isLeaf()) {
+            vector<int>::iterator it;
+            for (it = aln->seq_states[child->node->id].begin(); it != aln->seq_states[child->node->id].end(); it++) {
+                int state = (*it);
+                for (m = 0; m < nmixture; m++) {
+                    double *this_echild = &echild[m*nstatesqr*ncat];
+                    double *this_tip_partial_lh = &tip_partial_lh[state*nstates*nmixture + m*nstates];
+                    double *this_partial_lh_leaf = &partial_lh_leaf[state*block+m*statecat];
+                    for (x = 0; x < statecat; x++) {
+                        double vchild = 0.0;
+                        for (i = 0; i < nstates; i++) {
+                            vchild += this_echild[x*nstates+i] * this_tip_partial_lh[i];
+                        }
+                        this_partial_lh_leaf[x] = vchild;
+                    }
+                }
+            }
+            size_t addr = aln->STATE_UNKNOWN * block;
+            for (x = 0; x < block; x++) {
+                partial_lh_leaf[addr+x] = 1.0;
+            }
+            partial_lh_leaf += (aln->STATE_UNKNOWN+1)*block;
+        }
+        echild += block*nstates;
+    }
+
+    double *eleft = echildren, *eright = echildren + block*nstates;
+    
+	if (!left->node->isLeaf() && right->node->isLeaf()) {
+		PhyloNeighbor *tmp = left;
+		left = right;
+		right = tmp;
+        double *etmp = eleft;
+        eleft = eright;
+        eright = etmp;
 	}
+    
+    if (node->degree() > 3) {
+        /*--------------------- multifurcating node ------------------*/
+        // now for-loop computing partial_lh over all site-patterns
+        double sum_scale = 0.0;
+#ifdef _OPENMP
+#pragma omp parallel for reduction(+: sum_scale) private(ptn, c, x, i, m) schedule(static)
+#endif
+        for (ptn = 0; ptn < nptn; ptn++) {
+            double partial_lh_all[block];
+            for (i = 0; i < block; i++)
+                partial_lh_all[i] = 1.0;
+            dad_branch->scale_num[ptn] = 0;
+                
+            double *partial_lh_leaf = partial_lh_leaves;
+            double *echild = echildren;
 
-	if (left->node->isLeaf() && right->node->isLeaf()) {
-		// special treatment for TIP-TIP (cherry) case
+            FOR_NEIGHBOR_IT(node, dad, it) {
+                PhyloNeighbor *child = (PhyloNeighbor*)*it;
+                if (child->node->isLeaf()) {
+                    // external node
+                    int state_child = (ptn < orig_ntn) ? (aln->at(ptn))[child->node->id] : model_factory->unobserved_ptns[ptn-orig_ntn];
+                    double *child_lh = partial_lh_leaf + state_child*block;
+                    for (c = 0; c < block; c++) {
+                        // compute real partial likelihood vector
+                        partial_lh_all[c] *= child_lh[c];
+                    }
+                    partial_lh_leaf += (aln->STATE_UNKNOWN+1)*block;
+                } else {
+                    // internal node
+                    double *partial_lh = partial_lh_all;
+                    double *partial_lh_child = child->partial_lh + ptn*block;
+                    dad_branch->scale_num[ptn] += child->scale_num[ptn];
+                    double *echild_ptr = echild;
 
-		// pre compute information for both tips
-		double *partial_lh_left = new double[(aln->STATE_UNKNOWN+1)*block];
-		double *partial_lh_right = new double[(aln->STATE_UNKNOWN+1)*block];
+                    for (m = 0; m < nmixture; m++) {
+                        for (c = 0; c < ncat; c++) {
+                            // compute real partial likelihood vector
+                            for (x = 0; x < nstates; x++) {
+                                double vchild = 0.0;
+//                                size_t addr = (m*ncat+c)*nstatesqr+x*nstates;
+                                for (i = 0; i < nstates; i++) {
+                                    vchild += echild_ptr[i] * partial_lh_child[i];
+                                }
+                                echild_ptr += nstates;
+                                partial_lh[x] *= vchild;
+                            }
+                            partial_lh += nstates;
+                            partial_lh_child += nstates;
+                        }
+                    }
 
-		vector<int>::iterator it;
-		for (it = aln->seq_states[left->node->id].begin(); it != aln->seq_states[left->node->id].end(); it++) {
-			int state = (*it);
-			for (m = 0; m < nmixture; m++) {
-				double *this_eleft = &eleft[m*nstatesqr*ncat];
-				double *this_tip_partial_lh = &tip_partial_lh[state*nstates*nmixture + m*nstates];
-				double *this_partial_lh_left = &partial_lh_left[state*block+m*statecat];
-
-//				for (c = 0; c < ncat; c++)
-//					for (x = 0; x < nstates; x++) {
-//						double vleft = 0.0;
-//						for (i = 0; i < nstates; i++) {
-//							vleft += this_eleft[(c*nstates+x)*nstates+i] * this_tip_partial_lh[i];
-//						}
-//						this_partial_lh_left[c*nstates+x] = vleft;
-//					}
-
-				for (x = 0; x < statecat; x++) {
-					double vleft = 0.0;
-					for (i = 0; i < nstates; i++) {
-						vleft += this_eleft[x*nstates+i] * this_tip_partial_lh[i];
-					}
-					this_partial_lh_left[x] = vleft;
-				}
-			}
-		}
+                } // if
+                echild += block*nstates;
+            } // FOR_NEIGHBOR
 
-		for (it = aln->seq_states[right->node->id].begin(); it != aln->seq_states[right->node->id].end(); it++) {
-			int state = (*it);
-			for (m = 0; m < nmixture; m++) {
-				double *this_eright = &eright[m*nstatesqr*ncat];
-				double *this_tip_partial_lh = &tip_partial_lh[state*nstates*nmixture + m*nstates];
-				double *this_partial_lh_right = &partial_lh_right[state*block+m*statecat];
-				for (x = 0; x < statecat; x++) {
-					double vright = 0.0;
+            // compute dot-product with inv_eigenvector
+            double lh_max = 0.0;
+            double *partial_lh_tmp = partial_lh_all;
+            double *partial_lh = dad_branch->partial_lh+ptn*block;
+            for (m = 0; m < nmixture; m++) {
+				for (c = 0; c < ncat; c++) {
+                    double *inv_evec_ptr = inv_evec + m*nstatesqr;
 					for (i = 0; i < nstates; i++) {
-						vright += this_eright[x*nstates+i] * this_tip_partial_lh[i];
+						double res = 0.0;
+						for (x = 0; x < nstates; x++) {
+							res += partial_lh_tmp[x]*inv_evec_ptr[x];
+						}
+                        inv_evec_ptr += nstates;
+						partial_lh[i] = res;
+						lh_max = max(fabs(res), lh_max);
 					}
-					this_partial_lh_right[x] = vright;
+                    partial_lh += nstates;
+                    partial_lh_tmp += nstates;
 				}
-			}
-		}
+            }
+            if (lh_max < SCALING_THRESHOLD) {
+				// now do the likelihood scaling
+                partial_lh = dad_branch->partial_lh + ptn*block;
+				for (i = 0; i < block; i++) {
+					partial_lh[i] *= SCALING_THRESHOLD_INVER;
+				}
+				// unobserved const pattern will never have underflow
+				sum_scale += LOG_SCALING_THRESHOLD * ptn_freq[ptn];
+				dad_branch->scale_num[ptn] += 1;
+            }
 
-		size_t addr = aln->STATE_UNKNOWN * block;
-		for (x = 0; x < block; x++) {
-			partial_lh_left[addr+x] = 1.0;
-			partial_lh_right[addr+x] = 1.0;
-		}
+        } // for ptn
+        dad_branch->lh_scale_factor += sum_scale;               
+                
+        // end multifurcating treatment
+        
+	} else if (left->node->isLeaf() && right->node->isLeaf()) {
+		// special treatment for TIP-TIP (cherry) case
 
+		// pre compute information for both tips
+		double *partial_lh_left = partial_lh_leaves;
+		double *partial_lh_right = partial_lh_leaves + (aln->STATE_UNKNOWN+1)*block;
 
 		// scale number must be ZERO
 	    memset(dad_branch->scale_num, 0, nptn * sizeof(UBYTE));
 #ifdef _OPENMP
-//#pragma omp parallel for private(ptn, c, x, i, m)
+#pragma omp parallel for private(ptn, c, x, i, m)
 #endif
 		for (ptn = 0; ptn < nptn; ptn++) {
 			double partial_lh_tmp[nstates];
@@ -2545,36 +2286,13 @@ void PhyloTree::computeMixturePartialLikelihoodEigen(PhyloNeighbor *dad_branch,
 				}
 			}
 		}
-		delete [] partial_lh_right;
-		delete [] partial_lh_left;
 	} else if (left->node->isLeaf() && !right->node->isLeaf()) {
 		// special treatment to TIP-INTERNAL NODE case
 		// only take scale_num from the right subtree
 		memcpy(dad_branch->scale_num, right->scale_num, nptn * sizeof(UBYTE));
 
 		// pre compute information for left tip
-		double *partial_lh_left = new double[(aln->STATE_UNKNOWN+1)*block];
-
-		vector<int>::iterator it;
-		for (it = aln->seq_states[left->node->id].begin(); it != aln->seq_states[left->node->id].end(); it++) {
-			int state = (*it);
-			for (m = 0; m < nmixture; m++) {
-				double *this_eleft = &eleft[m*nstatesqr*ncat];
-				double *this_tip_partial_lh = &tip_partial_lh[state*nstates*nmixture + m*nstates];
-				double *this_partial_lh_left = &partial_lh_left[state*block+m*statecat];
-				for (x = 0; x < statecat; x++) {
-					double vleft = 0.0;
-					for (i = 0; i < nstates; i++) {
-						vleft += this_eleft[x*nstates+i] * this_tip_partial_lh[i];
-					}
-					this_partial_lh_left[x] = vleft;
-				}
-			}
-		}
-		size_t addr = aln->STATE_UNKNOWN * block;
-		for (x = 0; x < block; x++) {
-			partial_lh_left[addr+x] = 1.0;
-		}
+		double *partial_lh_left = partial_lh_leaves;
 
 		double sum_scale = 0.0;
 #ifdef _OPENMP
@@ -2624,7 +2342,6 @@ void PhyloTree::computeMixturePartialLikelihoodEigen(PhyloNeighbor *dad_branch,
 
 		}
 		dad_branch->lh_scale_factor += sum_scale;
-		delete [] partial_lh_left;
 
 	} else {
 		// both left and right are internal node
@@ -2679,9 +2396,9 @@ void PhyloTree::computeMixturePartialLikelihoodEigen(PhyloNeighbor *dad_branch,
 		dad_branch->lh_scale_factor += sum_scale;
 
 	}
-
-	delete [] eright;
-	delete [] eleft;
+    
+    delete [] partial_lh_leaves;
+	delete [] echildren;
 }
 
 //template <const int nstates>
@@ -2854,7 +2571,6 @@ double PhyloTree::computeMixtureLikelihoodBranchEigen(PhyloNeighbor *dad_branch,
 
     size_t block = ncat * nstates * nmixture;
     size_t statemix = nstates * nmixture;
-    size_t cat_states = ncat * nstates;
     size_t catmix = ncat * nmixture;
     size_t ptn; // for big data size > 4GB memory required
     size_t c, i, m;
@@ -2908,12 +2624,6 @@ double PhyloTree::computeMixtureLikelihoodBranchEigen(PhyloNeighbor *dad_branch,
 			int state_dad = (ptn < orig_nptn) ? (aln->at(ptn))[dad->id] : model_factory->unobserved_ptns[ptn-orig_nptn];
 			double *lh_node = partial_lh_node + state_dad*block;
 			for (m = 0; m < nmixture; m++) {
-//                for (i = 0; i < cat_states; i++)
-//                    *lh_cat += lh_node[i] * partial_lh_dad[i];
-//                lh_ptn += *lh_cat;
-//                lh_node += cat_states;
-//                partial_lh_dad += cat_states;
-//                lh_cat++;
 				for (c = 0; c < ncat; c++) {
 					for (i = 0; i < nstates; i++) {
 						*lh_cat += lh_node[i] * partial_lh_dad[i];
@@ -2950,13 +2660,6 @@ double PhyloTree::computeMixtureLikelihoodBranchEigen(PhyloNeighbor *dad_branch,
 			double *partial_lh_node = node_branch->partial_lh + ptn*block;
 			double *val_tmp = val;
 			for (m = 0; m < nmixture; m++) {
-//                for (i = 0; i < cat_states; i++)
-//                    *lh_cat += val_tmp[i] * partial_lh_node[i] * partial_lh_dad[i];
-//                lh_ptn += *lh_cat;
-//                partial_lh_dad += cat_states;
-//                partial_lh_node += cat_states;
-//                val_tmp += cat_states;
-//                lh_cat++;
 				for (c = 0; c < ncat; c++) {
 					for (i = 0; i < nstates; i++) {
 						*lh_cat +=  val_tmp[i] * partial_lh_node[i] * partial_lh_dad[i];
diff --git a/quartet.cpp b/quartet.cpp
index eca150d..ca2dc5d 100644
--- a/quartet.cpp
+++ b/quartet.cpp
@@ -21,6 +21,10 @@
 #include "myreader.h"
 // #include "lmap.c"
 
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+
 #if 0  /*** moved to phylotree.h ***/
 /* Index definition for counter array needed in likelihood mapping analysis (HAS) */
 #define LM_REG1 0   /* top corner */
diff --git a/tools.cpp b/tools.cpp
index 85ca06c..c137a81 100644
--- a/tools.cpp
+++ b/tools.cpp
@@ -640,9 +640,9 @@ void parseArg(int argc, char *argv[], Params &params) {
     verbose_mode = VB_MIN;
     params.tree_gen = NONE;
     params.user_file = NULL;
-    params.fai = false;
-    params.testAlpha = false;
-    params.test_param = false;
+    params.opt_gammai = true;
+    params.opt_gammai_fast = false;
+    params.opt_gammai_keep_bran = false;
     params.testAlphaEpsAdaptive = false;
     params.randomAlpha = false;
     params.testAlphaEps = 0.1;
@@ -717,6 +717,7 @@ void parseArg(int argc, char *argv[], Params &params) {
     params.bootstrap_spec = NULL;
 
     params.aln_file = NULL;
+    params.phylip_sequential_format = false;
     params.treeset_file = NULL;
     params.topotest_replicates = 0;
     params.do_weighted_test = false;
@@ -768,6 +769,7 @@ void parseArg(int argc, char *argv[], Params &params) {
     params.model_test_and_tree = 0;
     params.model_test_separate_rate = false;
     params.optimize_mixmodel_weight = false;
+    params.optimize_rate_matrix = false;
     params.store_trans_matrix = false;
     //params.freq_type = FREQ_EMPIRICAL;
     params.freq_type = FREQ_UNKNOWN;
@@ -780,6 +782,7 @@ void parseArg(int argc, char *argv[], Params &params) {
     params.optimize_model_rate_joint = false;
     params.optimize_by_newton = true;
     params.optimize_alg = "2-BFGS-B,EM";
+    params.optimize_alg_gammai = "EM";
     params.fixed_branch_length = false;
     params.min_branch_length = 0.0; // this is now adjusted later based on alignment length
     params.max_branch_length = 100.0;
@@ -1034,6 +1037,13 @@ void parseArg(int argc, char *argv[], Params &params) {
 				params.optimize_alg = argv[cnt];
 				continue;
 			}
+            if (strcmp(argv[cnt], "-optalg_gammai") == 0) {
+                cnt++;
+                if (cnt >= argc)
+                    throw "Use -optalg_gammai <Brent|BFGS|EM>";
+                params.optimize_alg_gammai = argv[cnt];
+                continue;
+            }
 			if (strcmp(argv[cnt], "-root") == 0) {
 				params.is_rooted = true;
 				continue;
@@ -1507,6 +1517,10 @@ void parseArg(int argc, char *argv[], Params &params) {
 				params.aln_file = argv[cnt];
 				continue;
 			}
+			if (strcmp(argv[cnt], "--sequential") == 0) {
+                params.phylip_sequential_format = true;
+                continue;
+            }
 			if (strcmp(argv[cnt], "-z") == 0) {
 				cnt++;
 				if (cnt >= argc)
@@ -1544,6 +1558,7 @@ void parseArg(int argc, char *argv[], Params &params) {
 					throw "Use -spp <type of partition model>";
 				params.partition_file = argv[cnt];
 				params.partition_type = 'p';
+                params.opt_gammai = false;
 				continue;
 			}
 			if (strcmp(argv[cnt], "-spj") == 0 || strcmp(argv[cnt], "-q") == 0) {
@@ -1552,6 +1567,8 @@ void parseArg(int argc, char *argv[], Params &params) {
 					throw "Use -q <type of partition model>";
 				params.partition_file = argv[cnt];
 				params.partition_type = 'j';
+                params.optimize_alg_gammai = "Brent";
+                params.opt_gammai = false;
 				continue;
 			}
 			if (strcmp(argv[cnt], "-M") == 0) {
@@ -1711,6 +1728,9 @@ void parseArg(int argc, char *argv[], Params &params) {
 				cnt++;
 				if (cnt >= argc)
 					throw "Use -n <#iterations>";
+                if (params.gbo_replicates != 0) {
+                    outError("Ultrafast bootstrap does not work with -n option");
+                }
 				params.min_iterations = convert_int(argv[cnt]);
 				params.stop_condition = SC_FIXED_ITERATION;
 //                params.autostop = false;
@@ -1805,18 +1825,22 @@ void parseArg(int argc, char *argv[], Params &params) {
 				params.optimize_mixmodel_weight = true;
 				continue;
 			}
-			if (strcmp(argv[cnt], "-mh") == 0) {
-				params.mvh_site_rate = true;
-				params.discard_saturated_site = false;
-				params.SSE = LK_NORMAL;
-				continue;
-			}
-			if (strcmp(argv[cnt], "-mhs") == 0) {
-				params.mvh_site_rate = true;
-				params.discard_saturated_site = true;
-				params.SSE = LK_NORMAL;
+			if (strcmp(argv[cnt], "--opt-rate-mat") == 0) {
+				params.optimize_rate_matrix = true;
 				continue;
 			}
+//			if (strcmp(argv[cnt], "-mh") == 0) {
+//				params.mvh_site_rate = true;
+//				params.discard_saturated_site = false;
+//				params.SSE = LK_NORMAL;
+//				continue;
+//			}
+//			if (strcmp(argv[cnt], "-mhs") == 0) {
+//				params.mvh_site_rate = true;
+//				params.discard_saturated_site = true;
+//				params.SSE = LK_NORMAL;
+//				continue;
+//			}
 			if (strcmp(argv[cnt], "-rl") == 0) {
 				params.rate_mh_type = false;
 				continue;
@@ -1845,14 +1869,14 @@ void parseArg(int argc, char *argv[], Params &params) {
 					throw "Lambda must be in (0,1]";
 				continue;
 			}
-			if (strcmp(argv[cnt], "-nosse") == 0) {
-				params.SSE = LK_NORMAL;
-				continue;
-			}
-			if (strcmp(argv[cnt], "-slowsse") == 0) {
-				params.SSE = LK_SSE;
-				continue;
-			}
+//			if (strcmp(argv[cnt], "-nosse") == 0) {
+//				params.SSE = LK_NORMAL;
+//				continue;
+//			}
+//			if (strcmp(argv[cnt], "-slowsse") == 0) {
+//				params.SSE = LK_SSE;
+//				continue;
+//			}
 			if (strcmp(argv[cnt], "-fastlk") == 0) {
 				params.SSE = LK_EIGEN;
 				continue;
@@ -2381,6 +2405,9 @@ void parseArg(int argc, char *argv[], Params &params) {
 				cnt++;
 				if (cnt >= argc)
 					throw "Use -bb <#replicates>";
+                if (params.min_iterations != -1) {
+                    outError("Ultrafast bootstrap does not work with -te or -n option");
+                }
 				params.gbo_replicates = convert_int(argv[cnt]);
 //				params.avoid_duplicated_trees = true;
 				if (params.gbo_replicates < 1000)
@@ -2546,13 +2573,20 @@ void parseArg(int argc, char *argv[], Params &params) {
 				continue;
 			}
 
-			if (strcmp(argv[cnt], "--test-alpha") == 0) {
-				params.testAlpha = true;
-				continue;
-			}
+            if (strcmp(argv[cnt], "-opt_gammai") == 0 || strcmp(argv[cnt], "--opt-gamma-inv") == 0) {
+                params.opt_gammai = true;
+                continue;
+            }
 
-            if (strcmp(argv[cnt], "-test_param") == 0 || strcmp(argv[cnt], "--opt-gamma-inv") == 0) {
-                params.test_param = true;
+            if (strcmp(argv[cnt], "--opt-gammai-fast") == 0) {
+                params.opt_gammai_fast = true;
+                params.opt_gammai = true;
+                continue;
+            }
+
+            if (strcmp(argv[cnt], "--opt-gammai-kb") == 0) {
+                params.opt_gammai_keep_bran = true;
+                params.opt_gammai = true;
                 continue;
             }
 
@@ -2564,19 +2598,6 @@ void parseArg(int argc, char *argv[], Params &params) {
                 params.randomAlpha = true;
                 continue;
             }
-            if (strcmp(argv[cnt], "--test-alpha-eps") == 0) {
-                cnt++;
-                if (cnt >= argc)
-                    throw "Use --test-alpha-eps <logl_eps>";
-                params.testAlphaEps = convert_double(argv[cnt]);
-                params.testAlpha = true;
-                continue;
-            }
-
-            if (strcmp(argv[cnt], "-fai") == 0) {
-                params.fai = true;
-                continue;
-            }
 
             if (strcmp(argv[cnt], "-eai") == 0) {
                 params.exh_ai = true;
@@ -2813,14 +2834,14 @@ void parseArg(int argc, char *argv[], Params &params) {
 					throw "At least 1 thread please";
 				continue;
 			}
-			if (strcmp(argv[cnt], "-rootstate") == 0) {
-                cnt++;
-                if (cnt >= argc)
-                    throw "Use -rootstate <rootstate>";
-                params.root_state = argv[cnt];
-                params.SSE = LK_NORMAL;
-                continue;
-			}
+//			if (strcmp(argv[cnt], "-rootstate") == 0) {
+//                cnt++;
+//                if (cnt >= argc)
+//                    throw "Use -rootstate <rootstate>";
+//                params.root_state = argv[cnt];
+//                params.SSE = LK_NORMAL;
+//                continue;
+//			}
 			if (strcmp(argv[cnt], "-ct") == 0) {
             	params.count_trees = true;
             	continue;
@@ -2843,6 +2864,9 @@ void parseArg(int argc, char *argv[], Params &params) {
 			}
 			if (strcmp(argv[cnt], "-t") == 0 || strcmp(argv[cnt], "-te") == 0) {
                 if (strcmp(argv[cnt], "-te") == 0) {
+                    if (params.gbo_replicates != 0) {
+                        outError("Ultrafast bootstrap does not work with -te option");
+                    }
                     params.min_iterations = 0;
                     params.stop_condition = SC_FIXED_ITERATION;
                 }
@@ -3165,6 +3189,7 @@ void usage_iqtree(char* argv[], bool full_command) {
             << "  -gmedian             Median approximation for +G site rates (default: mean)" << endl
             << "  --opt-gamma-inv      More thorough estimation for +I+G model parameters" << endl
             << "  -i <p_invar>         Proportion of invariable sites (default: estimate)" << endl
+            << "  -wsr                 Write site rates to .rate file" << endl
             << "  -mh                  Computing site-specific rates to .mhrate file using" << endl
             << "                       Meyer & von Haeseler (2003) method" << endl
             //<< "  -c <#categories>     Number of Gamma rate categories (default: 4)" << endl
@@ -3222,6 +3247,7 @@ void usage_iqtree(char* argv[], bool full_command) {
 			<< "  -blfix               Fix branch lengths of user tree passed via -te" << endl
 			<< "  -blmin               Min branch length for optimization (default 0.000001)" << endl
 			<< "  -blmax               Max branch length for optimization (default 100)" << endl
+			<< "  -wsr                 Write site rates and categories to .rate file" << endl
 			<< "  -wsl                 Write site log-likelihoods to .sitelh file" << endl
             << "  -wslr                Write site log-likelihoods per rate category" << endl
             << "  -wslm                Write site log-likelihoods per mixture class" << endl
diff --git a/tools.h b/tools.h
index 21a2462..6426a56 100644
--- a/tools.h
+++ b/tools.h
@@ -395,7 +395,7 @@ struct NNIInfo {
 };
 
 enum LikelihoodKernel {
-	LK_NORMAL, LK_SSE, LK_EIGEN, LK_EIGEN_SSE
+	LK_EIGEN, LK_EIGEN_SSE
 };
 
 enum LhMemSave {
@@ -429,20 +429,18 @@ private:
 public:
 
     /**
-    *  Fast and accurate optimiation for alpha and p_invar
-    */
-    bool fai;
-
-	/**
-	 *  Use random restart strategy for estimating alpha and p_invar
-	 */
-	bool testAlpha;
-
-    /**
      *  Restart the optimization of alpha and pinvar from different starting
      *  pinv values (supercedes the option testAlpha
      */
-    bool test_param;
+    bool opt_gammai;
+
+    /**
+     *  A faster version of opt_gammai using a heuristic similar to binary search.
+     *  Thus, one does not need to perform 10 independent trials as in opt_gammai.
+     */
+    bool opt_gammai_fast;
+
+    bool opt_gammai_keep_bran;
 
     /**
      *  Automatic adjust the log-likelihood espilon using some heuristic
@@ -657,6 +655,9 @@ public:
      */
     char *aln_file;
 
+    /** true if sequential phylip format is used, default: false (interleaved format) */
+    bool phylip_sequential_format;
+
     /**
             file containing multiple trees to evaluate at the end
      */
@@ -1164,6 +1165,9 @@ public:
     /** TRUE to optimize mixture model weights */
     bool optimize_mixmodel_weight;
 
+    /** TRUE to always optimize rate matrix even if user parameters are specified in e.g. GTR{1,2,3,4,5} */
+    bool optimize_rate_matrix;
+
     /**
             TRUE to store transition matrix into a hash table for computation efficiency
      */
@@ -1217,6 +1221,11 @@ public:
     string optimize_alg;
 
     /**
+     *  Optimization algorithm for +I+G
+     */
+    string optimize_alg_gammai;
+
+    /**
             TRUE if you want to fix branch lengths during model optimization
      */
     bool fixed_branch_length;

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/iqtree.git



More information about the debian-med-commit mailing list