[med-svn] [iqtree] 01/03: New upstream version 1.5.5+dfsg

Andreas Tille tille at debian.org
Mon Jun 19 07:07:38 UTC 2017


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository iqtree.

commit 73a82c8538bbd24a0b304d422fe764eb55d6978c
Author: Andreas Tille <tille at debian.org>
Date:   Mon Jun 19 08:47:03 2017 +0200

    New upstream version 1.5.5+dfsg
---
 CMakeLists.txt         | 105 +++++++++----
 alignment.cpp          | 284 ++++++++++++++++++---------------
 alignment.h            |  11 +-
 checkpoint.cpp         |   4 +-
 checkpoint.h           |   9 +-
 constrainttree.cpp     |  48 ++++--
 constrainttree.h       |  15 +-
 iqtree.cpp             |  33 ++--
 mexttree.cpp           |  10 +-
 mexttree.h             |   2 +-
 model/modelcodon.cpp   |  35 +++--
 model/modelfactory.cpp |  20 ++-
 model/modelfactory.h   |   4 +-
 ngs.cpp                |   2 +-
 pda.cpp                |  82 +++++-----
 phyloanalysis.cpp      | 214 +++++++++++++++++--------
 phylokernel.h          |   8 +-
 phylokernelfma.cpp     |  16 +-
 phylokernelnew.h       | 414 +++++++++++++++++++++++++++++--------------------
 phylokernelsse.cpp     |  16 +-
 phylosupertree.cpp     |   4 +-
 phylosupertreeplen.cpp |  27 +++-
 phylosupertreeplen.h   |   2 +
 phylotesting.cpp       | 104 +++++++++----
 phylotesting.h         |   9 +-
 phylotree.cpp          | 141 +++++++++++++++--
 phylotree.h            |  35 ++---
 phylotreeavx.cpp       |  16 +-
 phylotreesse.cpp       |  30 ++--
 superalignment.cpp     | 130 ++++++++++------
 superalignment.h       |   7 +
 tools.cpp              | 185 +++++++++++++---------
 tools.h                |  27 +++-
 33 files changed, 1324 insertions(+), 725 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6acf40b..dcae10f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -50,11 +50,11 @@ add_definitions(-DIQ_TREE)
 # The version number.
 set (iqtree_VERSION_MAJOR 1)
 set (iqtree_VERSION_MINOR 5)
-set (iqtree_VERSION_PATCH "3")
+set (iqtree_VERSION_PATCH "5")
 
 set(BUILD_SHARED_LIBS OFF)
 
-if (CMAKE_C_COMPILER MATCHES "mpi")
+if (CMAKE_C_COMPILER MATCHES "mpic")
     set(IQTREE_FLAGS "${IQTREE_FLAGS} mpi")
 endif()
 
@@ -87,13 +87,13 @@ if (WIN32)
     add_definitions(-DWIN32)
 elseif (APPLE)
 	message("Target OS     : Mac OS X")
-	# to be compatible back to Mac OS X 10.6
+	# to be compatible back to Mac OS X 10.7
 	if (IQTREE_FLAGS MATCHES "oldmac")
 		add_definitions("-mmacosx-version-min=10.5")
 		set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -mmacosx-version-min=10.5")
 	else()
-		add_definitions("-mmacosx-version-min=10.6")
-		set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -mmacosx-version-min=10.6")
+		add_definitions("-mmacosx-version-min=10.7")
+		set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -mmacosx-version-min=10.7")
 	endif()
     SET(CMAKE_FIND_LIBRARY_SUFFIXES .a ${CMAKE_FIND_LIBRARY_SUFFIXES})
 elseif (UNIX)
@@ -124,8 +124,13 @@ if (CMAKE_COMPILER_IS_GNUCXX)
 	message("Compiler      : GNU Compiler (gcc)")
 	set(GCC "TRUE")
 #	set(COMBINED_FLAGS "-Wall -Wno-unused-function -Wno-sign-compare -pedantic -D_GNU_SOURCE -fms-extensions -Wno-deprecated")
-	set(CMAKE_CXX_FLAGS_RELEASE "-O3 -g")
-	set(CMAKE_C_FLAGS_RELEASE "-O3 -g")
+	set(CMAKE_CXX_FLAGS_RELEASE "-O3 -g -ffunction-sections -fdata-sections")
+	set(CMAKE_C_FLAGS_RELEASE "-O3 -g -ffunction-sections -fdata-sections")
+    if (APPLE)
+        set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} -Wl,-dead_strip")
+    else()
+        set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} -Wl,--gc-sections")
+    endif()
     # require at least gcc 4.8
     if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.8)
         message(FATAL_ERROR "GCC version must be at least 4.8!")
@@ -139,12 +144,23 @@ elseif (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
 	message("Compiler      : Clang")
 	set(CLANG "TRUE")
 #	set(COMBINED_FLAGS "-Wall -Wno-unused-function -Wno-sign-compare -pedantic -D_GNU_SOURCE -Wno-nested-anon-types")
-    #if (APPLE AND NOT CMAKE_BUILD_TYPE MATCHES "Debug")
-    #    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++")
-    #    set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -stdlib=libc++")
-    #endif()
-	set(CMAKE_CXX_FLAGS_RELEASE "-O3")
-	set(CMAKE_C_FLAGS_RELEASE "-O3")
+    set(CMAKE_CXX_FLAGS_RELEASE "-O3 -ffunction-sections -fdata-sections")
+    set(CMAKE_C_FLAGS_RELEASE "-O3 -ffunction-sections -fdata-sections")
+    if (APPLE)
+        set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} -Wl,-dead_strip")
+    else()
+        set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} -Wl,--gc-sections")
+    endif()
+
+    # use libc++ per default in MacOS
+    if (APPLE AND CMAKE_BUILD_TYPE MATCHES "Release")
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++")
+    endif()
+
+    #remove -rdynamic for Clang under Linux
+    if (UNIX AND IQTREE_FLAGS MATCHES "static")
+        SET(CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS)
+    endif()
 elseif (CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
 	set(VCC "TRUE")
 	message("Compiler      : MS Visual C++ Compiler")
@@ -173,6 +189,15 @@ if (MSVC)
 	endif()
 endif()
 
+# enable link time optimization
+if (IQTREE_FLAGS MATCHES "lto")
+    #if (CLANG)
+    #    set(COMBINED_FLAGS "${COMBINED_FLAGS} -flto=thin")
+    #else()
+        set(COMBINED_FLAGS "${COMBINED_FLAGS} -flto")
+    #endif()
+endif()
+
 ##################################################################
 # configure MPI compilation
 ##################################################################
@@ -239,9 +264,11 @@ if (IQTREE_FLAGS MATCHES "omp")
   		set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /openmp")
   		include_directories("${PROJECT_SOURCE_DIR}/pll") # for PThreads headers
 	elseif (ICC)
-  		set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Qopenmp")
   		if (WIN32)
+            set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Qopenmp")
   			include_directories("${PROJECT_SOURCE_DIR}/pll") # for PThreads headers
+        else()
+            set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -qopenmp")
   		endif()
   	elseif (GCC)
 		set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread")
@@ -275,7 +302,7 @@ elseif (GCC)
 	set(AVX_FLAGS "${AVX_FLAGS} -mavx -fabi-version=0")
 elseif (ICC)
 	if (WIN32)
-		 set(AVX_FLAGS "${AVX_FLAGS} /arch:AVX")
+		 set(AVX_FLAGS "${AVX_FLAGS} /arch:avx")
 	else()
 		 set(AVX_FLAGS "${AVX_FLAGS} -mavx")
 	endif()
@@ -288,7 +315,7 @@ elseif (GCC OR CLANG)
 	set(SSE_FLAGS "${SSE_FLAGS} -msse3")
 elseif (ICC)
 	if (WIN32)
-		set(SSE_FLAGS "${SSE_FLAGS} /arch:SSE3")
+		set(SSE_FLAGS "${SSE_FLAGS} /arch:sse3")
 	else()
 		set(SSE_FLAGS "${SSE_FLAGS} -msse3")
 	endif()
@@ -303,7 +330,7 @@ elseif (GCC)
 	set(FMA_FLAGS "${FMA_FLAGS} -mavx -fabi-version=0 -mfma")
 elseif (ICC) 
 	if (WIN32)
-		 set(FMA_FLAGS "${FMA_FLAGS} /arch:AVX /Qfma")
+		 set(FMA_FLAGS "${FMA_FLAGS} /arch:core-avx2")
 	else()
 		 set(FMA_FLAGS "${FMA_FLAGS} -march=core-avx2")
 	endif()
@@ -319,9 +346,9 @@ elseif (GCC)
 	set(AVX512_FLAGS "${AVX512_FLAGS} -mavx512f -mfma")
 elseif (ICC) 
 	if (WIN32)
-		 set(AVX512_FLAGS "${AVX512_FLAGS} /arch:MIC-AVX512 /Qfma")
+		 set(AVX512_FLAGS "${AVX512_FLAGS} /QxMIC-AVX512")
 	else()
-		 set(AVX512_FLAGS "${AVX512_FLAGS} -xMIC-AVX512 -mfma")
+		 set(AVX512_FLAGS "${AVX512_FLAGS} -xMIC-AVX512")
 	endif()
 endif()
 
@@ -339,7 +366,11 @@ elseif (IQTREE_FLAGS MATCHES "avx") # AVX instruction set
 	set(COMBINED_FLAGS "${COMBINED_FLAGS} ${AVX_FLAGS}")
 	#SET(EXE_SUFFIX "${EXE_SUFFIX}-avx")
 elseif (NOT IQTREE_FLAGS MATCHES "nosse") #SSE intruction set
-	message("Vectorization : SSE3/AVX/AVX2")
+    if (IQTREE_FLAGS MATCHES "512")
+        message("Vectorization : SSE3/AVX/AVX2/AVX-512")
+    else()
+        message("Vectorization : SSE3/AVX/AVX2")
+    endif()
 	#add_definitions(-D__SSE3)
     #set(COMBINED_FLAGS "${COMBINED_FLAGS} ${SSE_FLAGS}")
 endif()
@@ -354,6 +385,15 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${COMBINED_FLAGS}")
 set(CMAKE_CXX_FLAGS_PROFILE "${CMAKE_CXX_FLAGS} -fno-inline-functions -fno-inline-functions-called-once -fno-optimize-sibling-calls -fno-default-inline -fno-inline -O2 -fno-omit-frame-pointer -g")
 set(CMAKE_C_FLAGS_PROFILE "${CMAKE_C_FLAGS} -fno-inline-functions -fno-inline-functions-called-once -fno-optimize-sibling-calls -O2 -fno-omit-frame-pointer -g")
 
+if(CLANG AND IQTREE_FLAGS MATCHES "static")
+    set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pthread -Wl,--allow-multiple-definition")
+endif()
+
+if (IQTREE_FLAGS MATCHES "libcxx")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++")
+endif()
+
+
 if (CMAKE_BUILD_TYPE STREQUAL "Release")
 	message("C flags       : ${CMAKE_C_FLAGS} ${CMAKE_C_FLAGS_RELEASE}")
 	message("CXX flags     : ${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE}")
@@ -369,6 +409,8 @@ if (CMAKE_BUILD_TYPE STREQUAL "Profile")
 	message("CXX flags     : ${CMAKE_CXX_FLAGS_PROFILE} ")
 endif()
 
+message("LINKER flags  : ${CMAKE_EXE_LINKER_FLAGS} ${CMAKE_EXE_LINKER_FLAGS_RELEASE}")
+
 
 if (GCC)
 	set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -fno-inline-functions -fno-inline-functions-called-once -fno-default-inline -fno-inline")
@@ -551,8 +593,8 @@ else()
 	set(PLATFORM_LIB "m")
 endif()
 
-if(CLANG AND WIN32 AND IQTREE_FLAGS MATCHES "static")
-    set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pthread -Wl,--allow-multiple-definition")
+if (IQTREE_FLAGS MATCHES "libcxx")
+    set(STD_LIB "c++abi")
 endif()
 
 set(THREAD_LIB "")
@@ -567,7 +609,7 @@ if (IQTREE_FLAGS MATCHES "omp")
 			set(THREAD_LIB "pthreadVC2")
 		endif()
 	elseif(CLANG AND APPLE)
-		set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -L${PROJECT_SOURCE_DIR}/libmac -fopenmp=libomp")
+		set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -L${PROJECT_SOURCE_DIR}/libmac")
 	elseif(CLANG AND WIN32)
         if (BINARY32)
             set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -L${PROJECT_SOURCE_DIR}/lib32 libiomp5md.dll")
@@ -576,10 +618,15 @@ if (IQTREE_FLAGS MATCHES "omp")
         endif()
 #        set(THREAD_LIB "ompstatic")
 	endif()
+
+    if (CLANG AND BINARY32)
+        set (ATOMIC_LIB "atomic")
+    endif()
+
 endif()
 
 # basic linking librararies
-target_link_libraries(iqtree pll ncl lbfgsb whtest sprng vectorclass model gsl ${PLATFORM_LIB} ${STD_LIB} ${THREAD_LIB})
+target_link_libraries(iqtree pll ncl lbfgsb whtest sprng vectorclass model gsl ${PLATFORM_LIB} ${STD_LIB} ${THREAD_LIB} ${ATOMIC_LIB})
 
 if (NOT IQTREE_FLAGS MATCHES "nosse")
     target_link_libraries(iqtree kernelsse)
@@ -610,7 +657,7 @@ if (CMAKE_BUILD_TYPE STREQUAL "Release" AND (GCC OR CLANG)) # strip is not neces
 	if (WIN32)
 		ADD_CUSTOM_COMMAND(TARGET iqtree POST_BUILD COMMAND strip $<TARGET_FILE:iqtree>)
 	elseif (NOT APPLE)
-		ADD_CUSTOM_COMMAND(TARGET iqtree POST_BUILD COMMAND ${CMAKE_STRIP} $<TARGET_FILE:iqtree>)
+		ADD_CUSTOM_COMMAND(TARGET iqtree POST_BUILD COMMAND strip $<TARGET_FILE:iqtree>)
 	endif()
 endif()
 
@@ -695,10 +742,16 @@ if (BINARY32)
     set (SYSTEM_NAME "${SYSTEM_NAME}32")
 endif()
 
+if (IQTREE_FLAGS MATCHES "512")
+    set (SYSTEM_NAME "${SYSTEM_NAME}512")
+endif()
+
 set(CPACK_PACKAGE_FILE_NAME 
 	"${CMAKE_PROJECT_NAME}${EXE_SUFFIX}-${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}-${SYSTEM_NAME}")
 
-set(CPACK_STRIP_FILES TRUE)
+if (NOT APPLE)
+    set(CPACK_STRIP_FILES TRUE)
+endif()
 
 include (CPack)
 
diff --git a/alignment.cpp b/alignment.cpp
index 44f0146..677e292 100644
--- a/alignment.cpp
+++ b/alignment.cpp
@@ -57,9 +57,9 @@ Alignment::Alignment()
     num_states = 0;
     frac_const_sites = 0.0;
     frac_invariant_sites = 0.0;
-//    codon_table = NULL;
+    codon_table = NULL;
     genetic_code = NULL;
-//    non_stop_codon = NULL;
+    non_stop_codon = NULL;
     seq_type = SEQ_UNKNOWN;
     STATE_UNKNOWN = 126;
     pars_lower_bound = NULL;
@@ -103,6 +103,24 @@ double chi2prob (int deg, double chi2)
 } /* chi2prob */
 
 
+int Alignment::checkAbsentStates(string msg) {
+    double *state_freq = new double[num_states];
+    computeStateFreq(state_freq);
+    int count = 0;
+    for (int i = 0; i < num_states; i++)
+        if (state_freq[i] <= MIN_FREQUENCY) {
+            if (count == 0)
+                cout << "WARNING: " << convertStateBackStr(i);
+            else
+                cout << ", " << convertStateBackStr(i);
+            count++;
+        }
+    if (count)
+        cout << ((count >= 2) ? " are" : " is") << " not present in " << msg << " that may cause numerical problems" << endl;
+    delete[] state_freq;
+    return count;
+}
+
 void Alignment::checkSeqName() {
     ostringstream warn_str;
     StrVector::iterator it;
@@ -263,7 +281,9 @@ Alignment *Alignment::removeIdenticalSeq(string not_remove, bool keep_two, StrVe
 					removed_seqs.push_back(getSeqName(seq2));
 					target_seqs.push_back(getSeqName(seq1));
 					removed[seq2] = true;
-				}
+				} else {
+                    cout << "NOTE: " << getSeqName(seq2) << " is identical to " << getSeqName(seq1) << " but kept for subsequent analysis" << endl;
+                }
 				checked[seq2] = 1;
 				first_ident_seq = false;
 			}
@@ -330,9 +350,9 @@ Alignment::Alignment(char *filename, char *sequence_type, InputType &intype) : v
     num_states = 0;
     frac_const_sites = 0.0;
     frac_invariant_sites = 0.0;
-//    codon_table = NULL;
+    codon_table = NULL;
     genetic_code = NULL;
-//    non_stop_codon = NULL;
+    non_stop_codon = NULL;
     seq_type = SEQ_UNKNOWN;
     STATE_UNKNOWN = 126;
     pars_lower_bound = NULL;
@@ -389,6 +409,9 @@ Alignment::Alignment(char *filename, char *sequence_type, InputType &intype) : v
 }
 
 bool Alignment::isStopCodon(int state) {
+    // 2017-05-27: all stop codon removed from Markov process
+    return false;
+
 	if (seq_type != SEQ_CODON || state >= num_states) return false;
 	assert(genetic_code);
 	return (genetic_code[state] == '*');
@@ -405,7 +428,7 @@ int Alignment::getNumNonstopCodons() {
 
 bool Alignment::isStandardGeneticCode() {
     if (seq_type != SEQ_CODON) return false;
-	return (genetic_code == genetic_code1);
+	return (genetic_code == genetic_code1 || genetic_code == genetic_code11);
 }
 
 void Alignment::buildSeqStates(bool add_unobs_const) {
@@ -486,8 +509,8 @@ int getDataBlockMorphStates(NxsCharactersBlock *data_block) {
     char ch;
     int nstates = 0;
     
-    for (site = 0; site < nsite; site++)
-        for (seq = 0; seq < nseq; seq++) {
+    for (seq = 0; seq < nseq; seq++)
+        for (site = 0; site < nsite; site++) {
             int nstate = data_block->GetNumStates(seq, site);
             if (nstate == 0)
                 continue;
@@ -499,23 +522,11 @@ int getDataBlockMorphStates(NxsCharactersBlock *data_block) {
                 else if (ch >= 'A' && ch <= 'Z') 
                     ch = ch - 'A' + 11;
                 else 
-                    outError(data_block->GetTaxonLabel(seq) + " has invalid state at site " + convertIntToString(site));
+                    outError(data_block->GetTaxonLabel(seq) + " has invalid single state " + ch + " at site " + convertIntToString(site+1));
                 if (ch > nstates) nstates = ch;
                 continue;
             }
-            for (int state = 0; state < nstate; state++) {
-                ch = data_block->GetState(seq, site, state);
-                if (!isalnum(ch)) continue;
-                if (ch >= '0' && ch <= '9') ch = ch - '0' + 1;
-                if (ch >= 'A' && ch <= 'Z') ch = ch - 'A' + 11;
-                if (ch >= '0' && ch <= '9') 
-                    ch = ch - '0' + 1;
-                else if (ch >= 'A' && ch <= 'Z') 
-                    ch = ch - 'A' + 11;
-                else 
-                    outError(data_block->GetTaxonLabel(seq) + " has invalid state at site " + convertIntToString(site));
-                if (ch > nstates) nstates = ch;
-            }
+            cout << "NOTE: " << data_block->GetTaxonLabel(seq) << " has ambiguous state at site " << site+1 << " which is treated as unknown" << endl;
         }
     return nstates;
 }
@@ -587,14 +598,17 @@ void Alignment::extractDataBlock(NxsCharactersBlock *data_block) {
                 pat += STATE_UNKNOWN;
             else if (nstate == 1) {
                 pat += char_to_state[(int)data_block->GetState(seq, site, 0)];
-            } else {
-                assert(data_type != NxsCharactersBlock::dna || data_type != NxsCharactersBlock::rna || data_type != NxsCharactersBlock::nucleotide);
+            } else if (seq_type == SEQ_DNA) {
+                // ambiguous DNA state
                 char pat_ch = 0;
                 for (int state = 0; state < nstate; state++) {
                     pat_ch |= (1 << char_to_state[(int)data_block->GetState(seq, site, state)]);
                 }
                 pat_ch += 3;
                 pat += pat_ch;
+            } else {
+                // other datatype will be treated as unknown
+                pat += STATE_UNKNOWN;
             }
         }
         num_gaps_only += addPattern(pat, site);
@@ -1132,8 +1146,8 @@ string Alignment::convertStateBackStr(char state) {
 	} else {
 		// codon data
 		if (state >= num_states) return "???";
-//		assert(codon_table);
-//		int state_back = codon_table[(int)state];
+		assert(codon_table);
+		state = codon_table[(int)state];
 		str = symbols_dna[state/16];
 		str += symbols_dna[(state%16)/4];
 		str += symbols_dna[state%4];
@@ -1184,8 +1198,7 @@ void Alignment::initCodon(char *gene_code_id) {
 	}
 	assert(strlen(genetic_code) == 64);
 
-//	int codon;
-	/*
+	int codon;
 	num_states = 0;
 	for (codon = 0; codon < strlen(genetic_code); codon++)
 		if (genetic_code[codon] != '*')
@@ -1201,8 +1214,7 @@ void Alignment::initCodon(char *gene_code_id) {
 			non_stop_codon[codon] = STATE_INVALID;
 		}
 	}
-	*/
-	num_states = strlen(genetic_code);
+//	num_states = strlen(genetic_code);
 //	codon_table = new char[num_states];
 //	non_stop_codon = new char[strlen(genetic_code)];
 //	int state = 0;
@@ -1246,9 +1258,9 @@ SeqType Alignment::getSeqType(const char *sequence_type) {
 int Alignment::buildPattern(StrVector &sequences, char *sequence_type, int nseq, int nsite) {
     int seq_id;
     ostringstream err_str;
-//    codon_table = NULL;
+    codon_table = NULL;
     genetic_code = NULL;
-//    non_stop_codon = NULL;
+    non_stop_codon = NULL;
 
 
     if (nseq != seq_names.size()) throw "Different number of sequences than specified";
@@ -1387,6 +1399,8 @@ int Alignment::buildPattern(StrVector &sequences, char *sequence_type, int nseq,
                         state = STATE_UNKNOWN;
             		} else if (nt2aa) {
                         state = AA_to_state[(int)genetic_code[(int)state]];
+                    } else {
+                        state = non_stop_codon[(int)state];
                     }
             	} else if (state == STATE_INVALID || state2 == STATE_INVALID || state3 == STATE_INVALID) {
             		state = STATE_INVALID;
@@ -1395,7 +1409,7 @@ int Alignment::buildPattern(StrVector &sequences, char *sequence_type, int nseq,
             			ostringstream warn_str;
                         warn_str << "Sequence " << seq_names[seq] << " has ambiguous character " <<
                         		sequences[seq][site] << sequences[seq][site+1] << sequences[seq][site+2] <<
-                        		" at site " << site+1 << endl;
+                        		" at site " << site+1;
                         outWarning(warn_str.str());
             		}
             		state = STATE_UNKNOWN;
@@ -1423,6 +1437,25 @@ int Alignment::buildPattern(StrVector &sequences, char *sequence_type, int nseq,
     return 1;
 }
 
+void processSeq(string &sequence, string &line, int line_num) {
+    for (string::iterator it = line.begin(); it != line.end(); it++) {
+        if ((*it) <= ' ') continue;
+        if (isalnum(*it) || (*it) == '-' || (*it) == '?'|| (*it) == '.' || (*it) == '*' || (*it) == '~')
+            sequence.append(1, toupper(*it));
+        else if (*it == '(' || *it == '{') {
+            auto start_it = it;
+            while (*it != ')' && *it != '}' && it != line.end())
+                it++;
+            if (it == line.end())
+                throw "Line " + convertIntToString(line_num) + ": No matching close-bracket ) or } found";
+            sequence.append(1, '?');
+            cout << "NOTE: Line " << line_num << ": " << line.substr(start_it-line.begin(), (it-start_it)+1) << " is treated as unknown character" << endl;
+        } else {
+            throw "Line " + convertIntToString(line_num) + ": Unrecognized character "  + *it;
+        }
+    }
+}
+
 int Alignment::readPhylip(char *filename, char *sequence_type) {
 
     StrVector sequences;
@@ -1441,7 +1474,7 @@ int Alignment::readPhylip(char *filename, char *sequence_type) {
     num_states = 0;
 
     for (; !in.eof(); line_num++) {
-        getline(in, line);
+        safeGetline(in, line);
         line = line.substr(0, line.find_first_of("\n\r"));
         if (line == "") continue;
 
@@ -1477,16 +1510,7 @@ int Alignment::readPhylip(char *filename, char *sequence_type) {
                     sequences[seq_id].append(1, state);
                     if (num_states < state+1) num_states = state+1;
                 }
-            } else
-                for (string::iterator it = line.begin(); it != line.end(); it++) {
-                    if ((*it) <= ' ') continue;
-                    if (isalnum(*it) || (*it) == '-' || (*it) == '?'|| (*it) == '.' || (*it) == '*' || (*it) == '~')
-                        sequences[seq_id].append(1, toupper(*it));
-                    else {
-                        err_str << "Line " << line_num <<": Unrecognized character " << *it;
-                        throw err_str.str();
-                    }
-                }
+            } else processSeq(sequences[seq_id], line, line_num);
             if (sequences[seq_id].length() != sequences[0].length()) {
                 err_str << "Line " << line_num << ": Sequence " << seq_names[seq_id] << " has wrong sequence length " << sequences[seq_id].length() << endl;
                 throw err_str.str();
@@ -1525,7 +1549,7 @@ int Alignment::readPhylipSequential(char *filename, char *sequence_type) {
     num_states = 0;
 
     for (; !in.eof(); line_num++) {
-        getline(in, line);
+        safeGetline(in, line);
         line = line.substr(0, line.find_first_of("\n\r"));
         if (line == "") continue;
 
@@ -1553,15 +1577,7 @@ int Alignment::readPhylipSequential(char *filename, char *sequence_type) {
                 seq_names[seq_id] = line.substr(0, pos);
                 line.erase(0, pos);
             }
-            for (string::iterator it = line.begin(); it != line.end(); it++) {
-                if ((*it) <= ' ') continue;
-                if (isalnum(*it) || (*it) == '-' || (*it) == '?'|| (*it) == '.' || (*it) == '*' || (*it) == '~')
-                    sequences[seq_id].append(1, toupper(*it));
-                else {
-                    err_str << "Line " << line_num <<": Unrecognized character " << *it;
-                    throw err_str.str();
-                }
-            }
+            processSeq(sequences[seq_id], line, line_num);
             if (sequences[seq_id].length() > nsite)
                 throw ("Line " + convertIntToString(line_num) + ": Sequence " + seq_names[seq_id] + " is too long (" + convertIntToString(sequences[seq_id].length()) + ")");
             if (sequences[seq_id].length() == nsite) {
@@ -1593,7 +1609,7 @@ int Alignment::readFasta(char *filename, char *sequence_type) {
     in.exceptions(ios::badbit);
 
     for (; !in.eof(); line_num++) {
-        getline(in, line);
+        safeGetline(in, line);
         if (line == "") continue;
 
         //cout << line << endl;
@@ -1606,15 +1622,7 @@ int Alignment::readFasta(char *filename, char *sequence_type) {
         }
         // read sequence contents
         if (sequences.empty()) throw "First line must begin with '>' to define sequence name";
-        for (string::iterator it = line.begin(); it != line.end(); it++) {
-            if ((*it) <= ' ') continue;
-            if (isalnum(*it) || (*it) == '-' || (*it) == '?'|| (*it) == '.' || (*it) == '*' || (*it) == '~')
-                sequences.back().append(1, toupper(*it));
-            else {
-                err_str << "Line " << line_num <<": Unrecognized character " << *it;
-                throw err_str.str();
-            }
-        }
+        processSeq(sequences.back(), line, line_num);
     }
     in.clear();
     // set the failbit again
@@ -1677,14 +1685,14 @@ int Alignment::readClustal(char *filename, char *sequence_type) {
     in.open(filename);
     // remove the failbit
     in.exceptions(ios::badbit);
-    getline(in, line);
+    safeGetline(in, line);
     if (line.substr(0, 7) != "CLUSTAL") {
         throw "ClustalW file does not start with 'CLUSTAL'";
     }
 
     int seq_count = 0;
     for (line_num = 2; !in.eof(); line_num++) {
-        getline(in, line);
+        safeGetline(in, line);
         trimString(line);
         if (line == "") { 
             seq_count = 0;
@@ -1711,20 +1719,17 @@ int Alignment::readClustal(char *filename, char *sequence_type) {
         pos = line.find_first_of(" \t");
         line = line.substr(0, pos);
         // read sequence contents
-        for (string::iterator it = line.begin(); it != line.end(); it++) {
-            if ((*it) <= ' ') continue;
-            if (isalnum(*it) || (*it) == '-' || (*it) == '?'|| (*it) == '.' || (*it) == '*' || (*it) == '~')
-                sequences[seq_count].append(1, toupper(*it));
-            else {
-                throw "Line " +convertIntToString(line_num) + ": Unrecognized character " + *it;
-            }
-        }
+        processSeq(sequences[seq_count], line, line_num);
         seq_count++;
     }
     in.clear();
     // set the failbit again
     in.exceptions(ios::failbit | ios::badbit);
     in.close();
+
+    if (sequences.empty())
+        throw "No sequences found. Please check input (e.g. newline character)";
+
     return buildPattern(sequences, sequence_type, seq_names.size(), sequences.front().length());
 
 
@@ -1746,7 +1751,7 @@ int Alignment::readMSF(char *filename, char *sequence_type) {
     in.open(filename);
     // remove the failbit
     in.exceptions(ios::badbit);
-    getline(in, line);
+    safeGetline(in, line);
     if (line.find("MULTIPLE_ALIGNMENT") == string::npos) {
         throw "MSF file must start with header line MULTIPLE_ALIGNMENT";
     }
@@ -1755,7 +1760,7 @@ int Alignment::readMSF(char *filename, char *sequence_type) {
     bool seq_started = false;
     
     for (line_num = 2; !in.eof(); line_num++) {
-        getline(in, line);
+        safeGetline(in, line);
         trimString(line);
         if (line == "") { 
             continue;
@@ -1819,16 +1824,7 @@ int Alignment::readMSF(char *filename, char *sequence_type) {
         
         line = line.substr(pos+1);
         // read sequence contents
-        for (string::iterator it = line.begin(); it != line.end(); it++) {
-            if ((*it) <= ' ') continue;
-            if (isalnum(*it) || (*it) == '-' || (*it) == '?'|| (*it) == '.' || (*it) == '*')
-                sequences[seq_count].append(1, toupper(*it));
-            else  if ((*it) == '~')
-                sequences[seq_count].append(1, '-');
-            else {
-                throw "Line " +convertIntToString(line_num) + ": Unrecognized character " + *it;
-            }
-        }
+        processSeq(sequences[seq_count], line, line_num);
         seq_count++;
         if (seq_count == seq_names.size())
             seq_count = 0;
@@ -2013,13 +2009,12 @@ void Alignment::extractSubAlignment(Alignment *aln, IntVector &seq_id, int min_t
     seq_type = aln->seq_type;
     STATE_UNKNOWN = aln->STATE_UNKNOWN;
 	genetic_code = aln->genetic_code;
-//    if (seq_type == SEQ_CODON) {
-//    	codon_table = new char[num_states];
-//    	memcpy(codon_table, aln->codon_table, num_states);
-//    	non_stop_codon = new char[strlen(genetic_code)];
-//    	memcpy(non_stop_codon, aln->non_stop_codon, strlen(genetic_code));
-//
-//    }
+    if (seq_type == SEQ_CODON) {
+    	codon_table = new char[num_states];
+    	memcpy(codon_table, aln->codon_table, num_states);
+    	non_stop_codon = new char[strlen(genetic_code)];
+    	memcpy(non_stop_codon, aln->non_stop_codon, strlen(genetic_code));
+    }
     site_pattern.resize(aln->getNSite(), -1);
     clear();
     pattern_index.clear();
@@ -2062,6 +2057,12 @@ void Alignment::extractPatterns(Alignment *aln, IntVector &ptn_id) {
     seq_type = aln->seq_type;
     STATE_UNKNOWN = aln->STATE_UNKNOWN;
     genetic_code = aln->genetic_code;
+    if (seq_type == SEQ_CODON) {
+    	codon_table = new char[num_states];
+    	memcpy(codon_table, aln->codon_table, num_states);
+    	non_stop_codon = new char[strlen(genetic_code)];
+    	memcpy(non_stop_codon, aln->non_stop_codon, strlen(genetic_code));
+    }
     site_pattern.resize(aln->getNSite(), -1);
     clear();
     pattern_index.clear();
@@ -2091,6 +2092,12 @@ void Alignment::extractPatternFreqs(Alignment *aln, IntVector &ptn_freq) {
     num_states = aln->num_states;
     seq_type = aln->seq_type;
     genetic_code = aln->genetic_code;
+    if (seq_type == SEQ_CODON) {
+    	codon_table = new char[num_states];
+    	memcpy(codon_table, aln->codon_table, num_states);
+    	non_stop_codon = new char[strlen(genetic_code)];
+    	memcpy(non_stop_codon, aln->non_stop_codon, strlen(genetic_code));
+    }
     STATE_UNKNOWN = aln->STATE_UNKNOWN;
     site_pattern.resize(accumulate(ptn_freq.begin(), ptn_freq.end(), 0), -1);
     clear();
@@ -2122,6 +2129,12 @@ void Alignment::extractSites(Alignment *aln, IntVector &site_id) {
     seq_type = aln->seq_type;
     STATE_UNKNOWN = aln->STATE_UNKNOWN;
     genetic_code = aln->genetic_code;
+    if (seq_type == SEQ_CODON) {
+    	codon_table = new char[num_states];
+    	memcpy(codon_table, aln->codon_table, num_states);
+    	non_stop_codon = new char[strlen(genetic_code)];
+    	memcpy(non_stop_codon, aln->non_stop_codon, strlen(genetic_code));
+    }
     site_pattern.resize(site_id.size(), -1);
     clear();
     pattern_index.clear();
@@ -2200,6 +2213,8 @@ void Alignment::convertToCodonOrAA(Alignment *aln, char *gene_code_id, bool nt2a
                     state = STATE_UNKNOWN;
                 } else if (nt2aa) {
                     state = AA_to_state[(int)genetic_code[(int)state]];
+                } else {
+                    state = non_stop_codon[(int)state];
                 }
             } else if (state == STATE_INVALID || state2 == STATE_INVALID || state3 == STATE_INVALID) {
                 state = STATE_INVALID;
@@ -2207,7 +2222,7 @@ void Alignment::convertToCodonOrAA(Alignment *aln, char *gene_code_id, bool nt2a
                 if (state != STATE_UNKNOWN || state2 != STATE_UNKNOWN || state3 != STATE_UNKNOWN) {
                     ostringstream warn_str;
                     warn_str << "Sequence " << seq_names[seq] << " has ambiguous character " <<
-                        " at site " << site+1 << endl;
+                        " at site " << site+1;
                     outWarning(warn_str.str());
                 }
                 state = STATE_UNKNOWN;
@@ -2330,6 +2345,12 @@ void Alignment::createBootstrapAlignment(Alignment *aln, IntVector* pattern_freq
     num_states = aln->num_states;
     seq_type = aln->seq_type;
     genetic_code = aln->genetic_code;
+    if (seq_type == SEQ_CODON) {
+    	codon_table = new char[num_states];
+    	memcpy(codon_table, aln->codon_table, num_states);
+    	non_stop_codon = new char[strlen(genetic_code)];
+    	memcpy(non_stop_codon, aln->non_stop_codon, strlen(genetic_code));
+    }
     STATE_UNKNOWN = aln->STATE_UNKNOWN;
     site_pattern.resize(nsite, -1);
     clear();
@@ -2554,6 +2575,12 @@ void Alignment::createGapMaskedAlignment(Alignment *masked_aln, Alignment *aln)
     num_states = aln->num_states;
     seq_type = aln->seq_type;
     genetic_code = aln->genetic_code;
+    if (seq_type == SEQ_CODON) {
+    	codon_table = new char[num_states];
+    	memcpy(codon_table, aln->codon_table, num_states);
+    	non_stop_codon = new char[strlen(genetic_code)];
+    	memcpy(non_stop_codon, aln->non_stop_codon, strlen(genetic_code));
+    }
     STATE_UNKNOWN = aln->STATE_UNKNOWN;
     site_pattern.resize(nsite, -1);
     clear();
@@ -2617,6 +2644,12 @@ void Alignment::copyAlignment(Alignment *aln) {
     num_states = aln->num_states;
     seq_type = aln->seq_type;
     genetic_code = aln->genetic_code;
+    if (seq_type == SEQ_CODON) {
+    	codon_table = new char[num_states];
+    	memcpy(codon_table, aln->codon_table, num_states);
+    	non_stop_codon = new char[strlen(genetic_code)];
+    	memcpy(non_stop_codon, aln->non_stop_codon, strlen(genetic_code));
+    }
     STATE_UNKNOWN = aln->STATE_UNKNOWN;
     site_pattern.resize(nsite, -1);
     clear();
@@ -2675,14 +2708,14 @@ int Alignment::countProperChar(int seq_id) {
 
 Alignment::~Alignment()
 {
-//	if (codon_table) {
-//		delete [] codon_table;
-//		codon_table = NULL;
-//	}
-//	if (non_stop_codon) {
-//		delete [] non_stop_codon;
-//		non_stop_codon = NULL;
-//	}
+	if (codon_table) {
+		delete [] codon_table;
+		codon_table = NULL;
+	}
+	if (non_stop_codon) {
+		delete [] non_stop_codon;
+		non_stop_codon = NULL;
+	}
     if (pars_lower_bound) {
         delete [] pars_lower_bound;
         pars_lower_bound = NULL;
@@ -2868,15 +2901,20 @@ void Alignment::computeStateFreq (double *state_freq, size_t num_unknown_states)
     
     for (i = 0; i <= STATE_UNKNOWN; i++)
         getAppearance(i, &states_app[i*num_states]);
-        
-    for (iterator it = begin(); it != end(); it++)
+
+    size_t aln_len = 0;
+
+    for (iterator it = begin(); it != end(); it++) {
+        aln_len += it->frequency;
         for (Pattern::iterator it2 = it->begin(); it2 != it->end(); it2++)
             state_count[(int)*it2] += it->frequency;
+    }
             
     for (i = 0; i < num_states; i++)
         state_freq[i] = 1.0/num_states;
         
     const int NUM_TIME = 8;
+    if (aln_len > 0)
     for (int k = 0; k < NUM_TIME; k++) {
         memset(new_state_freq, 0, sizeof(double)*num_states);
         
@@ -3139,8 +3177,8 @@ void Alignment::computeCodonFreq(StateFreqType freq, double *state_freq, double
 		memset(ntfreq, 0, sizeof(double)*4);
 		for (iterator it = begin(); it != end(); it++) {
 			for (int seq = 0; seq < nseqs; seq++) if ((*it)[seq] != STATE_UNKNOWN) {
-//				int codon = codon_table[(int)(*it)[seq]];
-				int codon = (int)(*it)[seq];
+				int codon = codon_table[(int)(*it)[seq]];
+//				int codon = (int)(*it)[seq];
 				int nt1 = codon / 16;
 				int nt2 = (codon % 16) / 4;
 				int nt3 = codon % 4;
@@ -3161,17 +3199,19 @@ void Alignment::computeCodonFreq(StateFreqType freq, double *state_freq, double
 		}
 		memcpy(ntfreq+4, ntfreq, sizeof(double)*4);
 		memcpy(ntfreq+8, ntfreq, sizeof(double)*4);
-        double sum_stop=0.0;
         sum = 0.0;
 		for (i = 0; i < num_states; i++) {
-            state_freq[i] = ntfreq[i/16] * ntfreq[(i%16)/4] * ntfreq[i%4];
+            int codon = codon_table[i];
+            state_freq[i] = ntfreq[codon/16] * ntfreq[(codon%16)/4] * ntfreq[codon%4];
 			if (isStopCodon(i)) {
-                sum_stop += state_freq[i];
+//                sum_stop += state_freq[i];
 				state_freq[i] = MIN_FREQUENCY;
-                sum += MIN_FREQUENCY;
-			}
+			} else {
+                sum += state_freq[i];
+            }
         }
-        sum = (1.0-sum)/(1.0-sum_stop);
+//        sum = (1.0-sum)/(1.0-sum_stop);
+        sum = 1.0/sum;
 		for (i = 0; i < num_states; i++)
             if (!isStopCodon(i))
                 state_freq[i] *= sum;
@@ -3184,8 +3224,8 @@ void Alignment::computeCodonFreq(StateFreqType freq, double *state_freq, double
 		memset(ntfreq, 0, sizeof(double)*12);
 		for (iterator it = begin(); it != end(); it++) {
 			for (int seq = 0; seq < nseqs; seq++) if ((*it)[seq] != STATE_UNKNOWN) {
-//				int codon = codon_table[(int)(*it)[seq]];
-				int codon = (int)(*it)[seq];
+				int codon = codon_table[(int)(*it)[seq]];
+//				int codon = (int)(*it)[seq];
 				int nt1 = codon / 16;
 				int nt2 = (codon % 16) / 4;
 				int nt3 = codon % 4;
@@ -3207,17 +3247,19 @@ void Alignment::computeCodonFreq(StateFreqType freq, double *state_freq, double
 			}
 		}
         
-        double sum_stop=0.0;
         double sum = 0.0;
 		for (i = 0; i < num_states; i++) {
-            state_freq[i] = ntfreq[i/16] * ntfreq[4+(i%16)/4] * ntfreq[8+i%4];
+            int codon = codon_table[i];
+            state_freq[i] = ntfreq[codon/16] * ntfreq[4+(codon%16)/4] * ntfreq[8+codon%4];
 			if (isStopCodon(i)) {
-                sum_stop += state_freq[i];
+//                sum_stop += state_freq[i];
 				state_freq[i] = MIN_FREQUENCY;
-                sum += MIN_FREQUENCY;
-			}
+			} else {
+                sum += state_freq[i];
+            }
         }
-        sum = (1.0-sum)/(1.0-sum_stop);
+//        sum = (1.0-sum)/(1.0-sum_stop);
+        sum = 1.0 / sum;
 		for (i = 0; i < num_states; i++)
             if (!isStopCodon(i))
                 state_freq[i] *= sum;
@@ -3372,8 +3414,6 @@ void Alignment::convfreq(double *stateFrqArr) {
 		freq = stateFrqArr[i];
 		if (freq < MIN_FREQUENCY) {
 			stateFrqArr[i] = MIN_FREQUENCY;
-			if (!isStopCodon(i))
-				cout << "WARNING: " << convertStateBackStr(i) << " is not present in alignment that may cause numerical problems" << endl;
 		}
 		if (freq > maxfreq) {
 			maxfreq = freq;
diff --git a/alignment.h b/alignment.h
index eee6169..eacd5ad 100644
--- a/alignment.h
+++ b/alignment.h
@@ -310,6 +310,13 @@ public:
      */
     int getMaxSeqNameLength();
 
+    /*
+        check if some state is absent, which may cause numerical issues
+        @param msg additional message into the warning
+        @return number of absent states in the alignment
+    */
+    virtual int checkAbsentStates(string msg);
+
     /**
             check proper and undupplicated sequence names
      */
@@ -610,13 +617,13 @@ public:
 	/**
 	 *  map from 64 codon to non-stop codon index
 	 */
-//    char *non_stop_codon;
+    char *non_stop_codon;
 
 	/**
 	 * For codon sequences: index of 61 non-stop codons to 64 codons
 	 * For other sequences: NULL
 	 */
-//	char *codon_table;
+	char *codon_table;
 
 	/**
 	 * For codon_sequences: 64 amino-acid letters for genetic code of AAA,AAC,AAG,AAT,...,TTT
diff --git a/checkpoint.cpp b/checkpoint.cpp
index 1b26991..2eb511c 100644
--- a/checkpoint.cpp
+++ b/checkpoint.cpp
@@ -38,7 +38,7 @@ void Checkpoint::load(istream &in) {
     size_t pos;
     int listid = 0;
     while (!in.eof()) {
-        getline(in, line);
+        safeGetline(in, line);
         pos = line.find('#');
         if (pos != string::npos)
             line.erase(pos);
@@ -82,7 +82,7 @@ void Checkpoint::load() {
         // remove the failbit
         in.exceptions(ios::badbit);
         string line;
-        if (!getline(in, line)) {
+        if (!safeGetline(in, line)) {
             in.close();
             return;
         }
diff --git a/checkpoint.h b/checkpoint.h
index d7c478a..5cd52e9 100644
--- a/checkpoint.h
+++ b/checkpoint.h
@@ -245,8 +245,7 @@ public:
         else
             key = struct_name + key;
         CkpStream ss;
-        if (typeid(T) == typeid(double))
-            ss.precision(10);
+        ss.precision(10);
         ss << value;
         (*this)[key] = ss.str();
     }
@@ -270,8 +269,7 @@ public:
         else
             key = struct_name + key;
         CkpStream ss;
-        if (typeid(T) == typeid(double))
-            ss.precision(10);
+        ss.precision(10);
         for (int i = 0; i < num; i++) {
             if (i > 0) ss << ", ";
             ss << value[i];
@@ -292,8 +290,7 @@ public:
         else
             key = struct_name + key;
         CkpStream ss;
-        if (typeid(T) == typeid(double))
-            ss.precision(10);
+        ss.precision(10);
         for (int i = 0; i < value.size(); i++) {
             if (i > 0) ss << ", ";
             ss << value[i];
diff --git a/constrainttree.cpp b/constrainttree.cpp
index a0f8076..12ce2d3 100644
--- a/constrainttree.cpp
+++ b/constrainttree.cpp
@@ -15,12 +15,37 @@
 ConstraintTree::ConstraintTree() : MTree(), SplitIntMap() {
 }
 
-void ConstraintTree::initConstraint(const char *constraint_file, StrVector &fulltaxname) {
+void ConstraintTree::readConstraint(const char *constraint_file, StrVector &fulltaxname) {
     bool is_rooted = false;
     MTree::init(constraint_file, is_rooted);
+    initFromTree();
+
+    // check that constraint tree has a subset of taxa
+
+    StrVector taxname;
+    StrVector::iterator it;
+    getTaxaName(taxname);
+
+    StringIntMap fulltax_index;
+    for (it = fulltaxname.begin(); it != fulltaxname.end(); it++)
+        fulltax_index[(*it)] = it - fulltaxname.begin();
+
+    bool err = false;
+        
+    for(it = taxname.begin(); it != taxname.end(); it++)
+        if (fulltax_index.find(*it) == fulltax_index.end()) {
+            cerr << "ERROR: Taxon " << (*it) << " in constraint tree does not appear in full tree" << endl;
+            err = true;
+        }
+    if (err) {
+        outError("Bad constraint tree (see above)");
+    }
+}
+
+void ConstraintTree::initFromTree() {
     if (leafNum <= 3)
         outError("Constraint tree must contain at least 4 taxa");
-    if (is_rooted)
+    if (rooted)
         outError("Rooted constraint tree not accepted");
 
 	// collapse any internal node of degree 2
@@ -60,22 +85,11 @@ void ConstraintTree::initConstraint(const char *constraint_file, StrVector &full
         insertSplit(new Split(**sit), 1);
     }
     
-    // check that constraint tree has a subset of taxa
-    StringIntMap fulltax_index;
-    for (it = fulltaxname.begin(); it != fulltaxname.end(); it++)
-        fulltax_index[(*it)] = it - fulltaxname.begin();
+}
 
-    bool err = false;
-        
-    for(it = taxname.begin(); it != taxname.end(); it++)
-        if (fulltax_index.find(*it) == fulltax_index.end()) {
-            cerr << "ERROR: Taxon " << (*it) << " in constraint tree does not appear in full tree" << endl;
-            err = true;
-        }
-    if (err) {
-        outError("Bad constraint tree (see above)");
-    }
-    
+void ConstraintTree::readConstraint(MTree &src_tree) {
+    copyTree(&src_tree);
+    initFromTree();
 }
 
 
diff --git a/constrainttree.h b/constrainttree.h
index 6749fc6..8b6a53b 100644
--- a/constrainttree.h
+++ b/constrainttree.h
@@ -26,11 +26,22 @@ public:
     ConstraintTree();
 
     /**
+        internal function to initialize splits from tree structure
+    */
+    void initFromTree();
+
+    /**
             initialize constraint tree
             @param constraint_file the name of the constraint tree file
             @param fulltaxname the full list of all taxa names
      */
-    void initConstraint(const char *constraint_file, StrVector &fulltaxname);
+    void readConstraint(const char *constraint_file, StrVector &fulltaxname);
+
+    /**
+        initialize from another constraint tree
+        @param src source constraint tree
+    */
+    void readConstraint(MTree &src_tree);
 
     /** 
         check if a "partial" split defined by two taxa name sets is compatible with the constraint tree.
@@ -78,4 +89,4 @@ protected:
 
 };
 
-#endif
\ No newline at end of file
+#endif
diff --git a/iqtree.cpp b/iqtree.cpp
index e688afc..65a8493 100644
--- a/iqtree.cpp
+++ b/iqtree.cpp
@@ -242,7 +242,7 @@ void IQTree::initSettings(Params &params) {
 		params.numInitTrees = 1;
     }
     if (params.gbo_replicates)
-        params.max_iterations = max(params.max_iterations, max(params.min_iterations, 1000));
+        params.max_iterations = max(params.max_iterations, max(params.min_iterations, params.step_iterations));
 
     k_represent = params.k_representative;
 
@@ -457,7 +457,7 @@ void IQTree::createPLLPartition(Params &params, ostream &pllPartitionFileHandle)
                 if ((*it)->aln->seq_type == SEQ_DNA) {
                     pllPartitionFileHandle << "DNA";
                 } else if ((*it)->aln->seq_type == SEQ_PROTEIN) {
-                    if (siqtree->part_info[i-1].model_name != "" && siqtree->part_info[i-1].model_name.substr(0, 4) != "TEST") {
+                    if (siqtree->part_info[i-1].model_name != "" && siqtree->part_info[i-1].model_name.substr(0, 4) != "TEST" && siqtree->part_info[i-1].model_name.substr(0, 2) != "MF") {
                         string modelStr = siqtree->part_info[i - 1].model_name.
                                 substr(0, siqtree->part_info[i - 1].model_name.find_first_of("+{"));
                         if (modelStr == "LG4")
@@ -517,7 +517,7 @@ void IQTree::createPLLPartition(Params &params, ostream &pllPartitionFileHandle)
         if (aln->seq_type == SEQ_DNA) {
             model = "DNA";
         } else if (aln->seq_type == SEQ_PROTEIN) {
-        	if (params.pll && params.model_name != "" && params.model_name.substr(0, 4) != "TEST") {
+        	if (params.pll && params.model_name != "" && params.model_name.substr(0, 4) != "TEST" && params.model_name.substr(0, 2) != "MF") {
         		model = params.model_name.substr(0, params.model_name.find_first_of("+{"));
         	} else {
         		model = "WAG";
@@ -565,12 +565,7 @@ void IQTree::computeInitialTree(string &dist_file, LikelihoodKernel kernel) {
         switch (start_tree) {
         case STT_PARSIMONY:
             // Create parsimony tree using IQ-Tree kernel
-            if (kernel == LK_EIGEN_SSE)
-                cout << "Creating fast SIMD initial parsimony tree by random order stepwise addition..." << endl;
-            else if (kernel == LK_EIGEN)
-                cout << "Creating fast initial parsimony tree by random order stepwise addition..." << endl;
-            else
-                cout << "Creating initial parsimony tree by random order stepwise addition..." << endl;
+            cout << "Creating fast initial parsimony tree by random order stepwise addition..." << endl;
 //            aln->orderPatternByNumChars();
             start = getRealTime();
             score = computeParsimonyTree(params->out_prefix, aln);
@@ -664,6 +659,7 @@ void IQTree::initCandidateTreeSet(int nParTrees, int nNNITrees) {
     }
     double startTime = getRealTime();
 
+/* TODO: this does not work properly with partition model
 #ifdef _OPENMP
     StrVector pars_trees;
     if (params->start_tree == STT_PARSIMONY && nParTrees >= 1) {
@@ -673,8 +669,8 @@ void IQTree::initCandidateTreeSet(int nParTrees, int nNNITrees) {
         #pragma omp parallel
         {
             PhyloTree tree;
-            if (params->constraint_tree_file) {
-                tree.constraintTree.initConstraint(params->constraint_tree_file, aln->getSeqNames());
+            if (!constraintTree.empty()) {
+                tree.constraintTree.readConstraint(constraintTree);
             }
             tree.setParams(params);
             tree.setParsimonyKernel(params->SSE);
@@ -686,6 +682,7 @@ void IQTree::initCandidateTreeSet(int nParTrees, int nNNITrees) {
         }
     }
 #endif
+*/
 
     int init_size = candidateTrees.size();
 
@@ -713,6 +710,10 @@ void IQTree::initCandidateTreeSet(int nParTrees, int nNNITrees) {
 			curParsTree = getTreeString();
         } else if (params->start_tree == STT_PARSIMONY) {
             /********* Create parsimony tree using IQ-TREE *********/
+            computeParsimonyTree(NULL, aln);
+            curParsTree = getTreeString();
+
+/* TODO: this does not work properly with partition model
 #ifdef _OPENMP
             if (params->start_tree == STT_PARSIMONY)
                 curParsTree = pars_trees[treeNr-1];
@@ -721,6 +722,7 @@ void IQTree::initCandidateTreeSet(int nParTrees, int nNNITrees) {
 #else
             curParsTree = generateParsimonyTree(parRandSeed);
 #endif
+*/
         }
         
         int pos = addTreeToCandidateSet(curParsTree, -DBL_MAX, false, MPIHelper::getInstance().getProcessID());
@@ -3811,6 +3813,15 @@ void IQTree::sendStopMessage() {
 #endif
 }
 
+void PhyloTree::warnNumThreads() {
+    if (num_threads <= 1)
+        return;
+    size_t nptn = getAlnNPattern();
+    if (nptn < num_threads*vector_size)
+        outError("Too many threads for short alignments, please reduce number of threads or use -nt AUTO to determine it.");
+    if (nptn < num_threads*1000/aln->num_states)
+        outWarning("Number of threads seems too high for short alignments. Use -nt AUTO to determine best number of threads.");
+}
 
 int PhyloTree::testNumThreads() {
 #ifndef _OPENMP
diff --git a/mexttree.cpp b/mexttree.cpp
index 29f5789..7addbf3 100644
--- a/mexttree.cpp
+++ b/mexttree.cpp
@@ -72,16 +72,16 @@ void MExtTree::setZeroInternalBranches(int num_zero_len) {
 	}
 }
 
-void MExtTree::collapseZeroBranches(Node *node, Node *dad) {
+void MExtTree::collapseZeroBranches(Node *node, Node *dad, double threshold) {
 	if (!node) node = root;
 	FOR_NEIGHBOR_DECLARE(node, dad, it) {
-		collapseZeroBranches((*it)->node, node);
+		collapseZeroBranches((*it)->node, node, threshold);
 	}
 	NeighborVec nei_vec;
 	nei_vec.insert(nei_vec.begin(), node->neighbors.begin(), node->neighbors.end());
 	for (it = nei_vec.begin(); it != nei_vec.end(); it++) 
 	if ((*it)->node != dad) {
-		if ((*it)->length == 0.0) { // delete the child node
+		if ((*it)->length <= threshold) { // delete the child node
 			Node *child = (*it)->node;
 			bool first = true;
 			FOR_NEIGHBOR_IT(child, node, it2) {
@@ -581,8 +581,8 @@ void MExtTree::collapseLowBranchSupport(DoubleVector &minsup, Node *node, Node *
         for (int i = 0; i < vec.size(); i++)
             if (vec[i] < minsup[i]) {
                 // support smaller than threshold, mark this branch for deletion
-                dad->findNeighbor(node)->length = 0.0;
-                node->findNeighbor(dad)->length = 0.0;
+                dad->findNeighbor(node)->length = -1.0;
+                node->findNeighbor(dad)->length = -1.0;
                 break;
             }
     }
diff --git a/mexttree.h b/mexttree.h
index bf4f72d..8c8b45c 100644
--- a/mexttree.h
+++ b/mexttree.h
@@ -124,7 +124,7 @@ public:
 
 	void setZeroInternalBranches(int num_zero_len);
 
-	void collapseZeroBranches(Node *node = NULL, Node *dad = NULL);
+	void collapseZeroBranches(Node *node = NULL, Node *dad = NULL, double threshold = 0.0);
 
 /********************************************************
 	BOOTSTRAP
diff --git a/model/modelcodon.cpp b/model/modelcodon.cpp
index 4e8a91d..7de95fa 100644
--- a/model/modelcodon.cpp
+++ b/model/modelcodon.cpp
@@ -491,21 +491,23 @@ void ModelCodon::computeRateAttributes() {
             int nuc1, nuc2;
             int attr = 0;
             ts = tv = 0;
-            if (phylo_tree->aln->genetic_code[i] == phylo_tree->aln->genetic_code[j])
+            int codoni = phylo_tree->aln->codon_table[i];
+            int codonj = phylo_tree->aln->codon_table[j];
+            if (phylo_tree->aln->genetic_code[codoni] == phylo_tree->aln->genetic_code[codonj])
                 attr |= CA_SYNONYMOUS;
             else
                 attr |= CA_NONSYNONYMOUS;
                 
                 
-            int nt_changes = ((i/16) != (j/16)) + (((i%16)/4) != ((j%16)/4)) + ((i%4) != (j%4));
-            int aa1 = strchr(symbols_protein, phylo_tree->aln->genetic_code[i]) - symbols_protein;
-            int aa2 = strchr(symbols_protein, phylo_tree->aln->genetic_code[j]) - symbols_protein;
+            int nt_changes = ((codoni/16) != (codonj/16)) + (((codoni%16)/4) != ((codonj%16)/4)) + ((codoni%4) != (codonj%4));
+            int aa1 = strchr(symbols_protein, phylo_tree->aln->genetic_code[codoni]) - symbols_protein;
+            int aa2 = strchr(symbols_protein, phylo_tree->aln->genetic_code[codonj]) - symbols_protein;
             assert(aa1 >= 0 && aa1 < 20 && aa2 >= 0 && aa2 < 20);
             if (nt_changes < aa_cost_change[aa1*20+aa2]) {
                 aa_cost_change[aa1*20+aa2] = aa_cost_change[aa2*20+aa1] = nt_changes;
             }
             
-            if ((nuc1=i/16) != (nuc2=j/16)) {
+            if ((nuc1=codoni/16) != (nuc2=codonj/16)) {
                 if (abs(nuc1-nuc2)==2) { // transition 
                     attr |= CA_TRANSITION_1NT;
                     ts++;
@@ -514,7 +516,7 @@ void ModelCodon::computeRateAttributes() {
                     tv++;
                 }
             }
-            if ((nuc1=(i%16)/4) != (nuc2=(j%16)/4)) {
+            if ((nuc1=(codoni%16)/4) != (nuc2=(codonj%16)/4)) {
                 if (abs(nuc1-nuc2)==2) { // transition
                     attr |= CA_TRANSITION_2NT;
                     ts++;
@@ -523,7 +525,7 @@ void ModelCodon::computeRateAttributes() {
                     tv++;
                 }
             }
-            if ((nuc1=i%4) != (nuc2=j%4)) {
+            if ((nuc1=codoni%4) != (nuc2=codonj%4)) {
                 if (abs(nuc1-nuc2)==2) { // transition
                     attr |= CA_TRANSITION_3NT;
                     ts++;
@@ -579,15 +581,18 @@ void ModelCodon::combineRateNTFreq() {
         for (j = 0; j < num_states; j++)  {
             if (this_rate[j] == 0.0)
                 continue;
+
+            int codoni = phylo_tree->aln->codon_table[i];
+            int codonj = phylo_tree->aln->codon_table[j];
             int nuc1, nuc2;
                 
-            if ((nuc1=i/16) != (nuc2=j/16)) {
+            if ((nuc1=codoni/16) != (nuc2=codonj/16)) {
                 this_rate[j] *= ntfreq[nuc2];
             }
-            if ((nuc1=(i%16)/4) != (nuc2=(j%16)/4)) {
+            if ((nuc1=(codoni%16)/4) != (nuc2=(codonj%16)/4)) {
                 this_rate[j] *= ntfreq[nuc2+4];
             }
-            if ((nuc1=i%4) != (nuc2=j%4)) {
+            if ((nuc1=codoni%4) != (nuc2=codonj%4)) {
                 this_rate[j] *= ntfreq[nuc2+8];
             }
         }
@@ -627,8 +632,8 @@ void ModelCodon::readCodonModel(istream &in, bool reset_params) {
 		int nt3 = phylo_tree->aln->convertState(codons[i][2], SEQ_DNA);
 		if (nt1 > 3 || nt2 > 3 || nt3 > 3)
 			outError("Wrong codon triplet ", codons[i]);
-		state_map[i] = nt1*16+nt2*4+nt3;
-		if (phylo_tree->aln->isStopCodon(state_map[i]))
+		state_map[i] = phylo_tree->aln->non_stop_codon[nt1*16+nt2*4+nt3];
+		if (phylo_tree->aln->isStopCodon(state_map[i]) || state_map[i] == STATE_INVALID)
 			outError("Stop codon encountered");
 		if (verbose_mode >= VB_MAX)
 			cout << " " << codons[i] << " " << state_map[i];
@@ -977,9 +982,9 @@ double ModelCodon::optimizeParameters(double gradient_epsilon) {
 	// by BFGS algorithm
 	setVariables(variables);
 	setBounds(lower_bound, upper_bound, bound_check);
-//    if (phylo_tree->params->optimize_alg.find("BFGS-B") == string::npos)
-//        score = -minimizeMultiDimen(variables, ndim, lower_bound, upper_bound, bound_check, max(gradient_epsilon, TOL_RATE));
-//    else
+    if (phylo_tree->params->optimize_alg.find("BFGS-B") == string::npos)
+        score = -minimizeMultiDimen(variables, ndim, lower_bound, upper_bound, bound_check, max(gradient_epsilon, TOL_RATE));
+    else
         score = -L_BFGS_B(ndim, variables+1, lower_bound+1, upper_bound+1, max(gradient_epsilon, TOL_RATE));
 
 	bool changed = getVariables(variables);
diff --git a/model/modelfactory.cpp b/model/modelfactory.cpp
index ae6a2e6..5def205 100644
--- a/model/modelfactory.cpp
+++ b/model/modelfactory.cpp
@@ -769,21 +769,20 @@ double ModelFactory::optimizeParametersGammaInvar(int fixed_len, bool write_info
         }
     } else {
         // Now perform testing different initial p_inv values
+        if (write_info)
+            cout << "Thoroughly optimizing +I+G parameters from 10 start values..." << endl;
         while (initPInv <= frac_const) {
-            if (write_info) {
-                cout << endl;
-                cout << "Testing with init. pinv = " << initPInv << " / init. alpha = "  << initAlpha << endl;
-            }
             vector<double> estResults; // vector of p_inv, alpha and logl
             if (Params::getInstance().opt_gammai_keep_bran)
                 estResults = optimizeGammaInvWithInitValue(fixed_len, logl_epsilon, gradient_epsilon,
-                                                                          initPInv, initAlpha, bestLens, model_ckp);
+                    initPInv, initAlpha, bestLens, model_ckp);
             else
                 estResults = optimizeGammaInvWithInitValue(fixed_len, logl_epsilon, gradient_epsilon,
-                                                                      initPInv, initAlpha, initBranLens, model_ckp);
+                    initPInv, initAlpha, initBranLens, model_ckp);
             if (write_info) {
-                cout << "Est. p_inv: " << estResults[0] << " / Est. gamma shape: " << estResults[1]
-                << " / Logl: " << estResults[2] << endl;
+                cout << "Init pinv, alpha: " << initPInv << ", "  << initAlpha
+                     << " / Estimate: " << estResults[0] << ", " << estResults[1]
+                     << " / LogL: " << estResults[2] << endl;
             }
 
             initPInv = initPInv + testInterval;
@@ -818,9 +817,8 @@ double ModelFactory::optimizeParametersGammaInvar(int fixed_len, bool write_info
 	tree->clearAllPartialLH();
 	tree->setCurScore(tree->computeLikelihood());
     if (write_info) {
-        cout << endl;
-        cout << "Best p_inv: " << bestPInvar << " / best gamma shape: " << bestAlpha << " / ";
-        cout << "Logl: " << tree->getCurScore() << endl;
+        cout << "Optimal pinv,alpha: " << bestPInvar << ", " << bestAlpha << " / ";
+        cout << "LogL: " << tree->getCurScore() << endl << endl;
     }
     assert(fabs(tree->getCurScore() - bestLogl) < 1.0);
 
diff --git a/model/modelfactory.h b/model/modelfactory.h
index 9cdabf3..e8e8ff9 100644
--- a/model/modelfactory.h
+++ b/model/modelfactory.h
@@ -163,7 +163,7 @@ public:
 		@return the best likelihood 
 	*/
 	virtual double optimizeParameters(int fixed_len = BRLEN_OPTIMIZE, bool write_info = true,
-                                      double logl_epsilon = 0.1, double gradient_epsilon = 0.001);
+                                      double logl_epsilon = 0.1, double gradient_epsilon = 0.0001);
 
 	/**
 	 *  optimize model parameters and tree branch lengths for the +I+G model
@@ -172,7 +172,7 @@ public:
 	 *	@return the best likelihood
 	 */
 	virtual double optimizeParametersGammaInvar(int fixed_len = BRLEN_OPTIMIZE, bool write_info = true,
-												double logl_epsilon = 0.1, double gradient_epsilon = 0.001);
+												double logl_epsilon = 0.1, double gradient_epsilon = 0.0001);
 
 	/**
 	 * @return TRUE if parameters are at the boundary that may cause numerical unstability
diff --git a/ngs.cpp b/ngs.cpp
index 7fb98c9..77cc616 100644
--- a/ngs.cpp
+++ b/ngs.cpp
@@ -410,7 +410,7 @@ NGSTree::NGSTree(Params &params, NGSAlignment *alignment) {
     model_factory = NULL;
     optimize_by_newton = params.optimize_by_newton;
     //tree.sse = params.SSE;
-    setLikelihoodKernel(LK_EIGEN, params.num_threads);
+    setLikelihoodKernel(LK_386, params.num_threads);
 }
 
 double NGSTree::computeLikelihood(double *pattern_lh) {
diff --git a/pda.cpp b/pda.cpp
index 639e22f..a44579f 100644
--- a/pda.cpp
+++ b/pda.cpp
@@ -235,7 +235,8 @@ void printCopyright(ostream &out) {
 #endif
 
 #ifdef IQ_TREE
-	out << endl << "Copyright (c) 2011-2016 Nguyen Lam Tung, Olga Chernomor, Arndt von Haeseler and Bui Quang Minh." << endl << endl;
+	out << endl << "Copyright (c) 2011-2017 by Bui Quang Minh, Nguyen Lam Tung,"
+        << endl << "Olga Chernomor, Heiko Schmidt, and Arndt von Haeseler." << endl << endl;
 #else
 	out << endl << "Copyright (c) 2006-2014 Olga Chernomor, Arndt von Haeseler and Bui Quang Minh." << endl << endl;
 #endif
@@ -2155,7 +2156,7 @@ void collapseLowBranchSupport(char *user_file, char *split_threshold_str) {
     bool isrooted = false;
     tree.readTree(user_file, isrooted);
     tree.collapseLowBranchSupport(minsup);
-    tree.collapseZeroBranches();
+    tree.collapseZeroBranches(NULL, NULL, -1.0);
     if (verbose_mode >= VB_MED)
         tree.drawTree(cout);
     string outfile = (string)user_file + ".collapsed";
@@ -2180,22 +2181,24 @@ int main(){
 }
 */
 
-/*
-Instruction set ID reported by vectorclass::instrset_detect
-0           = 80386 instruction set
-1  or above = SSE (XMM) supported by CPU (not testing for O.S. support)
-2  or above = SSE2
-3  or above = SSE3
-4  or above = Supplementary SSE3 (SSSE3)
-5  or above = SSE4.1
-6  or above = SSE4.2
-7  or above = AVX supported by CPU and operating system
-8  or above = AVX2
-9  or above = AVX512F
-*/
-int instruction_set;
 
 int main(int argc, char *argv[]) {
+
+    /*
+    Instruction set ID reported by vectorclass::instrset_detect
+    0           = 80386 instruction set
+    1  or above = SSE (XMM) supported by CPU (not testing for O.S. support)
+    2  or above = SSE2
+    3  or above = SSE3
+    4  or above = Supplementary SSE3 (SSSE3)
+    5  or above = SSE4.1
+    6  or above = SSE4.2
+    7  or above = AVX supported by CPU and operating system
+    8  or above = AVX2
+    9  or above = AVX512F
+    */
+    int instruction_set;
+
 #ifdef _IQTREE_MPI
 	double time_initial, time_current;
 	int n_tasks, task_id;
@@ -2345,11 +2348,10 @@ int main(int argc, char *argv[]) {
 
 	instruction_set = instrset_detect();
 #if defined(BINARY32) || defined(__NOAVX__)
-    instruction_set = min(instruction_set, 6);
+    instruction_set = min(instruction_set, (int)LK_SSE42);
 #endif
-	if (instruction_set < 3) outError("Your CPU does not support SSE3!");
-	bool has_fma3 = (instruction_set >= 7) && hasFMA3();
-//	bool has_fma4 = (instruction_set >= 7) && hasFMA4();
+	if (instruction_set < LK_SSE2) outError("Your CPU does not support SSE2!");
+	bool has_fma3 = (instruction_set >= LK_AVX) && hasFMA3();
 
 #ifdef __FMA__
 	bool has_fma =  has_fma3;
@@ -2360,7 +2362,7 @@ int main(int argc, char *argv[]) {
 
 	cout << "Host:    " << hostname << " (";
 	switch (instruction_set) {
-	case 0: cout << "80386, "; break;
+	case 0: cout << "x86, "; break;
 	case 1: cout << "SSE, "; break;
 	case 2: cout << "SSE2, "; break;
 	case 3: cout << "SSE3, "; break;
@@ -2392,8 +2394,11 @@ int main(int argc, char *argv[]) {
 	time(&start_time);
 	cout << "Time:    " << ctime(&start_time);
 
-	if (Params::getInstance().lk_no_avx == 1)
-		instruction_set = min(instruction_set, 6);
+    // increase instruction set level with FMA
+    if (has_fma3 && instruction_set < LK_AVX_FMA)
+        instruction_set = LK_AVX_FMA;
+
+    Params::getInstance().SSE = min(Params::getInstance().SSE, (LikelihoodKernel)instruction_set);
 
 	cout << "Kernel:  ";
 
@@ -2408,23 +2413,16 @@ int main(int argc, char *argv[]) {
 		cout << "PLL-SSE3";
 #endif
 	} else {
-        bool has_fma = (has_fma3) && (instruction_set >= 7) && Params::getInstance().lk_no_avx != 2;
-		switch (Params::getInstance().SSE) {
-		case LK_EIGEN: cout << "No SSE"; break;
-		case LK_EIGEN_SSE:
-            if (has_fma) {
-                cout << "AVX+FMA";
-            } else if (instruction_set >= 7) {
-				cout << "AVX";
-			} else {
-				cout << "SSE3";
-			}
-
-#ifdef __FMA__
-			cout << "+FMA";
-#endif
-			break;
-		}
+        if (Params::getInstance().SSE >= LK_AVX512)
+            cout << "AVX-512";
+        else if (Params::getInstance().SSE >= LK_AVX_FMA) {
+            cout << "AVX+FMA";
+        } else if (Params::getInstance().SSE >= LK_AVX) {
+            cout << "AVX";
+        } else if (Params::getInstance().SSE >= LK_SSE2) {
+            cout << "SSE2";
+        } else
+            cout << "x86";
 	}
 
 #ifdef _OPENMP
@@ -2442,8 +2440,8 @@ int main(int argc, char *argv[]) {
     if (Params::getInstance().num_threads > 0)
         cout << Params::getInstance().num_threads  << " threads";
     else
-        cout << "auto-detect";
-    cout << "(" << max_procs << " CPU cores detected)";
+        cout << "auto-detect threads";
+    cout << " (" << max_procs << " CPU cores detected)";
 	if (Params::getInstance().num_threads  > max_procs) {
 		cout << endl;
 		outError("You have specified more threads than CPU cores available");
diff --git a/phyloanalysis.cpp b/phyloanalysis.cpp
index 1e71528..4891494 100644
--- a/phyloanalysis.cpp
+++ b/phyloanalysis.cpp
@@ -58,22 +58,42 @@
 
 
 void reportReferences(Params &params, ofstream &out, string &original_model) {
+    bool modelfinder_only = false;
+    if (original_model.substr(0,4) == "TEST" || original_model.substr(0, 2) == "MF" || original_model.empty()) {
+        out << "To cite ModelFinder please use: " << endl << endl
+            << "Subha Kalyaanamoorthy, Bui Quang Minh, Thomas KF Wong, Arndt von Haeseler," << endl
+            << "and Lars S Jermiin (2017) ModelFinder: Fast model selection for" << endl
+            << "accurate phylogenetic estimates. Nature Methods, 14:587–589." << endl
+            << "http://dx.doi.org/10.1038/nmeth.4285" << endl << endl;
+        if (original_model.find("ONLY") != string::npos || (original_model.substr(0,2)=="MF" && original_model.substr(0,3)!="MFP"))
+            modelfinder_only = true;
+    }
+
+    if (!modelfinder_only)
 	out << "To cite IQ-TREE please use:" << endl << endl
-		<< "Lam-Tung Nguyen, Heiko A. Schmidt, Arndt von Haeseler, and Bui Quang Minh (2015)" << endl
-		<< "IQ-TREE: A fast and effective stochastic algorithm for estimating" << endl
-		<< "maximum likelihood phylogenies. Mol. Biol. Evol., 32:268-274." << endl
+		<< "Lam-Tung Nguyen, Heiko A. Schmidt, Arndt von Haeseler, and Bui Quang Minh" << endl
+		<< "(2015) IQ-TREE: A fast and effective stochastic algorithm for estimating" << endl
+		<< "maximum likelihood phylogenies. Mol Biol Evol, 32:268-274." << endl
         << "http://dx.doi.org/10.1093/molbev/msu300" << endl << endl;
 
+    if (params.site_freq_file || params.tree_freq_file)
+    out << "Since you used site-specific frequency model please also cite: " << endl << endl
+        << "Huai-Chun Wang, Edward Susko, Bui Quang Minh, and Andrew J. Roger (2017)" << endl
+        << "Modeling site heterogeneity with posterior mean site frequency profiles" << endl
+        << "accelerates accurate phylogenomic estimation. Submitted." << endl << endl;
+
+
 	if (params.gbo_replicates)
 	out << "Since you used ultrafast bootstrap (UFBoot) please also cite: " << endl << endl
-		<< "Bui Quang Minh, Minh Anh Thi Nguyen, and Arndt von Haeseler (2013) Ultrafast" << endl
-		<< "approximation for phylogenetic bootstrap. Mol. Biol. Evol., 30:1188-1195." << endl
+		<< "Bui Quang Minh, Minh Anh Thi Nguyen, and Arndt von Haeseler (2013)" << endl
+		<< "Ultrafast approximation for phylogenetic bootstrap. Mol Biol Evol, 30:1188-95." << endl
         << "http://dx.doi.org/10.1093/molbev/mst024" << endl << endl;
 
     if (params.partition_file) 
     out << "Since you used partition models please also cite:" << endl << endl
-        << "Olga Chernomor, Arndt von Haeseler, and Bui Quang Minh (2016) Terrace aware data" << endl
-        << "structure for phylogenomic inference from supermatrices. Syst. Biol., in press." << endl
+        << "Olga Chernomor, Arndt von Haeseler, and Bui Quang Minh (2016)" << endl
+        << "Terrace aware data structure for phylogenomic inference from" << endl
+        << "supermatrices. Syst Biol, 65:997-1008." << endl
         << "http://dx.doi.org/10.1093/sysbio/syw037" << endl << endl;
 
 }
@@ -574,10 +594,10 @@ void printOutfilesInfo(Params &params, string &original_model, IQTree &tree) {
 			<< endl;
 	if (params.compute_ml_tree) {
         if (!(params.suppress_output_flags & OUT_TREEFILE)) {
-            if (original_model.find("ONLY") == string::npos)
-                cout << "  Maximum-likelihood tree:       " << params.out_prefix << ".treefile" << endl;
+            if (original_model.find("ONLY") != string::npos || (original_model.substr(0,2)=="MF" && original_model.substr(0,3)!="MFP"))
+                cout << "  Tree used for ModelFinder:     " << params.out_prefix << ".treefile" << endl;
             else
-                cout << "  Tree used for model selection: " << params.out_prefix << ".treefile" << endl;
+                cout << "  Maximum-likelihood tree:       " << params.out_prefix << ".treefile" << endl;
         }
 //		if (params.snni && params.write_local_optimal_trees) {
 //			cout << "  Locally optimal trees (" << tree.candidateTrees.getNumLocalOptTrees() << "):    " << params.out_prefix << ".suboptimal_trees" << endl;
@@ -596,7 +616,7 @@ void printOutfilesInfo(Params &params, string &original_model, IQTree &tree) {
 		cout << "  Concatenated alignment:        " << params.out_prefix
 					<< ".conaln" << endl;
 	}
-	if (original_model.find("TEST") != string::npos && tree.isSuperTree()) {
+	if ((original_model.find("TEST") != string::npos || original_model.substr(0,2) == "MF") && tree.isSuperTree()) {
 		cout << "  Best partitioning scheme:      " << params.out_prefix << ".best_scheme.nex" << endl;
 		bool raxml_format_printed = true;
 
@@ -717,20 +737,22 @@ void reportPhyloAnalysis(Params &params, string &original_model,
 		if (params.user_file)
 			out << "User tree file name: " << params.user_file << endl;
 		out << "Type of analysis: ";
-        if (original_model.find("TEST") != string::npos && original_model.find("ONLY") != string::npos) {
-            out << "model selection";
-        } else {
+        bool modelfinder = original_model.substr(0,4)=="TEST" || original_model.substr(0,2) == "MF" || original_model.empty();
+        if (modelfinder)
+            out << "ModelFinder";
+        if (params.compute_ml_tree) {
+            if (modelfinder)
+                out << " + ";
+            out << "tree reconstruction";
+        }
+        if (params.num_bootstrap_samples > 0) {
             if (params.compute_ml_tree)
-                out << "tree reconstruction";
-            if (params.num_bootstrap_samples > 0) {
-                if (params.compute_ml_tree)
-                    out << " + ";
-                out << "non-parametric bootstrap (" << params.num_bootstrap_samples
-                        << " replicates)";
-            }
-            if (params.gbo_replicates > 0) {
-                out << " + ultrafast bootstrap (" << params.gbo_replicates << " replicates)";
-            }
+                out << " + ";
+            out << "non-parametric bootstrap (" << params.num_bootstrap_samples
+                    << " replicates)";
+        }
+        if (params.gbo_replicates > 0) {
+            out << " + ultrafast bootstrap (" << params.gbo_replicates << " replicates)";
         }
 		out << endl;
 		out << "Random seed number: " << params.ran_seed << endl << endl;
@@ -785,7 +807,7 @@ void reportPhyloAnalysis(Params &params, string &original_model,
 		out << fixed;
 
 		if (!model_info.empty()) {
-			out << "MODEL SELECTION" << endl << "---------------" << endl << endl;
+			out << "ModelFinder" << endl << "-----------" << endl << endl;
 			if (tree.isSuperTree())
 				pruneModelInfo(model_info, (PhyloSuperTree*)&tree);
 			reportModelSelection(out, params, model_info, tree.isSuperTree());
@@ -889,9 +911,9 @@ void reportPhyloAnalysis(Params &params, string &original_model,
 				<< endl;
 */
 		if (params.compute_ml_tree) {
-			if (original_model.find("ONLY") != string::npos) {
-				out << "TREE USED FOR MODEL SELECTION" << endl
-					<< "-----------------------------" << endl << endl;
+			if (original_model.find("ONLY") != string::npos || (original_model.substr(0,2) == "MF" && original_model.substr(0,3) != "MFP")) {
+				out << "TREE USED FOR ModelFinder" << endl
+					<< "-------------------------" << endl << endl;
             } else if (params.min_iterations == 0) {
                 if (params.user_file)
                     out << "USER TREE" << endl
@@ -1313,16 +1335,29 @@ void computeInitialDist(Params &params, IQTree &iqtree, string &dist_file) {
 
 void initializeParams(Params &params, IQTree &iqtree, vector<ModelInfo> &model_info, ModelsBlock *models_block) {
 //    iqtree.setCurScore(-DBL_MAX);
-    bool test_only = params.model_name.find("ONLY") != string::npos;
+    bool test_only = (params.model_name.find("ONLY") != string::npos) || (params.model_name.substr(0,2) == "MF" && params.model_name.substr(0,3) != "MFP");
+
+    bool empty_model_found = params.model_name.empty() && !iqtree.isSuperTree();
+
+    if (params.model_name.empty() && iqtree.isSuperTree()) {
+        // check whether any partition has empty model_name
+        PhyloSuperTree *stree = (PhyloSuperTree*)&iqtree;
+        for (auto i = stree->part_info.begin(); i != stree->part_info.end(); i++)
+            if (i->model_name.empty()) {
+                empty_model_found = true;
+                break;
+            }
+    }
+
     /* initialize substitution model */
-    if (params.model_name.substr(0, 4) == "TEST") {
+    if (empty_model_found || params.model_name.substr(0, 4) == "TEST" || params.model_name.substr(0, 2) == "MF") {
         if (MPIHelper::getInstance().getNumProcesses() > 1)
             outError("Please use only 1 MPI process! We are currently working on the MPI parallelization of model selection.");
     	// TODO: check if necessary
 //        if (iqtree.isSuperTree())
 //            ((PhyloSuperTree*) &iqtree)->mapTrees();
-        double start_cpu_time = getCPUTime();
-        double start_real_time = getRealTime();
+        double cpu_time = getCPUTime();
+        double real_time = getRealTime();
         ofstream fmodel;
         string fmodel_str = ((string)params.out_prefix + ".model"); 
 
@@ -1331,9 +1366,11 @@ void initializeParams(Params &params, IQTree &iqtree, vector<ModelInfo> &model_i
             ok_model_file = checkModelFile(fmodel_str, iqtree.isSuperTree(), model_info);
         }
 
+        cout << endl;
+
         ok_model_file &= model_info.size() > 0;
         if (ok_model_file) {
-            cout << "Reusing information from model file " << fmodel_str << endl;
+            cout << "NOTE: Reusing information from model file " << fmodel_str << endl;
             fmodel.open(fmodel_str.c_str(), ios::app);
             if (!fmodel.is_open())
                 outError("cannot append to file ", fmodel_str);            
@@ -1360,9 +1397,15 @@ void initializeParams(Params &params, IQTree &iqtree, vector<ModelInfo> &model_i
 
         params.model_name = testModel(params, &iqtree, model_info, fmodel, models_block, params.num_threads, "", true);
         fmodel.close();
-        params.startCPUTime = start_cpu_time;
-        params.start_real_time = start_real_time;
-        cout << "CPU time for model selection: " << getCPUTime() - start_cpu_time << " seconds." << endl;
+        params.startCPUTime = cpu_time;
+        params.start_real_time = real_time;
+        cpu_time = getCPUTime() - cpu_time;
+        real_time = getRealTime() - real_time;
+        cout << endl;
+        cout << "All model information printed to " << params.out_prefix << ".model" << endl;
+        cout << "CPU time for ModelFinder: " << cpu_time << " seconds (" << convert_time(cpu_time) << ")" << endl;
+        cout << "Wall-clock time for ModelFinder: " << real_time << " seconds (" << convert_time(real_time) << ")" << endl;
+
 //        alignment = iqtree.aln;
         if (test_only) {
             params.min_iterations = 0;
@@ -1710,6 +1753,20 @@ void runTreeReconstruction(Params &params, string &original_model, IQTree &iqtre
     params.startCPUTime = getCPUTime();
     params.start_real_time = getRealTime();
 
+    int absent_states = 0;
+    if (iqtree.isSuperTree()) {
+        SuperAlignment *saln = (SuperAlignment*)iqtree.aln;
+        PhyloSuperTree *stree = (PhyloSuperTree*)&iqtree;
+        for (int i = 0; i < saln->partitions.size(); i++)
+            absent_states += saln->partitions[i]->checkAbsentStates("partition " + stree->part_info[i].name);
+    } else {
+        absent_states = iqtree.aln->checkAbsentStates("alignment");
+    }
+    if (absent_states > 0) {
+        outWarning(convertIntToString(absent_states) + " states (see above) are not present that may cause numerical problems");
+        cout << endl;
+    }
+
     // Make sure that no partial likelihood of IQ-TREE is initialized when PLL is used to save memory
     if (params.pll) {
         iqtree.deleteAllPartialLh();
@@ -1840,6 +1897,7 @@ void runTreeReconstruction(Params &params, string &original_model, IQTree &iqtre
         omp_set_num_threads(bestThreads);
         params.num_threads = bestThreads;
     }
+    iqtree.warnNumThreads();
 #endif
 
 
@@ -2263,17 +2321,20 @@ void searchGAMMAInvarByRestarting(IQTree &iqtree) {
 // Test alpha fom 0.1 to 15 and p_invar from 0.1 to 0.99, stepsize = 0.01
 void exhaustiveSearchGAMMAInvar(Params &params, IQTree &iqtree) {
 	double alphaMin = 0.01;
-	double alphaMax = 2.00;
+	double alphaMax = 10.00;
 	double p_invarMin = 0.01;
 	double p_invarMax = 1.00;
+//	double p_invarMax = iqtree.aln->frac_const_sites;
 	double stepSize = 0.01;
 	int numAlpha = (int) floor((alphaMax - alphaMin)/stepSize);
 	int numInvar = (int) floor((p_invarMax - p_invarMin)/stepSize);
 
-	cout << "EVALUATING COMBINATIONS OF " << numAlpha << " ALPHAS AND " << numInvar << " P_INVARS ... " << endl;
+	cout << "EVALUATING " << numAlpha*numInvar << " COMBINATIONS OF " << " alpha=" << alphaMin << ".." << alphaMax
+         << " AND " << " p-invar=" << p_invarMin << ".." << p_invarMax
+         << " (epsilon: " << params.modelEps << ")" << endl;
 
-	vector<string> results;
-	results.reserve((unsigned long) (numAlpha * numInvar));
+//	vector<string> results;
+//	results.reserve((unsigned long) (numAlpha * numInvar));
 	DoubleVector lenvec;
 	iqtree.saveBranchLengths(lenvec);
 
@@ -2281,28 +2342,31 @@ void exhaustiveSearchGAMMAInvar(Params &params, IQTree &iqtree) {
 	site_rates->setFixPInvar(true);
 	site_rates->setFixGammaShape(true);
 
+	string aiResultsFileName = string(params.out_prefix) + ".ai_results";
+	ofstream aiFileResults;
+	aiFileResults.open(aiResultsFileName.c_str());
+	aiFileResults << fixed;
+	aiFileResults.precision(4);
+	aiFileResults << "alpha p_invar logl tree_len\n";
+
     for (double alpha = alphaMin; alpha < alphaMax; alpha = alpha + stepSize) {
+        cout << "alpha = " << alpha << endl;
         for (double p_invar = p_invarMin; p_invar < p_invarMax; p_invar = p_invar + stepSize) {
             site_rates->setGammaShape(alpha);
             site_rates->setPInvar(p_invar);
             iqtree.clearAllPartialLH();
-            double lh = iqtree.getModelFactory()->optimizeParameters(params.fixed_branch_length, false, 0.001);
-            stringstream ss;
-            ss << fixed << setprecision(2) << alpha << " " << p_invar << " " << lh << " " << iqtree.treeLength();
+            double lh = iqtree.getModelFactory()->optimizeParameters(params.fixed_branch_length, false, params.modelEps);
+//            stringstream ss;
+//            ss << fixed << setprecision(2) << alpha << " " << p_invar << " " << lh << " " << iqtree.treeLength();
+            aiFileResults << alpha << " " << p_invar << " " << lh << " " << iqtree.treeLength() << endl;
             //cout << ss.str() << endl;
-            results.push_back(ss.str());
+//            results.push_back(ss.str());
             iqtree.restoreBranchLengths(lenvec);
         }
     }
-	string aiResultsFileName = string(params.out_prefix) + ".ai_results";
-	ofstream aiFileResults;
-	aiFileResults.open(aiResultsFileName.c_str());
-	aiFileResults << fixed;
-	aiFileResults.precision(4);
-	aiFileResults << "alpha p_invar logl tree_len\n";
-	for (vector<string>::iterator it = results.begin(); it != results.end(); it++) {
-				aiFileResults << (*it) << endl;
-			}
+//	for (vector<string>::iterator it = results.begin(); it != results.end(); it++) {
+//				aiFileResults << (*it) << endl;
+//			}
 	aiFileResults.close();
 	cout << "Results were written to: " << aiResultsFileName << endl;
 	cout << "Wall clock time used: " << getRealTime() - params.start_real_time << endl;
@@ -2339,7 +2403,7 @@ void runStandardBootstrap(Params &params, string &original_model, Alignment *ali
     int bootSample = 0;
     if (tree->getCheckpoint()->get("bootSample", bootSample)) {
         cout << "CHECKPOINT: " << bootSample << " bootstrap analyses restored" << endl;
-    } else {
+    } else if (MPIHelper::getInstance().isMaster()) {
         // first empty the boottrees file
         try {
             ofstream tree_out;
@@ -2416,6 +2480,18 @@ void runStandardBootstrap(Params &params, string &original_model, Alignment *ali
                 bootstrap_alignment->printPhylip(bootaln_name.c_str(), true);
         }
 
+		if (params.print_boot_site_freq && MPIHelper::getInstance().isMaster()) {
+            printSiteStateFreq((((string)params.out_prefix)+"."+convertIntToString(sample)+".bootsitefreq").c_str(), bootstrap_alignment);
+            if (bootstrap_alignment->isSuperAlignment())
+                ((SuperAlignment*)bootstrap_alignment)->printCombinedAlignment((((string)params.out_prefix)+"."+convertIntToString(sample)+".bootaln").c_str());
+            else
+                bootstrap_alignment->printPhylip((((string)params.out_prefix)+"."+convertIntToString(sample)+".bootaln").c_str());
+        }
+
+        if (!tree->constraintTree.empty()) {
+            boot_tree->constraintTree.readConstraint(tree->constraintTree);
+        }
+
         // set checkpoint
         boot_tree->setCheckpoint(tree->getCheckpoint());
         boot_tree->num_precision = tree->num_precision;
@@ -2445,7 +2521,7 @@ void runStandardBootstrap(Params &params, string &original_model, Alignment *ali
 			outError(ERR_WRITE_OUTPUT, boottrees_name);
 		}
 		// fix bug: set the model for original tree after testing
-		if (original_model.substr(0,4) == "TEST" && tree->isSuperTree()) {
+		if ((original_model.substr(0,4) == "TEST" || original_model.substr(0,2) == "MF") && tree->isSuperTree()) {
 			PhyloSuperTree *stree = ((PhyloSuperTree*)tree);
 			stree->part_info =  ((PhyloSuperTree*)boot_tree)->part_info;
 //			for (int i = 0; i < ((PhyloSuperTree*)tree)->part_info.size(); i++)
@@ -2603,6 +2679,7 @@ void computeSiteFrequencyModel(Params &params, Alignment *alignment) {
         int bestThreads = tree->testNumThreads();
         omp_set_num_threads(bestThreads);
     }
+    tree->warnNumThreads();
 #endif
 
     tree->initializeAllPartialLh();
@@ -2684,7 +2761,7 @@ void runPhyloAnalysis(Params &params, Checkpoint *checkpoint) {
     tree->setCheckpoint(checkpoint);
     if (params.min_branch_length <= 0.0) {
         params.min_branch_length = 1e-6;
-        if (tree->getAlnNSite() >= 100000) {
+        if (!tree->isSuperTree() && tree->getAlnNSite() >= 100000) {
             params.min_branch_length = 0.1 / (tree->getAlnNSite());
             tree->num_precision = max((int)ceil(-log10(Params::getInstance().min_branch_length))+1, 6);
             cout.precision(12);
@@ -2703,12 +2780,13 @@ void runPhyloAnalysis(Params &params, Checkpoint *checkpoint) {
 
     if (params.constraint_tree_file) {
         cout << "Reading constraint tree " << params.constraint_tree_file << "..." << endl;
-        tree->constraintTree.initConstraint(params.constraint_tree_file, alignment->getSeqNames());
+        tree->constraintTree.readConstraint(params.constraint_tree_file, alignment->getSeqNames());
         if (params.start_tree == STT_PLL_PARSIMONY)
             params.start_tree = STT_PARSIMONY;
         else if (params.start_tree == STT_BIONJ)
             outError("Constraint tree does not work with -t BIONJ");
-            
+        if (params.num_bootstrap_samples || params.gbo_replicates)
+            cout << "INFO: Constraint tree will be applied to ML tree and all bootstrap trees." << endl;
     }
 
     if (params.compute_seq_identity_along_tree) {
@@ -2775,8 +2853,10 @@ void runPhyloAnalysis(Params &params, Checkpoint *checkpoint) {
 			cout << endl << "Computing bootstrap consensus tree..." << endl;
 			string splitsfile = params.out_prefix;
 			splitsfile += ".splits.nex";
-			computeConsensusTree(splitsfile.c_str(), 0, 1e6, params.split_threshold,
-					params.split_weight_threshold, NULL, params.out_prefix, NULL, &params);
+            double weight_threshold = (params.split_threshold<1) ? params.split_threshold : (params.gbo_replicates-1.0)/params.gbo_replicates;
+            weight_threshold *= 100.0;
+			computeConsensusTree(splitsfile.c_str(), 0, 1e6, -1,
+					weight_threshold, NULL, params.out_prefix, NULL, &params);
 			// now optimize branch lengths of the consensus tree
 			string current_tree = tree->getTreeString();
 			splitsfile = params.out_prefix;
@@ -3010,8 +3090,18 @@ void computeConsensusTree(const char *input_trees, int burnin, int max_count,
 		params->split_weight_summary = SW_COUNT; // count number of splits
 		sg.init(*params);
 		params->user_file = user_file;
-		for (SplitGraph::iterator it = sg.begin(); it != sg.end(); it++)
-			hash_ss.insertSplit((*it), (*it)->getWeight());
+		for (SplitGraph::iterator it = sg.begin(); it != sg.end();)
+            if ((*it)->getWeight() > weight_threshold) {
+                hash_ss.insertSplit((*it), (*it)->getWeight());
+                it++;
+            } else {
+                // delete the split
+                if (it != sg.end()-1) {
+                    *(*it) = (*sg.back());
+                }
+                delete sg.back();
+                sg.pop_back();
+            }
 		/*		StrVector sgtaxname;
 		 sg.getTaxaName(sgtaxname);
 		 i = 0;
diff --git a/phylokernel.h b/phylokernel.h
index 8c9ab66..3498eb4 100644
--- a/phylokernel.h
+++ b/phylokernel.h
@@ -1638,7 +1638,7 @@ void PhyloTree::computePartialParsimonyFastSIMD(PhyloNeighbor *dad_branch, Phylo
         switch (nstates) {
         case 4:
             #ifdef _OPENMP
-            #pragma omp parallel for private (site) reduction(+: score) if(nsites>200)
+            #pragma omp parallel for private (site) reduction(+: score) if(nsites>num_threads*10)
             #endif
 			for (site = 0; site<nsites; site++) {
                 size_t offset = entry_size*site;
@@ -1666,7 +1666,7 @@ void PhyloTree::computePartialParsimonyFastSIMD(PhyloNeighbor *dad_branch, Phylo
 			break;
         default:
             #ifdef _OPENMP
-            #pragma omp parallel for private (site) reduction(+: score) if(nsites > 800/nstates)
+            #pragma omp parallel for private (site) reduction(+: score) if(nsites>num_threads*10)
             #endif
 			for (site = 0; site<nsites; site++) {
                 size_t offset = entry_size*site;
@@ -1730,7 +1730,7 @@ int PhyloTree::computeParsimonyBranchFastSIMD(PhyloNeighbor *dad_branch, PhyloNo
     switch (nstates) {
     case 4:
         #ifdef _OPENMP
-        #pragma omp parallel for private (site) reduction(+: score) if(nsites>200)
+        #pragma omp parallel for private (site) reduction(+: score) if(nsites>num_threads*10)
         #endif
 		for (site = 0; site < nsites; site++) {
             size_t offset = entry_size*site;
@@ -1749,7 +1749,7 @@ int PhyloTree::computeParsimonyBranchFastSIMD(PhyloNeighbor *dad_branch, PhyloNo
 		break;
     default:
         #ifdef _OPENMP
-        #pragma omp parallel for private (site) reduction(+: score) if(nsites > 800/nstates)
+        #pragma omp parallel for private (site) reduction(+: score) if(nsites>num_threads*10)
         #endif
 		for (site = 0; site < nsites; site++) {
             size_t offset = entry_size*site;
diff --git a/phylokernelfma.cpp b/phylokernelfma.cpp
index 1e9909c..717c47c 100644
--- a/phylokernelfma.cpp
+++ b/phylokernelfma.cpp
@@ -75,10 +75,10 @@ void PhyloTree::setLikelihoodKernelFMA() {
             computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferSIMD<Vec4d, NORM_LH, 20, true, true>;
             break;
         default:
-            computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchGenericSIMD        <Vec4d, NORM_LH, true, true>;
-            computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervGenericSIMD            <Vec4d, NORM_LH, true, true>;
-            computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodGenericSIMD      <Vec4d, NORM_LH, true, true>;
-            computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferGenericSIMD<Vec4d, NORM_LH, true, true>;
+            computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchGenericSIMD        <Vec4d, SAFE_LH, true, true>;
+            computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervGenericSIMD            <Vec4d, SAFE_LH, true, true>;
+            computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodGenericSIMD      <Vec4d, SAFE_LH, true, true>;
+            computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferGenericSIMD<Vec4d, SAFE_LH, true, true>;
             break;
         }
         return;
@@ -154,10 +154,10 @@ void PhyloTree::setLikelihoodKernelFMA() {
 		break;
     */
 	default:
-        computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchGenericSIMD<Vec4d, NORM_LH, true>;
-        computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervGenericSIMD<Vec4d, NORM_LH, true>;
-        computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodGenericSIMD<Vec4d, NORM_LH, true>;
-        computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferGenericSIMD<Vec4d, NORM_LH, true>;
+        computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchGenericSIMD<Vec4d, SAFE_LH, true>;
+        computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervGenericSIMD<Vec4d, SAFE_LH, true>;
+        computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodGenericSIMD<Vec4d, SAFE_LH, true>;
+        computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferGenericSIMD<Vec4d, SAFE_LH, true>;
 		break;
 	}
 }
diff --git a/phylokernelnew.h b/phylokernelnew.h
index b68e46a..fffd525 100644
--- a/phylokernelnew.h
+++ b/phylokernelnew.h
@@ -44,7 +44,10 @@ template <class VectorClass, const bool append>
 inline void sumVec(VectorClass *A, VectorClass &X, size_t N)
 {
     if (N == 1) {
-        X = A[0];
+        if (append)
+            X += A[0];
+        else
+            X = A[0];
         return;
     }
 
@@ -60,7 +63,7 @@ inline void sumVec(VectorClass *A, VectorClass &X, size_t N)
             V[0] += A[i];
             V[1] += A[i+1];
             V[2] += A[i+2];
-            V[2] += A[i+3];
+            V[3] += A[i+3];
         }
         if (append)
             X += (V[0] + V[1]) + (V[2] + V[3]);
@@ -120,6 +123,24 @@ template <class VectorClass, class Numeric, const bool FMA>
 inline void dotProductVec(Numeric *A, VectorClass *B, VectorClass &X, size_t N)
 #endif
 {
+    // quick treatment for small N <= 4
+    switch (N) {
+    case 1:
+        X = A[0]*B[0];
+        return;
+    case 2:
+        X = mul_add(A[1], B[1], A[0]*B[0]);
+        return;
+    case 3:
+        X = mul_add(A[2], B[2], mul_add(A[1], B[1], A[0]*B[0]));
+        return;
+    case 4:
+        X = mul_add(A[1], B[1], A[0]*B[0]) + mul_add(A[3], B[3], A[2]*B[2]);
+        return;
+    default: break;
+    }
+
+    // For N > 4, add the rest with 4-way unrolling
     size_t i, j;
     switch (N % 4) {
     case 0: {
@@ -134,6 +155,19 @@ inline void dotProductVec(Numeric *A, VectorClass *B, VectorClass &X, size_t N)
         break;
     }
 
+    case 1: {
+        const size_t Nm1 = N-1;
+        VectorClass V[4];
+        for (j = 0; j < 4; j++)
+            V[j] = A[j] * B[j];
+        for (i = 4; i < Nm1; i+=4) {
+            for (j = 0; j < 4; j++)
+                V[j] = mul_add(A[i+j], B[i+j], V[j]);
+        }
+        X = mul_add(A[Nm1], B[Nm1], (V[0]+V[1]) + (V[2]+V[3]));
+        break;
+    }
+
     case 2: {
         VectorClass V[2];
         for (j = 0; j < 2; j++)
@@ -182,6 +216,26 @@ template <class VectorClass, class Numeric, const bool FMA>
 inline void dotProductDualVec(Numeric *A, VectorClass *B, Numeric *C, VectorClass *D, VectorClass &X, size_t N)
 #endif
 {
+    // quick treatment for small N <= 4
+    switch (N) {
+    case 1:
+        X = (A[0]*B[0])*(C[0]*D[0]);
+        return;
+    case 2:
+        X = mul_add(A[1],B[1],A[0]*B[0]) * mul_add(C[1],D[1],C[0]*D[0]);
+        return;
+    case 3:
+        X = mul_add(A[2], B[2], mul_add(A[1], B[1], A[0]*B[0])) *
+            mul_add(C[2], D[2], mul_add(C[1], D[1], C[0]*D[0]));
+        return;
+    case 4:
+        X = (mul_add(A[1], B[1], A[0]*B[0]) + mul_add(A[3], B[3], A[2]*B[2])) *
+            (mul_add(C[1], D[1], C[0]*D[0]) + mul_add(C[3], D[3], C[2]*D[2]));
+        return;
+    default: break;
+    }
+
+    // For N > 4, add the rest with 4-way unrolling
     size_t i, j;
     switch (N % 4) {
     case 0: {
@@ -201,6 +255,24 @@ inline void dotProductDualVec(Numeric *A, VectorClass *B, Numeric *C, VectorClas
         break;
     }
 
+    case 1: {
+        const size_t Nm1 = N-1;
+        VectorClass AB[4], CD[4];
+        for (j = 0; j < 4; j++) {
+            AB[j] = A[j] * B[j];
+            CD[j] = C[j] * D[j];
+        }
+        for (i = 4; i < Nm1; i+=4) {
+
+            for (j = 0; j < 4; j++) {
+                AB[j] = mul_add(A[i+j],  B[i+j],  AB[j]);
+                CD[j] = mul_add(C[i+j], D[i+j], CD[j]);
+            }
+        }
+        X = mul_add(A[Nm1], B[Nm1], (AB[0]+AB[1])+(AB[2]+AB[3])) * mul_add(C[Nm1], D[Nm1], (CD[0]+CD[1])+CD[2]+CD[3]);
+        break;
+    }
+
     case 2: {
         VectorClass AB[2], CD[2];
         for (j = 0; j < 2; j++) {
@@ -255,7 +327,35 @@ template <class VectorClass, class Numeric, const bool FMA>
 inline void productVecMat(VectorClass *A, Numeric *M, VectorClass *X, size_t N)
 #endif
 {
-    size_t i, j, x;
+    // quick treatment for small N <= 4
+    size_t i;
+    switch (N) {
+    case 1:
+        X[0] = A[0]*M[0];
+        return;
+    case 2:
+        X[0] = mul_add(A[1],M[1],A[0]*M[0]);
+        X[1] = mul_add(A[1],M[3],A[0]*M[2]);
+        return;
+    case 3:
+        for (i = 0; i < 3; i++) {
+            // manual unrolling
+            X[i] = mul_add(A[2],M[2],mul_add(A[1],M[1],A[0]*M[0]));
+            M += 3;
+        }
+        return;
+    case 4:
+        for (i = 0; i < 4; i++) {
+            // manual unrolling
+            X[i] = mul_add(A[1],M[1],A[0]*M[0]) + mul_add(A[3],M[3],A[2]*M[2]);
+            M += 4;
+        }
+        return;
+    default: break;
+    }
+
+    // For N > 4, add the rest with 4-way unrolling
+    size_t j, x;
 
     switch (N % 4) {
     case 0:
@@ -274,6 +374,23 @@ inline void productVecMat(VectorClass *A, Numeric *M, VectorClass *X, size_t N)
         }
         break;
 
+    case 1: {
+        const size_t Nm1 = N-1;
+        for (i = 0; i < N; i++) {
+            // manual unrolling
+            VectorClass V[4];
+            for (j = 0; j < 4; j++)
+                V[j] = A[j] * M[j];
+
+            for (x = 4; x < Nm1; x+=4) {
+                for (j = 0; j < 4; j++)
+                    V[j] = mul_add(A[x+j], M[x+j], V[j]);
+            }
+            X[i] = mul_add(A[Nm1], M[Nm1], (V[0]+V[1])+(V[2]+V[3]));
+            M += N;
+        }
+        break;
+    }
     case 2:
         for (i = 0; i < N; i++) {
             // manual unrolling
@@ -328,7 +445,39 @@ template <class VectorClass, class Numeric, const bool FMA>
 inline void productVecMat(VectorClass *A, Numeric *M, VectorClass *X, VectorClass &Xmax, size_t N)
 #endif
 {
-    size_t i, j, x;
+    // quick treatment for small N <= 4
+    size_t i;
+    switch (N) {
+    case 1:
+        X[0] = A[0]*M[0];
+        Xmax = max(Xmax, abs(X[0]));
+        return;
+    case 2:
+        X[0] = mul_add(A[1],M[1],A[0]*M[0]);
+        X[1] = mul_add(A[1],M[3],A[0]*M[2]);
+        Xmax = max(Xmax, max(abs(X[0]), abs(X[1])));
+        return;
+    case 3:
+        for (i = 0; i < 3; i++) {
+            // manual unrolling
+            X[i] = mul_add(A[2],M[2],mul_add(A[1],M[1],A[0]*M[0]));
+            Xmax = max(Xmax, abs(X[i]));
+            M += 3;
+        }
+        return;
+    case 4:
+        for (i = 0; i < 4; i++) {
+            // manual unrolling
+            X[i] = mul_add(A[1],M[1],A[0]*M[0]) + mul_add(A[3],M[3],A[2]*M[2]);
+            Xmax = max(Xmax, abs(X[i]));
+            M += 4;
+        }
+        return;
+    default: break;
+    }
+
+    // For N > 4, add the rest with 4-way unrolling
+    size_t j, x;
 
     switch (N % 4) {
     case 0:
@@ -348,6 +497,24 @@ inline void productVecMat(VectorClass *A, Numeric *M, VectorClass *X, VectorClas
         }
         break;
 
+    case 1: {
+        const size_t Nm1 = N-1;
+        for (i = 0; i < N; i++) {
+            // manual unrolling
+            VectorClass V[4];
+            for (j = 0; j < 4; j++)
+                V[j] = A[j] * M[j];
+
+            for (x = 4; x < Nm1; x+=4) {
+                for (j = 0; j < 4; j++)
+                    V[j] = mul_add(A[x+j], M[x+j], V[j]);
+            }
+            X[i] = mul_add(A[Nm1], M[Nm1], (V[0]+V[1])+(V[2]+V[3]));
+            M += N;
+            Xmax = max(Xmax, abs(X[i]));
+        }
+        break;
+    }
     case 2:
         for (i = 0; i < N; i++) {
             // manual unrolling
@@ -480,6 +647,18 @@ inline void dotProduct3Vec(Numeric *A, VectorClass *B, VectorClass *C, VectorCla
         break;
     }
 
+    case 1: {
+        VectorClass V[4];
+        const size_t Nm1 = N-1;
+        for (j = 0; j < 4; j++)
+            V[j] = A[j] * B[j] * C[j];
+        for (i = 4; i < Nm1; i+=4)
+            for (j = 0; j < 4; j++)
+                V[j] = mul_add(A[i+j]*B[i+j], C[i+j], V[j]);
+        X = mul_add(A[Nm1]*B[Nm1], C[Nm1], (V[0]+V[1])+(V[2]+V[3]));
+        break;
+    }
+
     case 2: {
         VectorClass V[2];
         for (j = 0; j < 2; j++)
@@ -604,116 +783,6 @@ inline void scaleLikelihood(VectorClass &lh_max, double *invar, double *dad_part
  *
  ******************************************************/
 
-#ifndef KERNEL_FIX_STATES
-inline bool PhyloTree::computeTraversalInfo(PhyloNeighbor *dad_branch, PhyloNode *dad, double* &buffer) {
-
-    size_t nstates = aln->num_states;
-    PhyloNode *node = (PhyloNode*)dad_branch->node;
-
-    if ((dad_branch->partial_lh_computed & 1) || node->isLeaf()) {
-        return mem_slots.lock(dad_branch);
-    }
-
-
-    size_t num_leaves = 0;
-    bool locked[node->degree()];
-    memset(locked, 0, node->degree());
-
-    // sort neighbor in desceding size order
-    NeighborVec neivec = node->neighbors;
-    NeighborVec::iterator it, i2;
-    for (it = neivec.begin(); it != neivec.end(); it++)
-        for (i2 = it+1; i2 != neivec.end(); i2++)
-            if (((PhyloNeighbor*)*it)->size < ((PhyloNeighbor*)*i2)->size) {
-                Neighbor *nei = *it;
-                *it = *i2;
-                *i2 = nei;
-            }
-
-
-    // recursive
-    for (it = neivec.begin(); it != neivec.end(); it++)
-        if ((*it)->node != dad) {
-            locked[it - neivec.begin()] = computeTraversalInfo((PhyloNeighbor*)(*it), node, buffer);
-            if ((*it)->node->isLeaf())
-                num_leaves++;
-        }
-    dad_branch->partial_lh_computed |= 1;
-
-    // prepare information for this branch
-    TraversalInfo info(dad_branch, dad);
-    info.echildren = info.partial_lh_leaves = NULL;
-
-    // re-orient partial_lh
-    reorientPartialLh(dad_branch, dad);
-
-    if (!dad_branch->partial_lh || mem_slots.locked(dad_branch)) {
-        // still no free entry found, memory saving technique
-        int slot_id = mem_slots.allocate(dad_branch);
-        if (slot_id < 0) {
-            cout << "traversal order:";
-            for (auto it = traversal_info.begin(); it != traversal_info.end(); it++) {
-                it->dad_branch->node->name = convertIntToString(it->dad_branch->size);
-                cout << "  ";
-                if (it->dad->isLeaf())
-                    cout << it->dad->name;
-                else
-                    cout << it->dad->id;
-                cout << "->";
-                if (it->dad_branch->node->isLeaf())
-                    cout << it->dad_branch->node->name;
-                else
-                    cout << it->dad_branch->node->id;
-                if (params->lh_mem_save == LM_MEM_SAVE) {
-                    if (it->dad_branch->partial_lh_computed)
-                        cout << " [";
-                    else
-                        cout << " (";
-                    cout << mem_slots.findNei(it->dad_branch) - mem_slots.begin();
-                    if (it->dad_branch->partial_lh_computed)
-                        cout << "]";
-                    else
-                        cout << ")";
-                }
-            }
-            cout << endl;
-            drawTree(cout);
-            assert(0 && "No free/unlocked mem slot found!");
-        }
-    } else
-        mem_slots.update(dad_branch);
-
-        if (verbose_mode >= VB_MED && params->lh_mem_save == LM_MEM_SAVE) {
-            int slot_id = mem_slots.findNei(dad_branch) - mem_slots.begin();
-            node->name = convertIntToString(slot_id);
-            cout << "Branch " << dad->id << "-" << node->id << " assigned slot " << slot_id << endl;
-        }
-
-    if (params->lh_mem_save == LM_MEM_SAVE) {
-        for (it = neivec.begin(); it != neivec.end(); it++)
-            if ((*it)->node != dad) {
-                if (!(*it)->node->isLeaf() && locked[it-neivec.begin()])
-                    mem_slots.unlock((PhyloNeighbor*)*it);
-            }
-    }
-
-    if (!model->isSiteSpecificModel()) {
-        //------- normal model -----
-        info.echildren = buffer;
-        size_t block = nstates * ((model_factory->fused_mix_rate) ? site_rate->getNRate() : site_rate->getNRate()*model->getNMixtures());
-        buffer += get_safe_upper_limit(block*nstates*(node->degree()-1));
-        if (num_leaves) {
-            info.partial_lh_leaves = buffer;
-            buffer += get_safe_upper_limit((aln->STATE_UNKNOWN+1)*block*num_leaves);
-        }
-    }
-
-    traversal_info.push_back(info);
-    return mem_slots.lock(dad_branch);
-}
-#endif
-
-
 
 #ifdef KERNEL_FIX_STATES
 template<class VectorClass, const int nstates>
@@ -1134,13 +1203,19 @@ void PhyloTree::computePartialLikelihoodGenericSIMD(TraversalInfo &info, size_t
             } else
                 memset(&dad_branch->scale_num[ptn], 0, sizeof(UBYTE)*VectorClass::size());
 
-            if (SITE_MODEL) {
-                VectorClass *expchild = partial_lh_all + block;
-                VectorClass *eval_ptr = (VectorClass*) &eval[ptn*nstates];
-                VectorClass *evec_ptr = (VectorClass*) &evec[ptn*states_square];
-                double *len_child = len_children;
-                VectorClass vchild;
-                FOR_NEIGHBOR_IT(node, dad, it) {
+            // SITE_MODEL variables
+            VectorClass *expchild = partial_lh_all + block;
+            VectorClass *eval_ptr = (VectorClass*) &eval[ptn*nstates];
+            VectorClass *evec_ptr = (VectorClass*) &evec[ptn*states_square];
+            double *len_child = len_children;
+            VectorClass vchild;
+
+            // normal model
+            double *partial_lh_leaf = partial_lh_leaves;
+            double *echild = echildren;
+
+            FOR_NEIGHBOR_IT(node, dad, it) {
+                if (SITE_MODEL) {
                     PhyloNeighbor *child = (PhyloNeighbor*)*it;
                     UBYTE *scale_child = SAFE_NUMERIC ? child->scale_num + ptn*ncat_mix : NULL;
                     VectorClass *partial_lh = partial_lh_all;
@@ -1192,14 +1267,8 @@ void PhyloTree::computePartialLikelihoodGenericSIMD(TraversalInfo &info, size_t
                         }
                     } // if
                     len_child += ncat;
-                } // FOR_NEIGHBOR
-
-            } else {
-                // non site specific model
-                double *partial_lh_leaf = partial_lh_leaves;
-                double *echild = echildren;
-
-                FOR_NEIGHBOR_IT(node, dad, it) {
+                } else {
+                    // non site specific model
                     PhyloNeighbor *child = (PhyloNeighbor*)*it;
                     UBYTE *scale_child = SAFE_NUMERIC ? child->scale_num + ptn*ncat_mix : NULL;
                     if (child->node->isLeaf()) {
@@ -1257,9 +1326,52 @@ void PhyloTree::computePartialLikelihoodGenericSIMD(TraversalInfo &info, size_t
                         }
                     } // if
                     echild += block*nstates;
-                } // FOR_NEIGHBOR
-            } // if SITE_MODEL
-            
+                } // if SITE_MODEL
+
+                /***** now do likelihood rescaling ******/
+                if (SAFE_NUMERIC) {
+                    VectorClass *partial_lh_tmp = partial_lh_all;
+                    for (c = 0; c < ncat_mix; c++) {
+                        VectorClass lh_max = 0.0;
+                        for (x = 0; x < nstates; x++)
+                            lh_max = max(lh_max,abs(partial_lh_tmp[x]));
+                        // check if one should scale partial likelihoods
+                        auto underflown = ((lh_max < SCALING_THRESHOLD) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0));
+                        if (horizontal_or(underflown)) { // at least one site has numerical underflown
+                            for (x = 0; x < VectorClass::size(); x++)
+                            if (underflown[x]) {
+                                // BQM 2016-05-03: only scale for non-constant sites
+                                // now do the likelihood scaling
+                                double *partial_lh = (double*)partial_lh_tmp + (x);
+                                for (i = 0; i < nstates; i++)
+                                    partial_lh[i*VectorClass::size()] *= SCALING_THRESHOLD_INVER;
+                                dad_branch->scale_num[(ptn+x)*ncat_mix+c] += 1;
+                            }
+                        }
+                        partial_lh_tmp += nstates;
+                    }
+                } else {
+                    // not -safe numeric
+                    VectorClass lh_max = 0.0;
+                    for (x = 0; x < block; x++)
+                        lh_max = max(lh_max,abs(partial_lh_all[x]));
+                    auto underflown = (lh_max < SCALING_THRESHOLD) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0);
+                    if (horizontal_or(underflown)) { // at least one site has numerical underflown
+                        for (x = 0; x < VectorClass::size(); x++)
+                        if (underflown[x]) {
+                            double *partial_lh = (double*)partial_lh_all + (x);
+                            // now do the likelihood scaling
+                            for (i = 0; i < block; i++) {
+                                partial_lh[i*VectorClass::size()] *= SCALING_THRESHOLD_INVER;
+                            }
+    //                        sum_scale += LOG_SCALING_THRESHOLD * ptn_freq[ptn+x];
+                            dad_branch->scale_num[ptn+x] += 1;
+                        }
+                    }
+                } // if-else
+
+            } // FOR_NEIGHBOR
+
         
             // compute dot-product with inv_eigenvector
             VectorClass *partial_lh_tmp = partial_lh_all;
@@ -1267,8 +1379,6 @@ void PhyloTree::computePartialLikelihoodGenericSIMD(TraversalInfo &info, size_t
             VectorClass lh_max = 0.0;
             double *inv_evec_ptr = SITE_MODEL ? &inv_evec[ptn*states_square] : NULL;
             for (c = 0; c < ncat_mix; c++) {
-                if (SAFE_NUMERIC)
-                    lh_max = 0.0;
                 if (SITE_MODEL) {
                     // compute dot-product with inv_eigenvector
 #ifdef KERNEL_FIX_STATES
@@ -1284,41 +1394,10 @@ void PhyloTree::computePartialLikelihoodGenericSIMD(TraversalInfo &info, size_t
                     productVecMat<VectorClass, double, FMA> (partial_lh_tmp, inv_evec_ptr, partial_lh, lh_max, nstates);
 #endif
                 }
-                // check if one should scale partial likelihoods
-                if (SAFE_NUMERIC) {
-                    auto underflown = ((lh_max < SCALING_THRESHOLD) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0));
-                    if (horizontal_or(underflown)) { // at least one site has numerical underflown
-                        for (x = 0; x < VectorClass::size(); x++)
-                        if (underflown[x]) {
-                            // BQM 2016-05-03: only scale for non-constant sites
-                            // now do the likelihood scaling
-                            double *partial_lh = dad_branch->partial_lh + (ptn*block + c*nstates*VectorClass::size() + x);
-                            for (i = 0; i < nstates; i++)
-                                partial_lh[i*VectorClass::size()] *= SCALING_THRESHOLD_INVER;
-                            dad_branch->scale_num[(ptn+x)*ncat_mix+c] += 1;
-                        }
-                    }
-                }
                 partial_lh += nstates;
                 partial_lh_tmp += nstates;
             }
 
-            if (!SAFE_NUMERIC) {
-                auto underflown = (lh_max < SCALING_THRESHOLD) & (VectorClass().load_a(&ptn_invar[ptn]) == 0.0);
-                if (horizontal_or(underflown)) { // at least one site has numerical underflown
-                    for (x = 0; x < VectorClass::size(); x++)
-                    if (underflown[x]) {
-                        double *partial_lh = dad_branch->partial_lh + (ptn*block + x);
-                        // now do the likelihood scaling
-                        for (i = 0; i < block; i++) {
-                            partial_lh[i*VectorClass::size()] *= SCALING_THRESHOLD_INVER;
-                        }
-//                        sum_scale += LOG_SCALING_THRESHOLD * ptn_freq[ptn+x];
-                        dad_branch->scale_num[ptn+x] += 1;
-                    }
-                }
-            }
-
         } // for ptn
 
         // end multifurcating treatment
@@ -2114,6 +2193,11 @@ void PhyloTree::computeLikelihoodDervGenericSIMD(PhyloNeighbor *dad_branch, Phyl
 	df = horizontal_add(all_df);
 	ddf = horizontal_add(all_ddf);
 
+    if (std::isnan(df) || std::isinf(df)) {
+        getModel()->writeInfo(cout);
+        getRate()->writeInfo(cout);
+    }
+
     if (!SAFE_NUMERIC && (std::isnan(df) || std::isinf(df)))
         outError("Numerical underflow (lh-derivative). Run again with the safe likelihood kernel via `-safe` option");
 
diff --git a/phylokernelsse.cpp b/phylokernelsse.cpp
index bb3ee7f..50101f3 100644
--- a/phylokernelsse.cpp
+++ b/phylokernelsse.cpp
@@ -80,10 +80,10 @@ void PhyloTree::setLikelihoodKernelSSE() {
             computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferSIMD<Vec2d, NORM_LH, 20, false, true>;
             break;
         default:
-            computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchGenericSIMD        <Vec2d, NORM_LH, false, true>;
-            computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervGenericSIMD            <Vec2d, NORM_LH, false, true>;
-            computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodGenericSIMD      <Vec2d, NORM_LH, false, true>;
-            computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferGenericSIMD<Vec2d, NORM_LH, false, true>;
+            computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchGenericSIMD        <Vec2d, SAFE_LH, false, true>;
+            computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervGenericSIMD            <Vec2d, SAFE_LH, false, true>;
+            computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodGenericSIMD      <Vec2d, SAFE_LH, false, true>;
+            computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferGenericSIMD<Vec2d, SAFE_LH, false, true>;
             break;
         }
         return;
@@ -159,10 +159,10 @@ void PhyloTree::setLikelihoodKernelSSE() {
 		break;
     */
 	default:
-        computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchGenericSIMD<Vec2d, NORM_LH>;
-        computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervGenericSIMD<Vec2d, NORM_LH>;
-        computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodGenericSIMD<Vec2d, NORM_LH>;
-        computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferGenericSIMD<Vec2d, NORM_LH>;
+        computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchGenericSIMD<Vec2d, SAFE_LH>;
+        computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervGenericSIMD<Vec2d, SAFE_LH>;
+        computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodGenericSIMD<Vec2d, SAFE_LH>;
+        computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferGenericSIMD<Vec2d, SAFE_LH>;
 		break;
 	}
 }
diff --git a/phylosupertree.cpp b/phylosupertree.cpp
index 8fb8918..5f69fa1 100644
--- a/phylosupertree.cpp
+++ b/phylosupertree.cpp
@@ -112,7 +112,7 @@ void PhyloSuperTree::readPartition(Params &params) {
 			if (info.aln_file == "" && params.aln_file) info.aln_file = params.aln_file;
 			getline(in, info.sequence_type, ',');
 			if (info.sequence_type=="" && params.sequence_type) info.sequence_type = params.sequence_type;
-			getline(in, info.position_spec);
+			safeGetline(in, info.position_spec);
             trimString(info.sequence_type);
 			cout << endl << "Reading partition " << info.name << " (model=" << info.model_name << ", aln=" <<
 					info.aln_file << ", seq=" << info.sequence_type << ", pos=" << ((info.position_spec.length() >= 20) ? info.position_spec.substr(0,20)+"..." : info.position_spec) << ") ..." << endl;
@@ -221,7 +221,7 @@ void PhyloSuperTree::readPartitionRaxml(Params &params) {
             if (info.name.empty())
                 outError("Please give partition names in partition file!");
 
-			getline(in, info.position_spec);
+			safeGetline(in, info.position_spec);
             trimString(info.position_spec);
             if (info.position_spec.empty())
                 outError("Please specify alignment positions for partition" + info.name);
diff --git a/phylosupertreeplen.cpp b/phylosupertreeplen.cpp
index 753d84f..9123925 100644
--- a/phylosupertreeplen.cpp
+++ b/phylosupertreeplen.cpp
@@ -549,7 +549,7 @@ double PhyloSuperTreePlen::computeLikelihoodFromBuffer() {
 	double score = 0.0;
 	int part, ntrees = size();
 	for (part = 0; part < ntrees; part++) {
-		assert(part_info[part].cur_score != 0.0);
+//		assert(part_info[part].cur_score != 0.0);
 		score += part_info[part].cur_score;
 	}
 	return score;
@@ -885,7 +885,11 @@ double PhyloSuperTreePlen::swapNNIBranch(double cur_score, PhyloNode *node1, Phy
 //		}
 //		cout<<"------------------------------------------------------"<<endl;
 
-
+    // reorient partial_lh in case of nni1
+    if (!params->nni5) {
+        reorientPartialLh((PhyloNeighbor*)node1->findNeighbor(node2), node1);
+        reorientPartialLh((PhyloNeighbor*)node2->findNeighbor(node1), node2);
+    }
 
 	/*------------------------------------------------------------------------------------
 	 * Saving original neighbors:
@@ -1033,9 +1037,9 @@ double PhyloSuperTreePlen::swapNNIBranch(double cur_score, PhyloNode *node1, Phy
 					// update link_neighbor[part]
 					((SuperNeighbor*)*saved_it[id])->link_neighbors[part] = (PhyloNeighbor*)*sub_saved_it[part*6 + id];
 				}
+                assert(mem_id == 2);
 			}
 
-            assert(mem_id == 2);
 
 		} else if(is_nni[part]==NNI_ONE_EPSILON){
 
@@ -1180,8 +1184,8 @@ double PhyloSuperTreePlen::swapNNIBranch(double cur_score, PhyloNode *node1, Phy
 				//allNNIcases_computed[0] += 1;
 
                 // reorient partial_lh before swap
-                reorientPartialLh((PhyloNeighbor*)node1_link[part]->findNeighbor(node2_link[part]), node1_link[part]);
-                reorientPartialLh((PhyloNeighbor*)node2_link[part]->findNeighbor(node1_link[part]), node2_link[part]);
+                at(part)->reorientPartialLh((PhyloNeighbor*)node1_link[part]->findNeighbor(node2_link[part]), node1_link[part]);
+                at(part)->reorientPartialLh((PhyloNeighbor*)node2_link[part]->findNeighbor(node1_link[part]), node2_link[part]);
 
 				// Do NNI swap on partition
 				node1_link[part]->updateNeighbor(node1_link_it[part], node2_link_nei[part]);
@@ -1476,8 +1480,8 @@ double PhyloSuperTreePlen::swapNNIBranch(double cur_score, PhyloNode *node1, Phy
 			if(is_nni[part]==NNI_NO_EPSILON){
 
                 // reorient partial_lh before swap
-                reorientPartialLh((PhyloNeighbor*)node1_link[part]->findNeighbor(node2_link[part]), node1_link[part]);
-                reorientPartialLh((PhyloNeighbor*)node2_link[part]->findNeighbor(node1_link[part]), node2_link[part]);
+                at(part)->reorientPartialLh((PhyloNeighbor*)node1_link[part]->findNeighbor(node2_link[part]), node1_link[part]);
+                at(part)->reorientPartialLh((PhyloNeighbor*)node2_link[part]->findNeighbor(node1_link[part]), node2_link[part]);
 
 				node1_link[part]->updateNeighbor(node1_link_it[part], node1_link_nei[part]);
 				node1_link_nei[part]->node->updateNeighbor(node2_link[part], node1_link[part]);
@@ -2041,6 +2045,15 @@ void PhyloSuperTreePlen::initializeAllPartialLh(int &index, int &indexlh, PhyloN
 	assert(0);
 }
 
+void PhyloSuperTreePlen::reorientPartialLh(PhyloNeighbor* dad_branch, Node *dad) {
+    SuperNeighbor *sdad_branch = (SuperNeighbor*) dad_branch;
+    SuperNeighbor *snode_branch = (SuperNeighbor*) dad_branch->node->findNeighbor(dad);
+    for (int part = 0; part < size(); part++)
+        if (sdad_branch->link_neighbors[part]) {
+            at(part)->reorientPartialLh(sdad_branch->link_neighbors[part], snode_branch->link_neighbors[part]->node);
+        }
+}
+
 
 string PhyloSuperTreePlen::getTreeString() {
     return PhyloTree::getTreeString();
diff --git a/phylosupertreeplen.h b/phylosupertreeplen.h
index 88dd177..00c6155 100644
--- a/phylosupertreeplen.h
+++ b/phylosupertreeplen.h
@@ -389,6 +389,8 @@ public:
      */
     virtual int fixNegativeBranch(bool force = false, Node *node = NULL, Node *dad = NULL);
 
+    virtual void reorientPartialLh(PhyloNeighbor* dad_branch, Node *dad);
+
 protected:
 	vector<uint64_t> partial_lh_entries, scale_num_entries, partial_pars_entries, block_size, scale_block_size;
 
diff --git a/phylotesting.cpp b/phylotesting.cpp
index 2ddc637..246fb13 100644
--- a/phylotesting.cpp
+++ b/phylotesting.cpp
@@ -611,6 +611,33 @@ void printSiteStateFreq(const char*filename, PhyloTree *tree, double *state_freq
         delete [] ptn_state_freq;
 }
 
+void printSiteStateFreq(const char* filename, Alignment *aln) {
+    if (aln->site_state_freq.empty())
+        return;
+    int i, j, nsites = aln->getNSite(), nstates = aln->num_states;
+	try {
+		ofstream out;
+		out.exceptions(ios::failbit | ios::badbit);
+		out.open(filename);
+		IntVector pattern_index;
+		aln->getSitePatternIndex(pattern_index);
+		for (i = 0; i < nsites; i++) {
+			out.width(6);
+			out << left << i+1 << " ";
+            double *state_freq = aln->site_state_freq[pattern_index[i]];
+			for (j = 0; j < nstates; j++) {
+				out.width(15);
+				out << state_freq[j] << " ";
+			}
+			out << endl;
+		}
+		out.close();
+		cout << "Site state frequency vectors printed to " << filename << endl;
+	} catch (ios::failure) {
+		outError(ERR_WRITE_OUTPUT, filename);
+	}
+}
+
 bool checkModelFile(ifstream &in, bool is_partitioned, vector<ModelInfo> &infos) {
 	if (!in.is_open()) return false;
 	in.exceptions(ios::badbit);
@@ -634,7 +661,7 @@ bool checkModelFile(ifstream &in, bool is_partitioned, vector<ModelInfo> &infos)
         outWarning(".model file was produced from a previous version of IQ-TREE");
 		return false;
     }
-	getline(in, str);
+	safeGetline(in, str);
 	while (!in.eof()) {
 		in >> str;
 		if (in.eof())
@@ -646,7 +673,7 @@ bool checkModelFile(ifstream &in, bool is_partitioned, vector<ModelInfo> &infos)
 		}
 		info.name = str;
 		in >> info.df >> info.logl >> info.tree_len;
-		getline(in, str);
+		safeGetline(in, str);
         info.tree = "";
         if (*str.rbegin() == ';') {
             size_t pos = str.rfind('\t');
@@ -817,7 +844,7 @@ int getModelList(Params &params, Alignment *aln, StrVector &models, bool separat
         }
     }
 
-	bool with_new = params.model_name.find("NEW") != string::npos;
+	bool with_new = (params.model_name.find("NEW") != string::npos || params.model_name.substr(0,2) == "MF" || params.model_name.empty());
 	bool with_asc = params.model_name.find("ASC") != string::npos;
 
 //	if (seq_type == SEQ_CODON) {
@@ -829,16 +856,16 @@ int getModelList(Params &params, Alignment *aln, StrVector &models, bool separat
         if (with_new) {
             if (with_asc)
                 test_options = test_options_asc_new;
-            else if (seq_type == SEQ_PROTEIN)
-                test_options = test_options_noASC_I_new;
-            else
+            else if (seq_type == SEQ_DNA || seq_type == SEQ_BINARY || seq_type == SEQ_MORPH)
                 test_options = test_options_morph_new;
+            else
+                test_options = test_options_noASC_I_new;
         } else if (with_asc)
             test_options = test_options_asc;
-        else if (seq_type == SEQ_PROTEIN)
-            test_options = test_options_noASC_I;
-        else
+        else if (seq_type == SEQ_DNA || seq_type == SEQ_BINARY || seq_type == SEQ_MORPH)
             test_options = test_options_morph;
+        else
+            test_options = test_options_noASC_I;
 	} else {
         // normal data, use +I instead
         if (with_new) {
@@ -1146,10 +1173,13 @@ void testPartitionModel(Params &params, PhyloSuperTree* in_tree, vector<ModelInf
         }
     }
 
+    bool parallel_over_partitions = false;
+
 #ifdef _OPENMP
+    parallel_over_partitions = !params.model_test_and_tree && (in_tree->size() >= num_threads);
 //        for (i = 0; i < in_tree->size(); i++)
 //            cout << distID[i]+1 << "\t" << in_tree->part_info[distID[i]].name << "\t" << -dist[i] << endl;
-#pragma omp parallel for private(i) schedule(dynamic) reduction(+: lhsum, dfsum) if(!params.model_test_and_tree)
+#pragma omp parallel for private(i) schedule(dynamic) reduction(+: lhsum, dfsum) if(parallel_over_partitions)
 #endif
 	for (int j = 0; j < in_tree->size(); j++) {
         i = distID[j];
@@ -1159,11 +1189,10 @@ void testPartitionModel(Params &params, PhyloSuperTree* in_tree, vector<ModelInf
 		extractModelInfo(in_tree->part_info[i].name, model_info, part_model_info);
         stringstream this_fmodel;
 		// do the computation
-//#ifdef _OPENMP
-		string model = testModel(params, this_tree, part_model_info, this_fmodel, models_block, 1, in_tree->part_info[i].name);
-//#else
-//		string model = testModel(params, this_tree, part_model_info, fmodel, in_tree->part_info[i].name);
-//#endif
+        string part_model_name;
+        if (params.model_name.empty())
+            part_model_name = in_tree->part_info[i].model_name;
+		string model = testModel(params, this_tree, part_model_info, this_fmodel, models_block, (parallel_over_partitions ? 1 : num_threads), in_tree->part_info[i].name, false, part_model_name);
 		double score = computeInformationScore(part_model_info[0].logl,part_model_info[0].df,
 				this_tree->getAlnNSite(),params.model_test_criterion);
 		in_tree->part_info[i].model_name = model;
@@ -1236,7 +1265,8 @@ void testPartitionModel(Params &params, PhyloSuperTree* in_tree, vector<ModelInf
         // compute distance between gene_sets
 		for (int part1 = 0; part1 < gene_sets.size()-1; part1++)
 			for (int part2 = part1+1; part2 < gene_sets.size(); part2++)
-			if (super_aln->partitions[gene_sets[part1][0]]->seq_type == super_aln->partitions[gene_sets[part2][0]]->seq_type)
+			if (super_aln->partitions[gene_sets[part1][0]]->seq_type == super_aln->partitions[gene_sets[part2][0]]->seq_type &&
+                super_aln->partitions[gene_sets[part1][0]]->genetic_code == super_aln->partitions[gene_sets[part2][0]]->genetic_code)
             {
 				// only merge partitions of the same data type
                 dist[num_pairs] = fabs(lenvec[part1] - lenvec[part2]);
@@ -1305,11 +1335,7 @@ void testPartitionModel(Params &params, PhyloSuperTree* in_tree, vector<ModelInf
                 if (params.model_test_and_tree) {
                     tree->setCheckpoint(new Checkpoint());
                 }
-//#ifdef _OPENMP
                 model = testModel(params, tree, part_model_info, this_fmodel, models_block, 1, set_name);
-//#else
-//                model = testModel(params, tree, part_model_info, fmodel, set_name);
-//#endif
                 logl = part_model_info[0].logl;
                 df = part_model_info[0].df;
                 treelen = part_model_info[0].tree_len;
@@ -1433,7 +1459,7 @@ bool isMixtureModel(ModelsBlock *models_block, string &model_str) {
 }
 
 string testModel(Params &params, PhyloTree* in_tree, vector<ModelInfo> &model_info, ostream &fmodel, ModelsBlock *models_block,
-    int num_threads, string set_name, bool print_mem_usage)
+    int num_threads, string set_name, bool print_mem_usage, string in_model_name)
 {
 	SeqType seq_type = in_tree->aln->seq_type;
 	if (in_tree->isSuperTree())
@@ -1446,13 +1472,18 @@ string testModel(Params &params, PhyloTree* in_tree, vector<ModelInfo> &model_in
 	sitelh_file += ".sitelh";
 	in_tree->params = ¶ms;
 	StrVector model_names;
-	int max_cats = getModelList(params, in_tree->aln, model_names, params.model_test_separate_rate);
+	int max_cats;
+    if (in_model_name.empty())
+        max_cats = getModelList(params, in_tree->aln, model_names, params.model_test_separate_rate);
+    else {
+        max_cats = params.max_rate_cats;
+        model_names.push_back(in_model_name);
+    }
 	int model;
 
     if (print_mem_usage) {
         uint64_t mem_size = in_tree->getMemoryRequired(max_cats);
-        cout << "NOTE: MODEL SELECTION REQUIRES " << (mem_size / 1024) / 1024
-                << " MB MEMORY!" << endl;
+        cout << "NOTE: ModelFinder requires " << (mem_size / 1024) / 1024 << " MB RAM!" << endl;
         if (mem_size >= getMemorySize()) {
             outError("Memory required exceeds your computer RAM size!");
         }
@@ -1524,10 +1555,15 @@ string testModel(Params &params, PhyloTree* in_tree, vector<ModelInfo> &model_in
 	if (params.model_test_sample_size)
 		ssize = params.model_test_sample_size;
 	if (set_name == "") {
-		cout << "Testing " << model_names.size() << " "
+		cout << "ModelFinder will test " << model_names.size() << " "
 			<< ((seq_type == SEQ_BINARY) ? "binary" : ((seq_type == SEQ_DNA) ? "DNA" :
 				((seq_type == SEQ_PROTEIN) ? "protein": ((seq_type == SEQ_CODON) ? "codon": "morphological"))))
 			<< " models (sample size: " << ssize << ") ..." << endl;
+        if (verbose_mode >= VB_MED) {
+            for (auto i = model_names.begin(); i != model_names.end(); i++)
+                cout << *i << " ";
+            cout << endl;
+        }
         if (params.model_test_and_tree == 0)
             cout << " No. Model         -LnL         df  AIC          AICc         BIC" << endl;
 	}
@@ -1689,6 +1725,7 @@ string testModel(Params &params, PhyloTree* in_tree, vector<ModelInfo> &model_in
         num_threads = tree->testNumThreads();
         omp_set_num_threads(num_threads);
     }
+    tree->warnNumThreads();
 #endif
 
 
@@ -1732,6 +1769,15 @@ string testModel(Params &params, PhyloTree* in_tree, vector<ModelInfo> &model_in
 		} else {
             if (params.model_test_and_tree) {
                 string original_model = params.model_name;
+                // BQM 2017-03-29: disable bootstrap
+                int orig_num_bootstrap_samples = params.num_bootstrap_samples;
+                int orig_gbo_replicates = params.gbo_replicates;
+                params.num_bootstrap_samples = 0;
+                params.gbo_replicates = 0;
+                STOP_CONDITION orig_stop_condition = params.stop_condition;
+                if (params.stop_condition == SC_BOOTSTRAP_CORRELATION)
+                    params.stop_condition = SC_UNSUCCESS_ITERATION;
+
                 params.model_name = model_names[model];
                 char *orig_user_tree = params.user_file;
                 string new_user_tree = (string)params.out_prefix+".treefile";
@@ -1758,8 +1804,14 @@ string testModel(Params &params, PhyloTree* in_tree, vector<ModelInfo> &model_in
                 info.logl = iqtree->computeLikelihood();
                 info.tree_len = iqtree->treeLength();
 //                info.tree = iqtree->getTreeString();
+
+                // restore original parameters
                 params.model_name = original_model;
                 params.user_file = orig_user_tree;
+                // 2017-03-29: restore bootstrap replicates
+                params.num_bootstrap_samples = orig_num_bootstrap_samples;
+                params.gbo_replicates = orig_gbo_replicates;
+                params.stop_condition = orig_stop_condition;
                 tree = iqtree;
 
                 // clear all checkpointed information
@@ -2448,7 +2500,7 @@ void performAUTest(Params &params, PhyloTree *tree, double *pattern_lhs, vector<
             for (tid = 0; tid < ntrees; tid++) {
                 double *pattern_lh = pattern_lhs + (tid*maxnptn);
                 double tree_lh;
-                if (params.SSE == LK_EIGEN) {
+                if (params.SSE == LK_386) {
                     tree_lh = 0.0;
                     for (ptn = 0; ptn < nptn; ptn++)
                         tree_lh += pattern_lh[ptn] * boot_sample_dbl[ptn];
diff --git a/phylotesting.h b/phylotesting.h
index 63f9889..1ab34d8 100644
--- a/phylotesting.h
+++ b/phylotesting.h
@@ -69,7 +69,7 @@ bool checkModelFile(string model_file, bool is_partitioned, vector<ModelInfo> &i
  @return name of best-fit-model
  */
 string testModel(Params &params, PhyloTree* in_tree, vector<ModelInfo> &model_info, ostream &fmodel,
-		ModelsBlock *models_block, int num_threads, string set_name = "", bool print_mem_usage = false);
+		ModelsBlock *models_block, int num_threads, string set_name = "", bool print_mem_usage = false, string in_model_name = "");
 
 /**
  * print site log likelihoods to a fileExists
@@ -115,6 +115,13 @@ void printSiteProbCategory(const char*filename, PhyloTree *tree, SiteLoglType ws
 void printSiteStateFreq(const char*filename, PhyloTree *tree, double *state_freqs = NULL);
 
 /**
+ * print site state frequency vectors (for Huaichun)
+ * @param filename output file name
+ * @param aln alignment
+*/
+void printSiteStateFreq(const char* filename, Alignment *aln);
+
+/**
     print ancestral sequences
     @param filename output file name
     @param tree phylogenetic tree
diff --git a/phylotree.cpp b/phylotree.cpp
index ffa9886..79fba26 100644
--- a/phylotree.cpp
+++ b/phylotree.cpp
@@ -96,8 +96,7 @@ void PhyloTree::init() {
     dist_matrix = NULL;
     var_matrix = NULL;
     params = NULL;
-    setLikelihoodKernel(LK_EIGEN_SSE, 1);  // FOR TUNG: you forgot to initialize this variable!
-    sse = LK_EIGEN_SSE;
+    setLikelihoodKernel(LK_SSE2, 1);  // FOR TUNG: you forgot to initialize this variable!
     num_threads = 0;
     max_lh_slots = 0;
     save_all_trees = 0;
@@ -1437,7 +1436,7 @@ void PhyloTree::computePatternProbabilityCategory(double *ptn_prob_cat, SiteLogl
 }
 
 int PhyloTree::computePatternCategories(IntVector *pattern_ncat) {
-    if (sse != LK_EIGEN) {
+    if (sse != LK_386) {
         // compute _pattern_lh_cat
         computePatternLhCat(WSL_MIXTURE_RATECAT);
     }
@@ -3129,6 +3128,11 @@ NNIMove PhyloTree::getBestNNIForBran(PhyloNode *node1, PhyloNode *node2, NNIMove
 	}
 	assert(id == IT_NUM);
 
+    if (!params->nni5) {
+        reorientPartialLh((PhyloNeighbor*)node1->findNeighbor(node2), node1);
+        reorientPartialLh((PhyloNeighbor*)node2->findNeighbor(node1), node2);
+    }
+
 	Neighbor *saved_nei[6];
     int mem_id = 0;
 	// save Neighbor and allocate new Neighbor pointer
@@ -3143,7 +3147,8 @@ NNIMove PhyloTree::getBestNNIForBran(PhyloNode *node1, PhyloNode *node2, NNIMove
         }
 //		((PhyloNeighbor*) (*saved_it[id]))->scale_num = newScaleNum();
 	}
-    assert(mem_id == 2);
+    if (params->nni5)
+        assert(mem_id == 2);
 
 	// get the Neighbor again since it is replaced for saving purpose
 	PhyloNeighbor* node12_it = (PhyloNeighbor*) node1->findNeighbor(node2);
@@ -3198,10 +3203,8 @@ NNIMove PhyloTree::getBestNNIForBran(PhyloNode *node1, PhyloNode *node2, NNIMove
         Neighbor *node2_nei = *node2_it;
 
         // reorient partial_lh before swap
-        if (!isSuperTree()) {
-            reorientPartialLh(node12_it, node1);
-            reorientPartialLh(node21_it, node2);
-        }
+        reorientPartialLh(node12_it, node1);
+        reorientPartialLh(node21_it, node2);
 
         node1->updateNeighbor(node1_it, node2_nei);
         node2_nei->node->updateNeighbor(node2, node1);
@@ -3262,10 +3265,8 @@ NNIMove PhyloTree::getBestNNIForBran(PhyloNode *node1, PhyloNode *node2, NNIMove
 		}
 
         // reorient partial_lh before swap
-        if (!isSuperTree()) {
-            reorientPartialLh(node12_it, node1);
-            reorientPartialLh(node21_it, node2);
-        }
+        reorientPartialLh(node12_it, node1);
+        reorientPartialLh(node21_it, node2);
 
         // else, swap back, also recover the branch lengths
 		node1->updateNeighbor(node1_it, node1_nei);
@@ -4579,10 +4580,11 @@ void PhyloTree::printTransMatrices(Node *node, Node *dad) {
 void PhyloTree::removeIdenticalSeqs(Params &params) {
 	// commented out because it also works for SuperAlignment now!
 	Alignment *new_aln;
+    // 2017-03-31: always keep two identical sequences no matter if -bb or not, to avoid conflict between 2 subsequent runs
 	if (params.root)
-		new_aln = aln->removeIdenticalSeq((string)params.root, params.gbo_replicates > 0, removed_seqs, twin_seqs);
+		new_aln = aln->removeIdenticalSeq((string)params.root, true, removed_seqs, twin_seqs);
 	else
-		new_aln = aln->removeIdenticalSeq("", params.gbo_replicates > 0, removed_seqs, twin_seqs);
+		new_aln = aln->removeIdenticalSeq("", true, removed_seqs, twin_seqs);
 	if (removed_seqs.size() > 0) {
 		cout << "NOTE: " << removed_seqs.size() << " identical sequences (see below) will be ignored for subsequent analysis" << endl;
         for (int i = 0; i < removed_seqs.size(); i++) {
@@ -4720,6 +4722,7 @@ void PhyloTree::sortNeighborBySubtreeSize(PhyloNode *node, PhyloNode *dad) {
 */
 
 void PhyloTree::reorientPartialLh(PhyloNeighbor* dad_branch, Node *dad) {
+    ASSERT(!isSuperTree());
     if (dad_branch->partial_lh)
         return;
     Node * node = dad_branch->node;
@@ -4734,3 +4737,113 @@ void PhyloTree::reorientPartialLh(PhyloNeighbor* dad_branch, Node *dad) {
         assert(dad_branch->partial_lh && "partial_lh is not re-oriented");
 }
 
+/****************************************************************************
+        helper functions for computing tree traversal
+ ****************************************************************************/
+
+bool PhyloTree::computeTraversalInfo(PhyloNeighbor *dad_branch, PhyloNode *dad, double* &buffer) {
+
+    size_t nstates = aln->num_states;
+    PhyloNode *node = (PhyloNode*)dad_branch->node;
+
+    if ((dad_branch->partial_lh_computed & 1) || node->isLeaf()) {
+        return mem_slots.lock(dad_branch);
+    }
+
+
+    size_t num_leaves = 0;
+    bool locked[node->degree()];
+    memset(locked, 0, node->degree());
+
+    // sort neighbor in desceding size order
+    NeighborVec neivec = node->neighbors;
+    NeighborVec::iterator it, i2;
+    for (it = neivec.begin(); it != neivec.end(); it++)
+        for (i2 = it+1; i2 != neivec.end(); i2++)
+            if (((PhyloNeighbor*)*it)->size < ((PhyloNeighbor*)*i2)->size) {
+                Neighbor *nei = *it;
+                *it = *i2;
+                *i2 = nei;
+            }
+
+
+    // recursive
+    for (it = neivec.begin(); it != neivec.end(); it++)
+        if ((*it)->node != dad) {
+            locked[it - neivec.begin()] = computeTraversalInfo((PhyloNeighbor*)(*it), node, buffer);
+            if ((*it)->node->isLeaf())
+                num_leaves++;
+        }
+    dad_branch->partial_lh_computed |= 1;
+
+    // prepare information for this branch
+    TraversalInfo info(dad_branch, dad);
+    info.echildren = info.partial_lh_leaves = NULL;
+
+    // re-orient partial_lh
+    reorientPartialLh(dad_branch, dad);
+
+    if (!dad_branch->partial_lh || mem_slots.locked(dad_branch)) {
+        // still no free entry found, memory saving technique
+        int slot_id = mem_slots.allocate(dad_branch);
+        if (slot_id < 0) {
+            cout << "traversal order:";
+            for (auto it = traversal_info.begin(); it != traversal_info.end(); it++) {
+                it->dad_branch->node->name = convertIntToString(it->dad_branch->size);
+                cout << "  ";
+                if (it->dad->isLeaf())
+                    cout << it->dad->name;
+                else
+                    cout << it->dad->id;
+                cout << "->";
+                if (it->dad_branch->node->isLeaf())
+                    cout << it->dad_branch->node->name;
+                else
+                    cout << it->dad_branch->node->id;
+                if (params->lh_mem_save == LM_MEM_SAVE) {
+                    if (it->dad_branch->partial_lh_computed)
+                        cout << " [";
+                    else
+                        cout << " (";
+                    cout << mem_slots.findNei(it->dad_branch) - mem_slots.begin();
+                    if (it->dad_branch->partial_lh_computed)
+                        cout << "]";
+                    else
+                        cout << ")";
+                }
+            }
+            cout << endl;
+            drawTree(cout);
+            assert(0 && "No free/unlocked mem slot found!");
+        }
+    } else
+        mem_slots.update(dad_branch);
+
+        if (verbose_mode >= VB_MED && params->lh_mem_save == LM_MEM_SAVE) {
+            int slot_id = mem_slots.findNei(dad_branch) - mem_slots.begin();
+            node->name = convertIntToString(slot_id);
+            cout << "Branch " << dad->id << "-" << node->id << " assigned slot " << slot_id << endl;
+        }
+
+    if (params->lh_mem_save == LM_MEM_SAVE) {
+        for (it = neivec.begin(); it != neivec.end(); it++)
+            if ((*it)->node != dad) {
+                if (!(*it)->node->isLeaf() && locked[it-neivec.begin()])
+                    mem_slots.unlock((PhyloNeighbor*)*it);
+            }
+    }
+
+    if (!model->isSiteSpecificModel()) {
+        //------- normal model -----
+        info.echildren = buffer;
+        size_t block = nstates * ((model_factory->fused_mix_rate) ? site_rate->getNRate() : site_rate->getNRate()*model->getNMixtures());
+        buffer += get_safe_upper_limit(block*nstates*(node->degree()-1));
+        if (num_leaves) {
+            info.partial_lh_leaves = buffer;
+            buffer += get_safe_upper_limit((aln->STATE_UNKNOWN+1)*block*num_leaves);
+        }
+    }
+
+    traversal_info.push_back(info);
+    return mem_slots.lock(dad_branch);
+}
diff --git a/phylotree.h b/phylotree.h
index 09f66a9..0949410 100644
--- a/phylotree.h
+++ b/phylotree.h
@@ -50,7 +50,7 @@
 #define BootValType float
 //#define BootValType double
 
-extern int instruction_set;
+//extern int instruction_set;
 
 #define SAFE_LH   true  // safe likelihood scaling to avoid numerical underflow for ultra large trees
 #define NORM_LH  false // normal likelihood scaling
@@ -73,11 +73,11 @@ const int SPR_DEPTH = 2;
 //using namespace Eigen;
 
 inline size_t get_safe_upper_limit(size_t cur_limit) {
-	if (instruction_set >= 9)
+	if (Params::getInstance().SSE >= LK_AVX512)
 		// AVX-512
 		return ((cur_limit+7)/8)*8;
 	else
-	if (instruction_set >= 7)
+	if (Params::getInstance().SSE >= LK_AVX)
 		// AVX
 		return ((cur_limit+3)/4)*4;
 	else
@@ -86,11 +86,11 @@ inline size_t get_safe_upper_limit(size_t cur_limit) {
 }
 
 inline size_t get_safe_upper_limit_float(size_t cur_limit) {
-	if (instruction_set >= 9)
-		// AVX
+	if (Params::getInstance().SSE >= LK_AVX512)
+		// AVX-512
 		return ((cur_limit+15)/16)*16;
 	else
-	if (instruction_set >= 7)
+	if (Params::getInstance().SSE >= LK_AVX)
 		// AVX
 		return ((cur_limit+7)/8)*8;
 	else
@@ -98,21 +98,9 @@ inline size_t get_safe_upper_limit_float(size_t cur_limit) {
 		return ((cur_limit+3)/4)*4;
 }
 
-//inline double *aligned_alloc_double(size_t size) {
-//	size_t MEM_ALIGNMENT = (instruction_set >= 7) ? 32 : 16;
-//
-//#if defined WIN32 || defined _WIN32 || defined __WIN32__
-//	return (double*)_aligned_malloc(size*sizeof(double), MEM_ALIGNMENT);
-//#else
-//	void *res;
-//	posix_memalign(&res, MEM_ALIGNMENT, size*sizeof(double));
-//	return (double*)res;
-//#endif
-//}
-
 template< class T>
 inline T *aligned_alloc(size_t size) {
-	size_t MEM_ALIGNMENT = (instruction_set >= 9) ? 64 : ((instruction_set >= 7) ? 32 : 16);
+	size_t MEM_ALIGNMENT = (Params::getInstance().SSE >= LK_AVX512) ? 64 : ((Params::getInstance().SSE >= LK_AVX) ? 32 : 16);
     void *mem;
 
 #if defined WIN32 || defined _WIN32 || defined __WIN32__
@@ -721,7 +709,7 @@ public:
     /**
         compute traversal_info of a subtree
     */
-    inline bool computeTraversalInfo(PhyloNeighbor *dad_branch, PhyloNode *dad, double* &buffer);
+    bool computeTraversalInfo(PhyloNeighbor *dad_branch, PhyloNode *dad, double* &buffer);
 
 
     /**
@@ -1509,6 +1497,11 @@ public:
     */
     int testNumThreads();
 
+    /**
+        print warning about too many threads for short alignments
+    */
+    void warnNumThreads();
+
     /****************************************************************************
             Subtree Pruning and Regrafting by maximum likelihood
             NOTE: NOT DONE YET
@@ -1950,7 +1943,7 @@ protected:
      */
     UINT *central_partial_pars;
 
-    void reorientPartialLh(PhyloNeighbor* dad_branch, Node *dad);
+    virtual void reorientPartialLh(PhyloNeighbor* dad_branch, Node *dad);
 
     //----------- memory saving technique ------//
 
diff --git a/phylotreeavx.cpp b/phylotreeavx.cpp
index ff39ddf..991c834 100644
--- a/phylotreeavx.cpp
+++ b/phylotreeavx.cpp
@@ -79,10 +79,10 @@ void PhyloTree::setLikelihoodKernelAVX() {
             computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferSIMD<Vec4d, NORM_LH, 20, false, true>;
             break;
         default:
-            computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchGenericSIMD        <Vec4d, NORM_LH, false, true>;
-            computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervGenericSIMD            <Vec4d, NORM_LH, false, true>;
-            computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodGenericSIMD      <Vec4d, NORM_LH, false, true>;
-            computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferGenericSIMD<Vec4d, NORM_LH, false, true>;
+            computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchGenericSIMD        <Vec4d, SAFE_LH, false, true>;
+            computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervGenericSIMD            <Vec4d, SAFE_LH, false, true>;
+            computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodGenericSIMD      <Vec4d, SAFE_LH, false, true>;
+            computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferGenericSIMD<Vec4d, SAFE_LH, false, true>;
             break;
         }
         return;
@@ -158,10 +158,10 @@ void PhyloTree::setLikelihoodKernelAVX() {
 		break;
     */
 	default:
-        computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchGenericSIMD<Vec4d, NORM_LH>;
-        computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervGenericSIMD<Vec4d, NORM_LH>;
-        computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodGenericSIMD<Vec4d, NORM_LH>;
-        computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferGenericSIMD<Vec4d, NORM_LH>;
+        computeLikelihoodBranchPointer = &PhyloTree::computeLikelihoodBranchGenericSIMD<Vec4d, SAFE_LH>;
+        computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervGenericSIMD<Vec4d, SAFE_LH>;
+        computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodGenericSIMD<Vec4d, SAFE_LH>;
+        computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferGenericSIMD<Vec4d, SAFE_LH>;
 		break;
 	}
 }
diff --git a/phylotreesse.cpp b/phylotreesse.cpp
index 32f3ecb..ae47e8c 100644
--- a/phylotreesse.cpp
+++ b/phylotreesse.cpp
@@ -42,16 +42,16 @@
 
 void PhyloTree::setParsimonyKernel(LikelihoodKernel lk) {
     // set parsimony kernel
-    if (lk == LK_EIGEN || instruction_set < 2) {
+    if (lk < LK_SSE2) {
         computeParsimonyBranchPointer = &PhyloTree::computeParsimonyBranchFast;
         computePartialParsimonyPointer = &PhyloTree::computePartialParsimonyFast;
     	return;
     }
-    if (instruction_set >= 7) {
+    if (lk >= LK_AVX) {
         setParsimonyKernelAVX();
         return;
     }
-    if (instruction_set >= 2) {
+    if (lk >= LK_SSE2) {
         setParsimonyKernelSSE();
         return;
     }
@@ -67,13 +67,17 @@ void PhyloTree::setLikelihoodKernel(LikelihoodKernel lk, int num_threads) {
     //--- parsimony kernel ---
     setParsimonyKernel(lk);
 
-    bool has_fma = (hasFMA3()) && (instruction_set >= 7) && (Params::getInstance().lk_no_avx != 2);
     //--- dot-product kernel ---
-    if (has_fma) {
+#ifdef INCLUDE_AVX512
+    if (lk >= LK_AVX512) {
+		setDotProductAVX512();
+    } else
+#endif
+    if (lk >= LK_AVX_FMA) {
 		setDotProductFMA();
-	} else if (instruction_set >= 7) {
+	} else if (lk >= LK_AVX) {
 		setDotProductAVX();
-    } else if (instruction_set >= 2) {
+    } else if (lk >= LK_SSE2) {
         setDotProductSSE();
 	} else {
 
@@ -96,29 +100,29 @@ void PhyloTree::setLikelihoodKernel(LikelihoodKernel lk, int num_threads) {
         computeLikelihoodDervPointer = &PhyloTree::computeLikelihoodDervGenericSIMD<Vec1d, SAFE_LH>;
         computePartialLikelihoodPointer = &PhyloTree::computePartialLikelihoodGenericSIMD<Vec1d, SAFE_LH>;
         computeLikelihoodFromBufferPointer = &PhyloTree::computeLikelihoodFromBufferGenericSIMD<Vec1d, SAFE_LH>;
-        sse = LK_EIGEN;
+        sse = LK_386;
 #else
         computeLikelihoodBranchPointer = NULL;
         computeLikelihoodDervPointer = NULL;
         computePartialLikelihoodPointer = NULL;
         computeLikelihoodFromBufferPointer = NULL;
-        sse = LK_EIGEN;
+        sse = LK_386;
 #endif
         return;
     }
 
     //--- SIMD kernel ---
-    if (sse == LK_EIGEN_SSE && instruction_set >= 2) {
+    if (lk >= LK_SSE2) {
 #ifdef INCLUDE_AVX512
-    	if (instruction_set >= 9) {
+    	if (lk >= LK_AVX512) {
     		setLikelihoodKernelAVX512();
     		return;
     	}
 #endif
-    	if (has_fma) {
+    	if (lk >= LK_AVX_FMA) {
             // CPU supports AVX and FMA
             setLikelihoodKernelFMA();
-        } else if (instruction_set >= 7) {
+        } else if (lk >= LK_AVX) {
             // CPU supports AVX
             setLikelihoodKernelAVX();
         } else {
diff --git a/superalignment.cpp b/superalignment.cpp
index 8f56c40..7857377 100644
--- a/superalignment.cpp
+++ b/superalignment.cpp
@@ -200,7 +200,9 @@ Alignment *SuperAlignment::removeIdenticalSeq(string not_remove, bool keep_two,
 					removed_seqs.push_back(getSeqName(seq2));
 					target_seqs.push_back(getSeqName(seq1));
 					removed[seq2] = true;
-				}
+				} else {
+                    cout << "NOTE: " << getSeqName(seq2) << " is identical to " << getSeqName(seq1) << " but kept for subsequent analysis" << endl;
+                }
 				checked[seq2] = 1;
 				first_ident_seq = false;
 			}
@@ -223,6 +225,13 @@ Alignment *SuperAlignment::removeIdenticalSeq(string not_remove, bool keep_two,
 	return aln;
 }
 
+int SuperAlignment::checkAbsentStates(string msg) {
+    int count = 0;
+    for (auto it = partitions.begin(); it != partitions.end(); it++)
+        count += (*it)->checkAbsentStates("partition " + convertIntToString((it-partitions.begin())+1));
+    return count;
+}
+
 /*
 void SuperAlignment::checkGappySeq() {
 	int nseq = getNSeq(), part = 0, i;
@@ -271,7 +280,7 @@ void SuperAlignment::getSitePatternIndex(IntVector &pattern_index) {
 }
 
 void SuperAlignment::getPatternFreq(IntVector &pattern_freq) {
-	if (!isSuperAlignment()) outError("Internal error: ", __func__);
+	ASSERT(isSuperAlignment());
 	int offset = 0;
 	if (!pattern_freq.empty()) pattern_freq.resize(0);
 	for (vector<Alignment*>::iterator it = partitions.begin(); it != partitions.end(); it++) {
@@ -283,22 +292,78 @@ void SuperAlignment::getPatternFreq(IntVector &pattern_freq) {
 }
 
 void SuperAlignment::createBootstrapAlignment(Alignment *aln, IntVector* pattern_freq, const char *spec) {
-	if (!aln->isSuperAlignment()) outError("Internal error: ", __func__);
-	if (pattern_freq) outError("Unsupported yet.", __func__);
-	if (spec) outError("Unsupported yet.", __func__);
+	ASSERT(aln->isSuperAlignment());
 	Alignment::copyAlignment(aln);
 	SuperAlignment *super_aln = (SuperAlignment*) aln;
-	if (!partitions.empty()) outError("Internal error: ", __func__);
-	for (vector<Alignment*>::iterator it = super_aln->partitions.begin(); it != super_aln->partitions.end(); it++) {
-		Alignment *boot_aln = new Alignment;
-		boot_aln->createBootstrapAlignment(*it);
-		partitions.push_back(boot_aln);
-	}
+	ASSERT(partitions.empty());
+
+	if (spec && strncmp(spec, "GENE", 4) == 0) {
+		// resampling whole genes
+        partitions.resize(super_aln->partitions.size(), NULL);
+        int i, ptn;
+        for (i = 0; i < super_aln->partitions.size(); i++) {
+			int part = random_int(super_aln->partitions.size());
+            if (!partitions[part]) {
+                // allocate the partition
+                partitions[part] = new Alignment;
+                if (strncmp(spec,"GENESITE",8) == 0) {
+                    partitions[part]->createBootstrapAlignment(super_aln->partitions[part]);
+                } else
+                    partitions[part]->copyAlignment(super_aln->partitions[part]);
+            } else {
+                Alignment *newaln;
+                if (strncmp(spec,"GENESITE",8) == 0) {
+                    Alignment *newaln = new Alignment;
+                    newaln->createBootstrapAlignment(super_aln->partitions[part]);
+                } else
+                     newaln = super_aln->partitions[part];
+
+                for (ptn = 0; ptn < super_aln->partitions[part]->size(); ptn++)
+                    partitions[part]->at(ptn).frequency += newaln->at(ptn).frequency;
+                if (strncmp(spec,"GENESITE",8) == 0)
+                    delete newaln;
+            }
+        }
+
+        // fulfill genes that are missing
+        for (i = 0; i < partitions.size(); i++)
+            if (!partitions[i]) {
+                partitions[i] = new Alignment;
+                partitions[i]->copyAlignment(super_aln->partitions[i]);
+                // reset all frequency
+                for (ptn = 0; ptn < partitions[i]->size(); ptn++)
+                    partitions[i]->at(ptn).frequency = 0;
+            }
+
+        // fill up pattern_freq vector
+        if (pattern_freq) {
+            pattern_freq->resize(0);
+            for (i = 0; i < partitions.size(); i++)
+                for (ptn = 0; ptn < partitions[i]->size(); ptn++)
+                    pattern_freq->push_back(partitions[i]->at(ptn).frequency);
+        }
+    } else if (!spec) {
+		// resampling sites within genes
+        for (vector<Alignment*>::iterator it = super_aln->partitions.begin(); it != super_aln->partitions.end(); it++) {
+            Alignment *boot_aln = new Alignment;
+            if (pattern_freq) {
+                IntVector part_pattern_freq;
+                boot_aln->createBootstrapAlignment(*it, &part_pattern_freq);
+                pattern_freq->insert(pattern_freq->end(), part_pattern_freq.begin(), part_pattern_freq.end());
+            } else {
+                boot_aln->createBootstrapAlignment(*it);
+            }
+            partitions.push_back(boot_aln);
+        }
+    } else {
+        outError("Wrong -bsam, either -bsam GENE or -bsam GENESITE");
+    }
 	taxa_index = super_aln->taxa_index;
+    countConstSite();
 }
 
 void SuperAlignment::createBootstrapAlignment(IntVector &pattern_freq, const char *spec) {
-	if (!isSuperAlignment()) outError("Internal error: ", __func__);
+	ASSERT(isSuperAlignment());
 	int nptn = 0;
 	for (vector<Alignment*>::iterator it = partitions.begin(); it != partitions.end(); it++) {
 		nptn += (*it)->getNPattern();
@@ -309,39 +374,11 @@ void SuperAlignment::createBootstrapAlignment(IntVector &pattern_freq, const cha
 	pattern_freq.insert(pattern_freq.end(), internal_freq, internal_freq + nptn);
 	delete [] internal_freq;
 
-/*	if (spec && strncmp(spec, "GENE", 4) != 0) outError("Unsupported yet.", __func__);
-
-	int offset = 0;
-	if (!pattern_freq.empty()) pattern_freq.resize(0);
-
-	if (spec && strncmp(spec, "GENE", 4) == 0) {
-		// resampling whole genes
-		int nptn = 0;
-		IntVector part_pos;
-		for (vector<Alignment*>::iterator it = partitions.begin(); it != partitions.end(); it++) {
-			part_pos.push_back(nptn);
-			nptn += (*it)->getNPattern();
-		}
-		pattern_freq.resize(nptn, 0);
-		for (int i = 0; i < partitions.size(); i++) {
-			int part = random_int(partitions.size());
-			for (int j = 0; j < partitions[part]->getNPattern(); j++)
-				pattern_freq[j + part_pos[part]] += partitions[part]->at(j).frequency;
-		}
-	} else {
-		// resampling sites within genes
-		for (vector<Alignment*>::iterator it = partitions.begin(); it != partitions.end(); it++) {
-			IntVector freq;
-			(*it)->createBootstrapAlignment(freq);
-			pattern_freq.insert(pattern_freq.end(), freq.begin(), freq.end());
-			offset += freq.size();
-		}
-	}*/
 }
 
 
 void SuperAlignment::createBootstrapAlignment(int *pattern_freq, const char *spec, int *rstream) {
-	if (!isSuperAlignment()) outError("Internal error: ", __func__);
+	ASSERT(isSuperAlignment());
 //	if (spec && strncmp(spec, "GENE", 4) != 0) outError("Unsupported yet. ", __func__);
 
 	if (spec && strncmp(spec, "GENE", 4) == 0) {
@@ -385,7 +422,7 @@ void SuperAlignment::createBootstrapAlignment(int *pattern_freq, const char *spe
  * shuffle alignment by randomizing the order of sites
  */
 void SuperAlignment::shuffleAlignment() {
-	if (!isSuperAlignment()) outError("Internal error: ", __func__);
+	ASSERT(isSuperAlignment());
 	for (vector<Alignment*>::iterator it = partitions.begin(); it != partitions.end(); it++) {
 		(*it)->shuffleAlignment();
 	}
@@ -551,8 +588,7 @@ Alignment *SuperAlignment::concatenateAlignments(IntVector &ids) {
 	SeqType sub_type = SEQ_UNKNOWN;
 	for (i = 0; i < ids.size(); i++) {
 		int id = ids[i];
-		if (id < 0 || id >= partitions.size())
-			outError("Internal error ", __func__);
+		ASSERT(id >= 0 && id < partitions.size());
 		if (nstates == 0) nstates = partitions[id]->num_states;
 		if (sub_type == SEQ_UNKNOWN) sub_type = partitions[id]->seq_type;
 		if (sub_type != partitions[id]->seq_type)
@@ -580,6 +616,12 @@ Alignment *SuperAlignment::concatenateAlignments(IntVector &ids) {
     aln->pattern_index.clear();
     aln->STATE_UNKNOWN = partitions[ids[0]]->STATE_UNKNOWN;
     aln->genetic_code = partitions[ids[0]]->genetic_code;
+    if (aln->seq_type == SEQ_CODON) {
+    	aln->codon_table = new char[aln->num_states];
+    	memcpy(aln->codon_table, partitions[ids[0]]->codon_table, aln->num_states);
+    	aln->non_stop_codon = new char[strlen(aln->genetic_code)];
+    	memcpy(aln->non_stop_codon, partitions[ids[0]]->non_stop_codon, strlen(aln->genetic_code));
+    }
 
     int site = 0;
     for (i = 0; i < ids.size(); i++) {
diff --git a/superalignment.h b/superalignment.h
index ba47436..f4605dd 100644
--- a/superalignment.h
+++ b/superalignment.h
@@ -124,6 +124,13 @@ public:
     virtual Alignment *removeIdenticalSeq(string not_remove, bool keep_two, StrVector &removed_seqs, StrVector &target_seqs);
 
 
+    /*
+        check if some state is absent, which may cause numerical issues
+        @param msg additional message into the warning
+        @return number of absent states in the alignment
+    */
+    virtual int checkAbsentStates(string msg);
+
 	/**
 		Quit if some sequences contain only gaps or missing data
 	*/
diff --git a/tools.cpp b/tools.cpp
index fecb07f..255e802 100644
--- a/tools.cpp
+++ b/tools.cpp
@@ -158,6 +158,39 @@ double randomLen(Params &params) {
     return len;
 }
 
+std::istream& safeGetline(std::istream& is, std::string& t)
+{
+    t.clear();
+
+    // The characters in the stream are read one-by-one using a std::streambuf.
+    // That is faster than reading them one-by-one using the std::istream.
+    // Code that uses streambuf this way must be guarded by a sentry object.
+    // The sentry object performs various tasks,
+    // such as thread synchronization and updating the stream state.
+
+    std::istream::sentry se(is, true);
+    std::streambuf* sb = is.rdbuf();
+
+    for(;;) {
+        int c = sb->sbumpc();
+        switch (c) {
+        case '\n':
+            return is;
+        case '\r':
+            if(sb->sgetc() == '\n')
+                sb->sbumpc();
+            return is;
+        case EOF:
+            // Also handle the case when the last line has no line ending
+            if(t.empty())
+                is.setstate(std::ios::eofbit);
+            return is;
+        default:
+            t += (char)c;
+        }
+    }
+}
+
 //From Tung
 
 string convertIntToString(int number) {
@@ -758,7 +791,7 @@ void parseArg(int argc, char *argv[], Params &params) {
     params.popSize = 5;
     params.p_delete = -1;
     params.min_iterations = -1;
-    params.max_iterations = 1;
+    params.max_iterations = 1000;
     params.num_param_iterations = 100;
     params.stop_condition = SC_UNSUCCESS_ITERATION;
     params.stop_confidence = 0.95;
@@ -806,8 +839,11 @@ void parseArg(int argc, char *argv[], Params &params) {
     params.aLRT_test = false;
     params.aBayes_test = false;
     params.localbp_replicates = 0;
-    params.SSE = LK_EIGEN_SSE;
-    params.lk_no_avx = 0;
+#ifdef INCLUDE_AVX512
+    params.SSE = LK_AVX512;
+#else
+    params.SSE = LK_AVX_FMA;
+#endif
     params.lk_safe_scaling = false;
     params.numseq_safe_scaling = 2000;
     params.print_site_lh = WSL_NONE;
@@ -919,6 +955,7 @@ void parseArg(int argc, char *argv[], Params &params) {
     params.model_test_sample_size = 0;
     params.root_state = NULL;
     params.print_bootaln = false;
+    params.print_boot_site_freq = false;
 	params.print_subaln = false;
 	params.print_partition_info = false;
 	params.print_conaln = false;
@@ -1889,29 +1926,23 @@ void parseArg(int argc, char *argv[], Params &params) {
 					throw "Lambda must be in (0,1]";
 				continue;
 			}
-//			if (strcmp(argv[cnt], "-nosse") == 0) {
-//				params.SSE = LK_NORMAL;
-//				continue;
-//			}
-//			if (strcmp(argv[cnt], "-slowsse") == 0) {
-//				params.SSE = LK_SSE;
-//				continue;
-//			}
-			if (strcmp(argv[cnt], "-fastlk") == 0) {
-				params.SSE = LK_EIGEN;
-				continue;
-			}
-			if (strcmp(argv[cnt], "-fastsse") == 0
-					|| strcmp(argv[cnt], "-fasttipsse") == 0) {
-				params.SSE = LK_EIGEN_SSE;
-				continue;
-			}
-			if (strcmp(argv[cnt], "-noavx") == 0) {
-				params.lk_no_avx = 1;
-				continue;
-			}
-			if (strcmp(argv[cnt], "-nofma") == 0) {
-				params.lk_no_avx = 2;
+
+            if (strcmp(argv[cnt], "-lk") == 0) {
+				cnt++;
+				if (cnt >= argc)
+                    throw "-lk x86|SSE|AVX|FMA|AVX512";
+                if (strcmp(argv[cnt], "x86") == 0)
+                    params.SSE = LK_386;
+                else if (strcmp(argv[cnt], "SSE") == 0)
+                    params.SSE = LK_SSE2;
+                else if (strcmp(argv[cnt], "AVX") == 0)
+                    params.SSE = LK_AVX;
+                else if (strcmp(argv[cnt], "FMA") == 0)
+                    params.SSE = LK_AVX_FMA;
+                else if (strcmp(argv[cnt], "AVX512") == 0)
+                    params.SSE = LK_AVX512;
+                else
+                    throw "Incorrect -lk likelihood kernel option";
 				continue;
 			}
 
@@ -2155,10 +2186,10 @@ void parseArg(int argc, char *argv[], Params &params) {
 					params.consensus_type = CT_NONE;
 				continue;
 			}
-			if (strcmp(argv[cnt], "-bspec") == 0) {
+			if (strcmp(argv[cnt], "-bspec") == 0 || strcmp(argv[cnt], "-bsam") == 0) {
 				cnt++;
 				if (cnt >= argc)
-					throw "Use -bspec <bootstrap_specification>";
+					throw "Use -bsam <bootstrap_specification>";
 				params.bootstrap_spec = argv[cnt];
 				continue;
 			}
@@ -2375,6 +2406,10 @@ void parseArg(int argc, char *argv[], Params &params) {
 				params.print_bootaln = true;
 				continue;
 			}
+			if (strcmp(argv[cnt], "-wbsf") == 0) {
+				params.print_boot_site_freq = true;
+				continue;
+			}
 			if (strcmp(argv[cnt], "-wsa") == 0) {
 				params.print_subaln = true;
 				continue;
@@ -2547,7 +2582,7 @@ void parseArg(int argc, char *argv[], Params &params) {
 				cnt++;
 				if (cnt >= argc)
 					throw "Use -bb <#replicates>";
-                if (params.min_iterations != -1) {
+                if (params.stop_condition == SC_FIXED_ITERATION) {
                     outError("Ultrafast bootstrap does not work with -te or -n option");
                 }
 				params.gbo_replicates = convert_int(argv[cnt]);
@@ -2878,6 +2913,7 @@ void parseArg(int argc, char *argv[], Params &params) {
 				params.unsuccess_iteration = convert_int(argv[cnt]);
                 if (params.unsuccess_iteration <= 0)
                     throw "-nstop iterations must be positive";
+                params.max_iterations = max(params.max_iterations, params.unsuccess_iteration*10);
 				continue;
 			}
 			if (strcmp(argv[cnt], "-lsbran") == 0) {
@@ -3224,6 +3260,10 @@ void parseArg(int argc, char *argv[], Params &params) {
     if (params.gbo_replicates && params.num_bootstrap_samples)
         outError("UFBoot (-bb) and standard bootstrap (-b) must not be specified together");
 
+    if ((params.model_name.find("ONLY") != string::npos || (params.model_name.substr(0,2) == "MF" && params.model_name.substr(0,3) != "MFP")) && (params.gbo_replicates || params.num_bootstrap_samples))
+        outError("ModelFinder only cannot be combined with bootstrap analysis");
+
+
     if (!params.out_prefix) {
     	if (params.eco_dag_file)
     		params.out_prefix = params.eco_dag_file;
@@ -3322,7 +3362,7 @@ void usage_iqtree(char* argv[], bool full_command) {
     printCopyright(cout);
     cout << "Usage: " << argv[0] << " -s <alignment> [OPTIONS]" << endl << endl;
     cout << "GENERAL OPTIONS:" << endl
-            << "  -? or -h             Printing this help dialog" << endl
+            << "  -? or -h             Print this help dialog" << endl
             << "  -s <alignment>       Input alignment in PHYLIP/FASTA/NEXUS/CLUSTAL/MSF format" << endl
             << "  -st <data_type>      BIN, DNA, AA, NT2AA, CODON, MORPH (default: auto-detect)" << endl
             << "  -q <partition_file>  Edge-linked partition model (file in NEXUS/RAxML format)" << endl
@@ -3332,13 +3372,13 @@ void usage_iqtree(char* argv[], bool full_command) {
             << "                       Starting tree (default: 99 parsimony tree and BIONJ)" << endl
             << "  -te <user_tree_file> Like -t but fixing user tree (no tree search performed)" << endl
             << "  -o <outgroup_taxon>  Outgroup taxon name for writing .treefile" << endl
-            << "  -pre <PREFIX>        Using <PREFIX> for output files (default: aln/partition)" << endl
+            << "  -pre <PREFIX>        Prefix for all output files (default: aln/partition)" << endl
 #ifdef _OPENMP
             << "  -nt <#cpu_cores>     Number of cores/threads to use (REQUIRED)" << endl
 #endif
             << "  -seed <number>       Random seed number, normally used for debugging purpose" << endl
             << "  -v, -vv, -vvv        Verbose mode, printing more messages to screen" << endl
-            << "  -quiet               Silent mode, suppress printing to screen (stdout)" << endl
+            << "  -quiet               Quiet mode, suppress printing to screen (stdout)" << endl
             << "  -keep-ident          Keep identical sequences (default: remove & finally add)" << endl
             << "  -safe                Safe likelihood kernel to avoid numerical underflow" << endl
             << "  -mem RAM             Maximal RAM usage for memory saving mode" << endl
@@ -3354,7 +3394,7 @@ void usage_iqtree(char* argv[], bool full_command) {
             << "  -ninit <number>      Number of initial parsimony trees (default: 100)" << endl
             << "  -ntop <number>       Number of top initial trees (default: 20)" << endl
             << "  -nbest <number>      Number of best trees retained during search (defaut: 5)" << endl
-            << "  -n <#iterations>     Fix number of iterations to <#iterations> (default: auto)" << endl
+            << "  -n <#iterations>     Fix number of iterations to stop (default: auto)" << endl
             << "  -nstop <number>      Number of unsuccessful iterations to stop (default: 100)" << endl
             << "  -pers <proportion>   Perturbation strength for randomized NNI (default: 0.5)" << endl
             << "  -sprrad <number>     Radius for parsimony SPR search (default: 6)" << endl
@@ -3364,6 +3404,7 @@ void usage_iqtree(char* argv[], bool full_command) {
 //            << "  -iqpnni              Switch back to the old IQPNNI tree search algorithm" << endl
             << endl << "ULTRAFAST BOOTSTRAP:" << endl
             << "  -bb <#replicates>    Ultrafast bootstrap (>=1000)" << endl
+            << "  -bsam GENE|GENESITE  Resample GENE or GENE+SITE for partition (default: SITE)" << endl
             << "  -wbt                 Write bootstrap trees to .ufboot file (default: none)" << endl
             << "  -wbtl                Like -wbt but also writing branch lengths" << endl
 //            << "  -n <#iterations>     Minimum number of iterations (default: 100)" << endl
@@ -3381,32 +3422,32 @@ void usage_iqtree(char* argv[], bool full_command) {
             << "  -alrt 0              Parametric aLRT test (Anisimova and Gascuel 2006)" << endl
             << "  -abayes              approximate Bayes test (Anisimova et al. 2011)" << endl
             << "  -lbp <#replicates>   Fast local bootstrap probabilities" << endl
-            << endl << "AUTOMATIC MODEL SELECTION:" << endl
+            << endl << "MODEL-FINDER:" << endl
             << "  -m TESTONLY          Standard model selection (like jModelTest, ProtTest)" << endl
-            << "  -m TEST              Like -m TESTONLY but followed by tree reconstruction" << endl
-            << "  -m TESTNEWONLY       Extended model selection incl. FreeRate (+R) heterogeneity" << endl
-            << "  -m TESTNEW           Like -m TESTNEWONLY but followed by tree reconstruction" << endl
-            << "  -m TESTMERGEONLY     Select best-fit partition scheme (like PartitionFinder)" << endl
-            << "  -m TESTMERGE         Like -m TESTMERGEONLY but followed by tree reconstruction" << endl
-            << "  -m TESTNEWMERGEONLY  Like -m TESTMERGEONLY but includes FreeRate heterogeneity" << endl
-            << "  -m TESTNEWMERGE      Like -m TESTNEWMERGEONLY followed by tree reconstruction" << endl
+            << "  -m TEST              Standard model selection followed by tree inference" << endl
+            << "  -m MF                Extended model selection with FreeRate heterogeneity" << endl
+            << "  -m MFP               Extended model selection followed by tree inference" << endl
+            << "  -m TESTMERGEONLY     Find best partition scheme (like PartitionFinder)" << endl
+            << "  -m TESTMERGE         Find best partition scheme followed by tree inference" << endl
+            << "  -m MF+MERGE          Find best partition scheme incl. FreeRate heterogeneity" << endl
+            << "  -m MFP+MERGE         Like -m MF+MERGE followed by tree inference" << endl
             << "  -rcluster <percent>  Percentage of partition pairs (relaxed clustering alg.)" << endl
             << "  -mset program        Restrict search to models supported by other programs" << endl
-            << "                       (i.e., raxml, phyml or mrbayes)" << endl
+            << "                       (raxml, phyml or mrbayes)" << endl
             << "  -mset m1,...,mk      Restrict search to models in a comma-separated list" << endl
             << "                       (e.g. -mset WAG,LG,JTT)" << endl            
-            << "  -msub source         Restrict search to AA models designed for specific sources" << endl
-            << "                       (i.e., nuclear, mitochondrial, chloroplast or viral)" << endl            
+            << "  -msub source         Restrict search to AA models for specific sources" << endl
+            << "                       (nuclear, mitochondrial, chloroplast or viral)" << endl
             << "  -mfreq f1,...,fk     Restrict search to using a list of state frequencies" << endl
-            << "                       (default protein: -mfreq FU,F; codon: -mfreq ,F1x4,F3x4,F)" << endl            
-            << "  -mrate r1,...,rk     Restrict search to using a list of rate-across-sites models" << endl
-            << "                       (e.g. -mrate E,I,G,I+G,R)" << endl
+            << "                       (default AA: -mfreq FU,F; codon: -mfreq ,F1x4,F3x4,F)" << endl
+            << "  -mrate r1,...,rk     Restrict search to a list of rate-across-sites models" << endl
+            << "                       (e.g. -mrate E,I,G,I+G,R is used for -m MF)" << endl
             << "  -cmin <kmin>         Min #categories for FreeRate model [+R] (default: 2)" << endl
             << "  -cmax <kmax>         Max #categories for FreeRate model [+R] (default: 10)" << endl
-            << "  –merit AIC|AICc|BIC  Optimality criterion to use (default: all)" << endl
+            << "  -merit AIC|AICc|BIC  Optimality criterion to use (default: all)" << endl
 //            << "  -msep                Perform model selection and then rate selection" << endl
-            << "  -mtree               Performing full tree search for each model considered" << endl
-            << "  -mredo               Ignoring model results computed earlier (default: no)" << endl
+            << "  -mtree               Perform full tree search for each model considered" << endl
+            << "  -mredo               Ignore model results computed earlier (default: reuse)" << endl
             << "  -madd mx1,...,mxk    List of mixture models to also consider" << endl
             << "  -mdef <nexus_file>   A model definition NEXUS file (see Manual)" << endl
 
@@ -3418,8 +3459,7 @@ void usage_iqtree(char* argv[], bool full_command) {
             << "              Protein: LG (default), Poisson, cpREV, mtREV, Dayhoff, mtMAM," << endl
             << "                       JTT, WAG, mtART, mtZOA, VT, rtREV, DCMut, PMB, HIVb," << endl
             << "                       HIVw, JTTDCMut, FLU, Blosum62, GTR20" << endl
-            << "      Protein mixture: C10,...,C60, EX2, EX3, EHO, UL2, UL3, EX_EHO, LG4M, LG4X," << endl
-            << "                       JTTCF4G" << endl
+            << "      Protein mixture: C10,...,C60, EX2, EX3, EHO, UL2, UL3, EX_EHO, LG4M, LG4X" << endl
             << "               Binary: JC2 (default), GTR2" << endl
             << "      Empirical codon: KOSI07, SCHN05" << endl 
             << "    Mechanistic codon: GY (default), MG, MGK, GY0K, GY1KTS, GY1KTV, GY2K," << endl
@@ -3435,10 +3475,10 @@ void usage_iqtree(char* argv[], bool full_command) {
             << "  -m <model_name>+ASC  Ascertainment bias correction for morphological/SNP data" << endl
             << "  -m \"MIX{m1,...mK}\"   Mixture model with K components" << endl
             << "  -m \"FMIX{f1,...fK}\"  Frequency mixture model with K components" << endl
-            << "  -mwopt               Turn on optimizing mixture weights (default: none)" << endl
+            << "  -mwopt               Turn on optimizing mixture weights (default: auto)" << endl
             << endl << "RATE HETEROGENEITY AMONG SITES:" << endl
             << "  -m <model_name>+I or +G[n] or +I+G[n] or +R[n]" << endl
-            << "                       Invar, Gamma, Invar+Gamma, or FreeRate model where 'n' is" << endl
+            << "                       Invar, Gamma, Invar+Gamma, or FreeRate model where n is" << endl
             << "                       number of categories (default: n=4)" << endl
             << "  -a <Gamma_shape>     Gamma shape parameter for site rates (default: estimate)" << endl
             << "  -amin <min_shape>    Min Gamma shape parameter for site rates (default: 0.02)" << endl
@@ -3451,7 +3491,7 @@ void usage_iqtree(char* argv[], bool full_command) {
             << endl << "SITE-SPECIFIC FREQUENCY MODEL:" << endl 
             << "  -ft <tree_file>      Input tree to infer site frequency model" << endl
             << "  -fs <in_freq_file>   Input site frequency model file" << endl
-            << "  -fmax                Posterior maximum instead of posterior mean approximation" << endl
+            << "  -fmax                Posterior maximum instead of mean approximation" << endl
             //<< "  -wsf                 Write site frequency model to .sitefreq file" << endl
             //<< "  -c <#categories>     Number of Gamma rate categories (default: 4)" << endl
 //            << endl << "TEST OF MODEL HOMOGENEITY:" << endl
@@ -3525,7 +3565,7 @@ void usage_iqtree(char* argv[], bool full_command) {
             << "  -wspmr               Write site probabilities per mixture+rate class" << endl
 			<< "  -wpl                 Write partition log-likelihoods to .partlh file" << endl
             << "  -fconst f1,...,fN    Add constant patterns into alignment (N=#nstates)" << endl
-            << "  -me <epsilon>        Logl epsilon for model parameter optimization (default 0.01)" << endl
+            << "  -me <epsilon>        LogL epsilon for parameter estimation (default 0.01)" << endl
             << "  --no-outfiles        Suppress printing output files" << endl;
 //            << "  -d <file>            Reading genetic distances from file (default: JC)" << endl
 //			<< "  -d <outfile>         Calculate the distance matrix inferred from tree" << endl
@@ -3544,22 +3584,25 @@ void usage_iqtree(char* argv[], bool full_command) {
 
 void quickStartGuide() {
     printCopyright(cout);
-    cout << "Minimal command-line examples (replace 'iqtree ...' with actual path to executable):" << endl << endl
-        << "1. Reconstruct maximum-likelihood tree from a sequence alignment (example.phy)" << endl
-         << "   with the best-fit substitution model automatically selected:" << endl
-         << "     iqtree -s example.phy -m TEST" << endl << endl
-         << "2. Reconstruct ML tree and assess branch supports with ultrafast bootstrap" << endl
-         << "   and SH-aLRT test (1000 replicates):" << endl
-         << "     iqtree -s example.phy -m TEST -alrt 1000 -bb 1000" << endl << endl
-         << "3. Perform partitioned analysis with partition definition file (example.nex)" << endl
-         << "   in Nexus or RAxML format using edge-linked model and gene-specific rates:" << endl
-         << "     iqtree -s example.phy -spp example.nex -m TEST" << endl << endl
-         << "   (for edge-unlinked model replace '-spp' with '-sp' option)" << endl << endl
-         << "4. Merge partitions to reduce model complexity:" << endl
-         << "     iqtree -s example.phy -sp example.nex -m TESTMERGE" << endl << endl
-         << "5. Perform model selection only: use '-m TESTONLY' or '-m TESTMERGEONLY'" << endl << endl
+    cout << "Command-line examples (replace 'iqtree ...' by actual path to executable):" << endl << endl
+         << "1. Infer maximum-likelihood tree from a sequence alignment (example.phy)" << endl
+         << "   with the best-fit model automatically selected by ModelFinder:" << endl
+         << "     iqtree -s example.phy" << endl << endl
+         << "2. Perform ModelFinder without subsequent tree inference:" << endl
+         << "     iqtree -s example.phy -m MF" << endl
+         << "   (use '-m TEST' to resemble jModelTest/ProtTest)" << endl << endl
+         << "3. Combine ModelFinder, tree search, ultrafast bootstrap and SH-aLRT test:" << endl
+         << "     iqtree -s example.phy -alrt 1000 -bb 1000" << endl << endl
+         << "4. Perform edge-linked proportional partition model (example.nex):" << endl
+         << "     iqtree -s example.phy -spp example.nex" << endl
+         << "   (replace '-spp' by '-sp' for edge-unlinked model)" << endl << endl
+         << "5. Find best partition scheme by possibly merging partitions:" << endl
+         << "     iqtree -s example.phy -sp example.nex -m MF+MERGE" << endl
+         << "   (use '-m TESTMERGEONLY' to resemble PartitionFinder)" << endl << endl
+         << "6. Find best partition scheme followed by tree inference and bootstrap:" << endl
+         << "     iqtree -s example.phy -spp example.nex -m MFP+MERGE -bb 1000" << endl << endl
 #ifdef _OPENMP
-         << "6. Use 4 CPU cores to speed up computation: add '-nt 4' option" << endl << endl
+         << "7. Use 4 CPU cores to speed up computation: add '-nt 4' option" << endl << endl
 #endif
          << "To show all available options: run 'iqtree -h'" << endl << endl
          << "Have a look at the tutorial and manual for more information:" << endl
diff --git a/tools.h b/tools.h
index 2e2f380..7e4613d 100644
--- a/tools.h
+++ b/tools.h
@@ -425,8 +425,20 @@ struct NNIInfo {
     int iqpnni_iteration;
 };
 
+/*
+    0           = 80386 instruction set
+    1  or above = SSE (XMM) supported by CPU (not testing for O.S. support)
+    2  or above = SSE2
+    3  or above = SSE3
+    4  or above = Supplementary SSE3 (SSSE3)
+    5  or above = SSE4.1
+    6  or above = SSE4.2
+    7  or above = AVX supported by CPU and operating system
+    8  or above = AVX2
+    9  or above = AVX512F
+*/
 enum LikelihoodKernel {
-	LK_EIGEN, LK_EIGEN_SSE
+	LK_386, LK_SSE, LK_SSE2, LK_SSE3, LK_SSSE3, LK_SSE41, LK_SSE42, LK_AVX, LK_AVX_FMA, LK_AVX512
 };
 
 enum LhMemSave {
@@ -1450,9 +1462,6 @@ public:
      */
     LikelihoodKernel SSE;
 
-    /** TRUE to not use AVX even available in CPU, default: FALSE */
-    int lk_no_avx;
-
     /** TRUE for safe numerical scaling (per category; used for large trees), default: FALSE */
     bool lk_safe_scaling;
 
@@ -1807,6 +1816,9 @@ public:
 	 */
 	bool print_bootaln;
 
+    /** TRUE to print bootstrapped site frequency for e.g. PMSF */
+    bool print_boot_site_freq;
+
 	/** true to print sub alignments of super alignment, default: false */
 	bool print_subaln;
 
@@ -1961,6 +1973,9 @@ void outWarning(const char *warn);
 void outWarning(string warn);
 
 
+/** safe version of std::getline to deal with files from different platforms */ 
+std::istream& safeGetline(std::istream& is, std::string& t);
+
 /*--------------------------------------------------------------*/
 /*--------------------------------------------------------------*/
 
@@ -2317,11 +2332,11 @@ int random_int(int n, int *rstream = NULL);
 double random_double(int *rstream = NULL);
 
 template <class T>
-void my_random_shuffle (T first, T last)
+void my_random_shuffle (T first, T last, int *rstream = NULL)
 {
 	int n = last - first;
 	for (int i=n-1; i>0; --i) {
-		swap (first[i],first[random_int(i+1)]);
+		swap (first[i],first[random_int(i+1, rstream)]);
 	}
 }
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/iqtree.git



More information about the debian-med-commit mailing list