[med-svn] [iqtree] 02/08: Imported Upstream version 1.3.11+dfsg

Mon Dec 14 08:24:13 UTC 2015

This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository iqtree.

commit 073d744f273f1659db73f666d18d68b5fbdffba2
Author: Andreas Tille <tille at debian.org>
Date:   Mon Dec 14 08:43:31 2015 +0100

    Imported Upstream version 1.3.11+dfsg
---
 CMakeLists.txt                              |  125 +-
 FindEigen3.cmake                            |   81 +
 LICENSE                                     |  340 ++
 README.md                                   |    6 +
 alignment.cpp                               |   20 +-
 alignmentpairwise.cpp                       |    8 +-
 bionj.h                                     |   10 +-
 eigendecomposition.cpp                      |   19 +-
 example/example.nex                         |    9 +
 example/example.phy                         |   46 +
 {model => example}/models.nex               |    0
 examples/example.phy                        |   46 +
 gss.cpp                                     |    2 +-
 gzstream.h                                  |    3 +-
 iqtree.cpp                                  |  662 +-
 iqtree.h                                    |   29 +-
 iqtree_config.h.in                          |    4 +-
 mexttree.cpp                                |   22 +
 mexttree.h                                  |    9 +
 model/modelcodon.cpp                        |   68 +-
 model/modelcodon.h                          |    3 +-
 model/modeldna.cpp                          |   56 +-
 model/modeldna.h                            |    3 +-
 model/modelfactory.cpp                      |   53 +-
 model/modelfactory.h                        |    3 +-
 model/modelgtr.cpp                          |  160 +-
 model/modelgtr.h                            |   19 +-
 model/modelmixture.cpp                      |  279 +-
 model/modelmixture.h                        |   35 +-
 model/modelprotein.cpp                      |    4 +-
 model/modelset.cpp                          |    6 +-
 model/modelset.h                            |    3 +-
 model/modelsubst.h                          |   13 +-
 model/partitionmodel.cpp                    |   41 +-
 model/partitionmodel.h                      |   13 +
 model/ratefree.cpp                          |  135 +-
 model/ratefree.h                            |   12 +-
 model/ratefreeinvar.cpp                     |    8 +-
 model/ratefreeinvar.h                       |    3 +-
 model/rategamma.cpp                         |   49 +-
 model/rategamma.h                           |   14 +-
 model/rategammainvar.cpp                    |   40 +-
 model/rategammainvar.h                      |    3 +-
 model/rateheterogeneity.h                   |   19 +-
 model/rateinvar.cpp                         |   12 +-
 model/rateinvar.h                           |   21 +-
 model/ratekategory.cpp                      |   22 +-
 model/ratekategory.h                        |    3 +-
 msetsblock.cpp                              |    5 +
 mtree.cpp                                   |  103 +-
 mtree.h                                     |   20 +
 mtreeset.cpp                                |   29 +
 mtreeset.h                                  |    2 +
 ncl/nxstoken.h                              |    6 +-
 ngs.cpp                                     |    7 +-
 ngs.h                                       |    3 +-
 optimization.cpp                            |    5 +-
 optimization.h                              |    2 +-
 pda.cpp                                     |   21 +
 pdnetwork.cpp                               |    4 +-
 phyloanalysis.cpp                           |  164 +-
 phylokernel.h                               |    6 +-
 phylokernelmixrate.h                        |    2 +
 phylokernelmixture.h                        |    2 +
 phylolib.h                                  |   26 -
 phylonode.cpp                               |   10 +-
 phylosupertree.cpp                          |   27 +-
 phylosupertree.h                            |    4 +-
 phylosupertreeplen.cpp                      |  351 +-
 phylosupertreeplen.h                        |   43 +-
 phylotesting.cpp                            |  299 +-
 phylotesting.h                              |    4 +-
 phylotree.cpp                               |  556 +-
 phylotree.h                                 |   51 +-
 phylotreepars.cpp                           |    2 +
 phylotreesse.cpp                            |  103 +-
 pllnni.cpp                                  |   61 +-
 pllnni.h                                    |    6 +-
 pllrepo/AUTHORS                             |    0
 pllrepo/COPYING                             |  674 ---
 pllrepo/ChangeLog                           |    0
 pllrepo/Doxyfile                            | 2299 -------
 pllrepo/INSTALL                             |  370 --
 pllrepo/Makefile.am                         |    7 -
 pllrepo/NEWS                                |    0
 pllrepo/README                              |    0
 pllrepo/configure.ac                        |  123 -
 pllrepo/sources.am                          |    2 -
 pllrepo/src/CMakeLists.txt                  |   67 -
 pllrepo/src/Makefile.ALL                    |   54 -
 pllrepo/src/Makefile.ARM                    |   51 -
 pllrepo/src/Makefile.AVX                    |   60 -
 pllrepo/src/Makefile.AVX-MPI                |   59 -
 pllrepo/src/Makefile.AVX-PTHREADS           |   61 -
 pllrepo/src/Makefile.AVX.clang              |   57 -
 pllrepo/src/Makefile.AVX.shared             |   68 -
 pllrepo/src/Makefile.MIC-PTHREADS           |   62 -
 pllrepo/src/Makefile.SSE3                   |   52 -
 pllrepo/src/Makefile.SSE3-MPI               |   50 -
 pllrepo/src/Makefile.SSE3-PTHREADS          |   52 -
 pllrepo/src/Makefile.am                     |   53 -
 pllrepo/src/alignment.c                     |  754 ---
 pllrepo/src/avxLikelihood.c                 | 4111 -------------
 pllrepo/src/bipartitionList.c               |  434 --
 pllrepo/src/cycle.h                         |  516 --
 pllrepo/src/errcodes.h                      |   69 -
 pllrepo/src/evaluateGenericSpecial.c        | 3321 ----------
 pllrepo/src/evaluatePartialGenericSpecial.c | 1378 -----
 pllrepo/src/fastDNAparsimony.c              | 1941 ------
 pllrepo/src/genericParallelization.c        | 2283 -------
 pllrepo/src/genericParallelization.h        |  127 -
 pllrepo/src/globalVariables.h               |  170 -
 pllrepo/src/hardware.c                      |  165 -
 pllrepo/src/hardware.h                      |   48 -
 pllrepo/src/hash.c                          |  219 -
 pllrepo/src/hash.h                          |   50 -
 pllrepo/src/lexer.c                         |  299 -
 pllrepo/src/lexer.h                         |   88 -
 pllrepo/src/makenewzGenericSpecial.c        | 3145 ----------
 pllrepo/src/mem_alloc.c                     |  228 -
 pllrepo/src/mem_alloc.h                     |   70 -
 pllrepo/src/mic_native.h                    |   56 -
 pllrepo/src/mic_native_aa.c                 | 1254 ----
 pllrepo/src/mic_native_dna.c                |  676 ---
 pllrepo/src/models.c                        | 4377 --------------
 pllrepo/src/newick.c                        |  583 --
 pllrepo/src/newick.h                        |   61 -
 pllrepo/src/newviewGenericSpecial.c         | 8736 ---------------------------
 pllrepo/src/optimizeModel.c                 | 3145 ----------
 pllrepo/src/parsePartition.c                |  388 --
 pllrepo/src/parsePartition.h                |   51 -
 pllrepo/src/parsimony.c                     |  865 ---
 pllrepo/src/pll.h                           | 1692 ------
 pllrepo/src/pllInternal.h                   |  313 -
 pllrepo/src/pthread.h                       | 1368 -----
 pllrepo/src/queue.c                         |   96 -
 pllrepo/src/queue.h                         |   48 -
 pllrepo/src/randomTree.c                    |  177 -
 pllrepo/src/recom.c                         |  689 ---
 pllrepo/src/restartHashTable.c              |  357 --
 pllrepo/src/sched.h                         |  183 -
 pllrepo/src/searchAlgo.c                    | 3310 ----------
 pllrepo/src/semaphore.h                     |  169 -
 pllrepo/src/ssort.c                         |  121 -
 pllrepo/src/stack.c                         |   85 -
 pllrepo/src/stack.h                         |   48 -
 pllrepo/src/topologies.c                    |  778 ---
 pllrepo/src/trash.c                         |  129 -
 pllrepo/src/treeIO.c                        |  236 -
 pllrepo/src/treeIO.h                        |   23 -
 pllrepo/src/utils.c                         | 3734 ------------
 stoprule.cpp                                |    4 +-
 test_scripts/README                         |   19 +
 test_scripts/compile.sh                     |  120 +
 test_scripts/gen_test_standard.py           |   92 +
 test_scripts/gen_test_webserver.py          |   85 +
 test_scripts/generate_test_cmds.py          |   97 +
 test_scripts/jobmanager.py                  |  102 +
 test_scripts/submit_jobs.sh                 |   45 +
 test_scripts/submitjob.sh                   |    2 +
 test_scripts/test_configs.txt               |   27 +
 test_scripts/test_data/d59_8.nex            |   13 +
 test_scripts/test_data/d59_8.phy            |   60 +
 test_scripts/test_data/example.nex          |    5 +
 test_scripts/test_data/example.phy          |   46 +
 test_scripts/test_data/prot_M126_27_269.phy |   28 +
 tools.cpp                                   |  193 +-
 tools.h                                     |   68 +-
 vectorclass/changelog.txt                   |   25 +-
 vectorclass/special.zip                     |  Bin 34472 -> 34477 bytes
 vectorclass/vectorclass.h                   |    6 +-
 vectorclass/vectorclass.pdf                 |  Bin 466946 -> 476370 bytes
 vectorclass/vectorf128.h                    |   32 +-
 vectorclass/vectorf256.h                    |   30 +-
 vectorclass/vectorf256e.h                   |    8 +-
 vectorclass/vectorf512.h                    |   29 +-
 vectorclass/vectori128.h                    |  103 +-
 vectorclass/vectori256.h                    |  529 +-
 vectorclass/vectori256e.h                   |   68 +-
 vectorclass/vectori512.h                    |    4 +-
 vectorclass/vectormath_exp.h                |   94 +-
 vectorclass/vectormath_hyp.h                |  120 +-
 vectorclass/vectormath_trig.h               |   86 +-
 183 files changed, 4516 insertions(+), 59474 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index ccfa868..5d93cb1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,33 +1,41 @@
 ##################################################################
 # IQ-TREE cmake build definition
-# Copyright (c) 2012-2014 Bui Quang Minh, Lam Tung Nguyen
+# Copyright (c) 2012-2015 Bui Quang Minh, Lam Tung Nguyen
 ##################################################################
 
 # Windows example usages:
-# cmake -G "Visual Studio 12" <source_dir>			(32-bit SSE3 version, compiled with MSVC)
-# cmake -G "Visual Studio 12 Win64" <source_dir>	(64-bit SSE3 version, compiled with MSVC)
-# cmake -G "Visual Studio 12 Win64" -T "Intel C++ Compiler XE 15.0" <source_dir>		(64-bit SSE3 version, compiled with ICC)
-# cmake -G "Visual Studio 12 Win64" -T "Intel C++ Compiler XE 15.0" -DIQTREE_FLAGS="avx" <source_dir>		(64-bit AVX version, compiled with ICC)
-# cmake -G "Visual Studio 12 Win64" -T "Intel C++ Compiler XE 15.0" -DIQTREE_FLAGS="omp" <source_dir>		(64-bit SSE3+OpenMP version, compiled with ICC)
-# cmake -G "Visual Studio 12 Win64" -T "Intel C++ Compiler XE 15.0" -DIQTREE_FLAGS="avx omp" <source_dir>	(64-bit AVX+OpenMP version, compiled with ICC)
-#
+#------------------------
+# cmake -G "Visual Studio 12" <source_dir>			(32-bit version, compiled with MSVC)
+# cmake -G "Visual Studio 12 Win64" <source_dir>	(64-bit version, compiled with MSVC)
+# cmake -G "Visual Studio 12 Win64" -T "Intel C++ Compiler XE 15.0" <source_dir>		(64-bit version, compiled with ICC)
+# cmake -G "MinGW Makefiles" <source_dir> (TDM-GCC)
+# cmake -G "Unix Makefiles" -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_MAKE_PROGRAM=mingw32-make <source_dir> (TDM-GCC and clang)
+
 # Linux example usages:
-# cmake <source_dir>						   (SSE3 version)
-# cmake -DIQTREE_FLAGS="avx" <source_dir>      (AVX version)
+#----------------------
+# cmake <source_dir>						   (sequential version)
 # cmake -DIQTREE_FLAGS="omp" <source_dir>      (OpenMP version)
-# cmake -DIQTREE_FLAGS="omp avx" <source_dir>  (AVX+OpenMP version)
-# cmake -DIQTREE_FLAGS="m32" <source_dir>      (32-bit SSE3 version)
-# cmake -DIQTREE_FLAGS="m32 omp" <source_dir>  (32-bit SSE3+OpenMP version)
+# cmake -DIQTREE_FLAGS="m32" <source_dir>      (32-bit sequential version)
+# cmake -DIQTREE_FLAGS="m32 omp" <source_dir>  (32-bit OpenMP version)
 #
+
 # Mac OSX example usages:
-# cmake -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ <source_dir>							(SSE3 version)
-# cmake -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DIQTREE_FLAGS="avx" <source_dir>		(AVX version)
-# To build OpenMP version one needs to download and compile Clang-OpenMP. 
-# Then assuming clang2 and clang2++ are the newly built compilers, then:
-# cmake -DCMAKE_C_COMPILER=clang2 -DCMAKE_CXX_COMPILER=clang2++ -DIQTREE_FLAGS="omp" <source_dir>		(SSE3+OpenMP version)
-# cmake -DCMAKE_C_COMPILER=clang2 -DCMAKE_CXX_COMPILER=clang2++ -DIQTREE_FLAGS="omp avx" <source_dir>	(AVX+OpenMP version)
+#------------------------
+# cmake -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ <source_dir>							(sequential version)
 #
+# To build the OpenMP version one needs Clang version 3.7 or later (as of November 2015).
+# Assuming clang3.7 and clang++3.7 are the newly installed compilers, run:
+# cmake -DCMAKE_C_COMPILER=clang3.7 -DCMAKE_CXX_COMPILER=clang++3.7 -DIQTREE_FLAGS="omp" <source_dir>		(OpenMP version)
+#
+
 
+# Compile OpenMP version: cmake -DIQTREE_FLAGS=omp ....
+# Compile 32-bit version: cmake -DIQTREE_FLAGS=m32 ....
+# Compile static version: cmake -DIQTREE_FLAGS=static ....
+# Compile static OpenMP version: cmake -DIQTREE_FLAGS="omp static" ....
+
+#NOTE: Static linking with clang on Windows: create a symlink named libgcc_eh.a pointing to libgcc.a (administrator rights required)
+# C:\TDM-GCC-64\lib\gcc\x86_64-w64-mingw32\5.1.0>mklink libgcc_eh.a libgcc.a
 
 cmake_minimum_required(VERSION 2.8)
 set(CMAKE_LEGACY_CYGWIN_WIN32 0)
@@ -37,7 +45,7 @@ add_definitions(-DIQ_TREE)
 # The version number.
 set (iqtree_VERSION_MAJOR 1)
 set (iqtree_VERSION_MINOR 3)
-set (iqtree_VERSION_PATCH 8) 
+set (iqtree_VERSION_PATCH 11) 
 
 set(BUILD_SHARED_LIBS OFF)
 
@@ -195,7 +203,8 @@ if (IQTREE_FLAGS MATCHES "omp")
 		set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread")
   		set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
   	elseif (CLANG) 
-  		set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
+		set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread")
+  		set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp=libomp")
   	endif()
 else()
 	message("Parallel      : None")
@@ -317,7 +326,9 @@ configure_file (
 # add the binary tree to the search path for include files
 # so that we will find iqtree_config.h
 include_directories("${PROJECT_BINARY_DIR}")
-include_directories("${PROJECT_BINARY_DIR}/zlib-1.2.7")
+
+#zlib is detected further below (system zlib if found, otherwise the bundled zlib-1.2.7)
+#include_directories("${PROJECT_BINARY_DIR}/zlib-1.2.7")
 
 
 ##################################################################
@@ -328,8 +339,13 @@ add_subdirectory(ncl)
 add_subdirectory(lbfgsb)
 add_subdirectory(whtest)
 add_subdirectory(sprng)
-add_subdirectory(zlib-1.2.7)
+#add_subdirectory(zlib-1.2.7)
 add_subdirectory(vectorclass)
+LIST(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}")
+find_package(Eigen3)
+if(EIGEN3_FOUND)
+  include_directories(${EIGEN3_INCLUDE_DIR})
+endif(EIGEN3_FOUND)
 add_subdirectory(model)
 
 ##################################################################
@@ -347,7 +363,7 @@ circularnetwork.cpp
 eigendecomposition.cpp
 greedy.cpp
 gss.cpp
-guidedbootstrap.cpp
+#guidedbootstrap.cpp
 gurobiwrapper.cpp
 gzstream.cpp
 hashsplitset.cpp
@@ -401,8 +417,23 @@ checkpoint.cpp
 upperbounds.cpp
 )
 
+if (NOT IQTREE_FLAGS MATCHES "nozlib")
+    find_package(ZLIB)
+endif()
+
+if(ZLIB_FOUND)
+  message ("Using system zlib")
+  include_directories(${ZLIB_INCLUDE_DIRS})
+  target_link_libraries(iqtree ${ZLIB_LIBRARIES})
+else(ZLIB_FOUND)
+  message ("Using own zlib-1.2.7")
+  include_directories("${PROJECT_BINARY_DIR}/zlib-1.2.7" "${PROJECT_SOURCE_DIR}/zlib-1.2.7")
+  add_subdirectory(zlib-1.2.7)
+  target_link_libraries(iqtree zlibstatic)
+endif(ZLIB_FOUND)
+
 if (NOT IQTREE_FLAGS MATCHES "avx" AND NOT IQTREE_FLAGS MATCHES "fma")
-	set_target_properties(iqtree pll ncl lbfgsb whtest zlibstatic sprng vectorclass model PROPERTIES COMPILE_FLAGS "${SSE_FLAGS}")
+	set_target_properties(iqtree pll ncl lbfgsb whtest sprng vectorclass model PROPERTIES COMPILE_FLAGS "${SSE_FLAGS}")
 	if (NOT BINARY32 AND NOT IQTREE_FLAGS MATCHES "novx")
 		set_target_properties(avxkernel pllavx PROPERTIES COMPILE_FLAGS "${AVX_FLAGS}")
 	endif()
@@ -419,6 +450,10 @@ else()
 	set(PLATFORM_LIB "m")
 endif()
 
+if(CLANG AND WIN32 AND IQTREE_FLAGS MATCHES "static")
+    set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pthread -Wl,--allow-multiple-definition")
+endif()
+
 set(THREAD_LIB "")
 if (IQTREE_FLAGS MATCHES "omp") 
 	link_directories(${PROJECT_SOURCE_DIR}/lib)
@@ -431,14 +466,21 @@ if (IQTREE_FLAGS MATCHES "omp")
 			set(THREAD_LIB "pthreadVC2")
 		endif()
 	elseif(CLANG AND APPLE)
-		set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -L${PROJECT_SOURCE_DIR}/lib")
+		set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -L${PROJECT_SOURCE_DIR}/libmac")
+	elseif(CLANG AND WIN32)
+        if (BINARY32) 
+            set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -L${PROJECT_SOURCE_DIR}/lib32 libiomp5md.dll")
+        else()
+            set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -L${PROJECT_SOURCE_DIR}/lib libiomp5md.dll")
+        endif()
+#        set(THREAD_LIB "ompstatic")
 	endif()
 endif()
 
 if (BINARY32 OR IQTREE_FLAGS MATCHES "novx")
-    target_link_libraries(iqtree pll ncl lbfgsb whtest zlibstatic sprng vectorclass model ${PLATFORM_LIB} ${THREAD_LIB})	
+    target_link_libraries(iqtree pll ncl lbfgsb whtest sprng vectorclass model ${PLATFORM_LIB} ${STD_LIB} ${THREAD_LIB})	
 else()
-    target_link_libraries(iqtree pll pllavx ncl lbfgsb whtest zlibstatic sprng vectorclass model avxkernel ${PLATFORM_LIB} ${THREAD_LIB})	
+    target_link_libraries(iqtree pll pllavx ncl lbfgsb whtest sprng vectorclass model avxkernel ${PLATFORM_LIB} ${STD_LIB} ${THREAD_LIB})	
 endif()
 
 ##################################################################
@@ -473,22 +515,33 @@ endif()
 # add the install targets
 ##############################################################
 install (TARGETS iqtree DESTINATION bin)
-install (FILES "${PROJECT_SOURCE_DIR}/model/models.nex" DESTINATION bin)
-install (FILES "${PROJECT_SOURCE_DIR}/examples/example.phy" DESTINATION .)
+install (FILES "${PROJECT_SOURCE_DIR}/example/models.nex" DESTINATION .)
+install (FILES "${PROJECT_SOURCE_DIR}/example/example.phy" DESTINATION .)
+install (FILES "${PROJECT_SOURCE_DIR}/example/example.nex" DESTINATION .)
 install (FILES "${PROJECT_SOURCE_DIR}/Documents/iqtree-manual-1.0.pdf" DESTINATION .)
 
 if (WIN32)
 	install (FILES "${BINARY_DIR}/iqtree${EXE_SUFFIX}-click.exe" DESTINATION bin)
 	if (EXE_SUFFIX MATCHES "omp" AND MSVC)
-if (BINARY32)
-		install(FILES  "${PROJECT_SOURCE_DIR}/lib32/pthreadVC2.dll" DESTINATION bin)
-else()
-		install(FILES  "${PROJECT_SOURCE_DIR}/lib/pthreadVC2.dll" DESTINATION bin)
-endif()
-		install(FILES  "${PROJECT_SOURCE_DIR}/lib/libiomp5md.dll" DESTINATION bin)
+        if (BINARY32)
+            install(FILES  "${PROJECT_SOURCE_DIR}/lib32/pthreadVC2.dll" DESTINATION bin)
+            install(FILES  "${PROJECT_SOURCE_DIR}/lib32/libiomp5md.dll" DESTINATION bin)
+        else()
+            install(FILES  "${PROJECT_SOURCE_DIR}/lib/pthreadVC2.dll" DESTINATION bin)
+            install(FILES  "${PROJECT_SOURCE_DIR}/lib/libiomp5md.dll" DESTINATION bin)
+        endif()
 #		install(FILES  "${PROJECT_SOURCE_DIR}/lib/pthreadGC2.dll" DESTINATION bin)
 #		install(FILES  "${PROJECT_SOURCE_DIR}/lib/pthreadGC2_64.dll" DESTINATION bin)
 	endif()
+
+	if (EXE_SUFFIX MATCHES "omp" AND CLANG)
+        if (BINARY32)
+            install(FILES  "${PROJECT_SOURCE_DIR}/lib32/libiomp5md.dll" DESTINATION bin)
+        else()
+            install(FILES  "${PROJECT_SOURCE_DIR}/lib/libiomp5md.dll" DESTINATION bin)
+        endif()
+	endif()
+
 endif()
 
 ##############################################################
diff --git a/FindEigen3.cmake b/FindEigen3.cmake
new file mode 100644
index 0000000..9c546a0
--- /dev/null
+++ b/FindEigen3.cmake
@@ -0,0 +1,81 @@
+# - Try to find Eigen3 lib
+#
+# This module supports requiring a minimum version, e.g. you can do
+#   find_package(Eigen3 3.1.2)
+# to require version 3.1.2 or newer of Eigen3.
+#
+# Once done this will define
+#
+#  EIGEN3_FOUND - system has eigen lib with correct version
+#  EIGEN3_INCLUDE_DIR - the eigen include directory
+#  EIGEN3_VERSION - eigen version
+
+# Copyright (c) 2006, 2007 Montel Laurent, <montel at kde.org>
+# Copyright (c) 2008, 2009 Gael Guennebaud, <g.gael at free.fr>
+# Copyright (c) 2009 Benoit Jacob <jacob.benoit.1 at gmail.com>
+# Redistribution and use is allowed according to the terms of the 2-clause BSD license.
+
+if(NOT Eigen3_FIND_VERSION)
+  if(NOT Eigen3_FIND_VERSION_MAJOR)
+    set(Eigen3_FIND_VERSION_MAJOR 2)
+  endif(NOT Eigen3_FIND_VERSION_MAJOR)
+  if(NOT Eigen3_FIND_VERSION_MINOR)
+    set(Eigen3_FIND_VERSION_MINOR 91)
+  endif(NOT Eigen3_FIND_VERSION_MINOR)
+  if(NOT Eigen3_FIND_VERSION_PATCH)
+    set(Eigen3_FIND_VERSION_PATCH 0)
+  endif(NOT Eigen3_FIND_VERSION_PATCH)
+
+  set(Eigen3_FIND_VERSION "${Eigen3_FIND_VERSION_MAJOR}.${Eigen3_FIND_VERSION_MINOR}.${Eigen3_FIND_VERSION_PATCH}")
+endif(NOT Eigen3_FIND_VERSION)
+
+macro(_eigen3_check_version)
+  file(READ "${EIGEN3_INCLUDE_DIR}/Eigen/src/Core/util/Macros.h" _eigen3_version_header)
+
+  string(REGEX MATCH "define[ \t]+EIGEN_WORLD_VERSION[ \t]+([0-9]+)" _eigen3_world_version_match "${_eigen3_version_header}")
+  set(EIGEN3_WORLD_VERSION "${CMAKE_MATCH_1}")
+  string(REGEX MATCH "define[ \t]+EIGEN_MAJOR_VERSION[ \t]+([0-9]+)" _eigen3_major_version_match "${_eigen3_version_header}")
+  set(EIGEN3_MAJOR_VERSION "${CMAKE_MATCH_1}")
+  string(REGEX MATCH "define[ \t]+EIGEN_MINOR_VERSION[ \t]+([0-9]+)" _eigen3_minor_version_match "${_eigen3_version_header}")
+  set(EIGEN3_MINOR_VERSION "${CMAKE_MATCH_1}")
+
+  set(EIGEN3_VERSION ${EIGEN3_WORLD_VERSION}.${EIGEN3_MAJOR_VERSION}.${EIGEN3_MINOR_VERSION})
+  if(${EIGEN3_VERSION} VERSION_LESS ${Eigen3_FIND_VERSION})
+    set(EIGEN3_VERSION_OK FALSE)
+  else(${EIGEN3_VERSION} VERSION_LESS ${Eigen3_FIND_VERSION})
+    set(EIGEN3_VERSION_OK TRUE)
+  endif(${EIGEN3_VERSION} VERSION_LESS ${Eigen3_FIND_VERSION})
+
+  if(NOT EIGEN3_VERSION_OK)
+
+    message(STATUS "Eigen3 version ${EIGEN3_VERSION} found in ${EIGEN3_INCLUDE_DIR}, "
+                   "but at least version ${Eigen3_FIND_VERSION} is required")
+  endif(NOT EIGEN3_VERSION_OK)
+endmacro(_eigen3_check_version)
+
+if (EIGEN3_INCLUDE_DIR)
+
+  # in cache already
+  _eigen3_check_version()
+  set(EIGEN3_FOUND ${EIGEN3_VERSION_OK})
+
+else (EIGEN3_INCLUDE_DIR)
+
+  find_path(EIGEN3_INCLUDE_DIR NAMES signature_of_eigen3_matrix_library
+      PATHS
+      ${CMAKE_INSTALL_PREFIX}/include
+      ${KDE4_INCLUDE_DIR}
+      PATH_SUFFIXES eigen3 eigen
+    )
+
+  if(EIGEN3_INCLUDE_DIR)
+    _eigen3_check_version()
+  endif(EIGEN3_INCLUDE_DIR)
+
+  include(FindPackageHandleStandardArgs)
+  find_package_handle_standard_args(Eigen3 DEFAULT_MSG EIGEN3_INCLUDE_DIR EIGEN3_VERSION_OK)
+
+  mark_as_advanced(EIGEN3_INCLUDE_DIR)
+
+endif(EIGEN3_INCLUDE_DIR)
+
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..8cdb845
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,340 @@
+                    GNU GENERAL PUBLIC LICENSE
+                       Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc., <http://fsf.org/>
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+                    GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+                            NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+                     END OF TERMS AND CONDITIONS
+
+            How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    {description}
+    Copyright (C) {year}  {fullname}
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+    Gnomovision version 69, Copyright (C) year name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  {signature of Ty Coon}, 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.
+
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..9f60b26
--- /dev/null
+++ b/README.md
@@ -0,0 +1,6 @@
+IQ-TREE
+-------
+
+Efficient phylogenetic software by maximum likelihood
+
+Please see our github wiki for more information: <https://github.com/Cibiv/IQ-TREE/wiki>
diff --git a/alignment.cpp b/alignment.cpp
index e90cc8d..aaa8f54 100644
--- a/alignment.cpp
+++ b/alignment.cpp
@@ -298,6 +298,12 @@ Alignment *Alignment::removeGappySeq() {
 		}
 	if (keep_seqs.size() == nseq)
 		return this;
+    // 2015-12-03: if resulting alignment has too few seqs, try to add some back
+    if (keep_seqs.size() < 3 && getNSeq() >= 3) {
+        for (i = 0; i < nseq && keep_seqs.size() < 3; i++)
+            if (isGapOnlySeq(i))
+                keep_seqs.push_back(i);
+    }
 	Alignment *aln = new Alignment;
 	aln->extractSubAlignment(this, keep_seqs, 0);
 	return aln;
@@ -405,6 +411,7 @@ void Alignment::buildSeqStates(bool add_unobs_const) {
 			has_state[at(site)[seq]] = true;
 		for (string::iterator it = unobs_const.begin(); it != unobs_const.end(); it++)
 			has_state[*it] = true;
+        seq_states[seq].clear();
 		for (int state = 0; state < STATE_UNKNOWN; state++)
 			if (has_state[state])
 				seq_states[seq].push_back(state);
@@ -825,7 +832,7 @@ SeqType Alignment::detectSequenceType(StrVector &sequences) {
 
     for (StrVector::iterator it = sequences.begin(); it != sequences.end(); it++)
         for (string::iterator i = it->begin(); i != it->end(); i++) {
-            if ((*i) != '?' && (*i) != '-' && (*i) != '.' && *i != 'N' && *i != 'X') num_ungap++;
+            if ((*i) != '?' && (*i) != '-' && (*i) != '.' && *i != 'N' && *i != 'X' &&  (*i) != '~') num_ungap++;
             if ((*i) == 'A' || (*i) == 'C' || (*i) == 'G' || (*i) == 'T' || (*i) == 'U')
                 num_nuc++;
             if ((*i) == '0' || (*i) == '1')
@@ -849,6 +856,7 @@ void Alignment::buildStateMap(char *map, SeqType seq_type) {
     assert(STATE_UNKNOWN < 126);
     map[(unsigned char)'?'] = STATE_UNKNOWN;
     map[(unsigned char)'-'] = STATE_UNKNOWN;
+    map[(unsigned char)'~'] = STATE_UNKNOWN;
     map[(unsigned char)'.'] = STATE_UNKNOWN;
     int len;
     switch (seq_type) {
@@ -911,7 +919,7 @@ void Alignment::buildStateMap(char *map, SeqType seq_type) {
 	@return state ID
 */
 char Alignment::convertState(char state, SeqType seq_type) {
-    if (state == '?' || state == '-' || state == '.')
+    if (state == '?' || state == '-' || state == '.' || state == '~')
         return STATE_UNKNOWN;
 
     char *loc;
@@ -1425,7 +1433,7 @@ int Alignment::readPhylip(char *filename, char *sequence_type) {
             } else
                 for (string::iterator it = line.begin(); it != line.end(); it++) {
                     if ((*it) <= ' ') continue;
-                    if (isalnum(*it) || (*it) == '-' || (*it) == '?'|| (*it) == '.' || (*it) == '*')
+                    if (isalnum(*it) || (*it) == '-' || (*it) == '?'|| (*it) == '.' || (*it) == '*' || (*it) == '~')
                         sequences[seq_id].append(1, toupper(*it));
                     else {
                         err_str << "Line " << line_num <<": Unrecognized character " << *it;
@@ -1483,7 +1491,7 @@ int Alignment::readFasta(char *filename, char *sequence_type) {
         if (sequences.empty()) throw "First line must begin with '>' to define sequence name";
         for (string::iterator it = line.begin(); it != line.end(); it++) {
             if ((*it) <= ' ') continue;
-            if (isalnum(*it) || (*it) == '-' || (*it) == '?'|| (*it) == '.' || (*it) == '*')
+            if (isalnum(*it) || (*it) == '-' || (*it) == '?'|| (*it) == '.' || (*it) == '*' || (*it) == '~')
                 sequences.back().append(1, toupper(*it));
             else {
                 err_str << "Line " << line_num <<": Unrecognized character " << *it;
@@ -1588,7 +1596,7 @@ int Alignment::readClustal(char *filename, char *sequence_type) {
         // read sequence contents
         for (string::iterator it = line.begin(); it != line.end(); it++) {
             if ((*it) <= ' ') continue;
-            if (isalnum(*it) || (*it) == '-' || (*it) == '?'|| (*it) == '.' || (*it) == '*')
+            if (isalnum(*it) || (*it) == '-' || (*it) == '?'|| (*it) == '.' || (*it) == '*' || (*it) == '~')
                 sequences[seq_count].append(1, toupper(*it));
             else {
                 throw "Line " +convertIntToString(line_num) + ": Unrecognized character " + *it;
@@ -2564,7 +2572,7 @@ void Alignment::printDist(ostream &out, double *dist_mat) {
     if (max_len < 10) max_len = 10;
     out << nseqs << endl;
     int pos = 0;
-    out.precision(6);
+    out.precision(max((int)ceil(-log10(Params::getInstance().min_branch_length))+1, 6));
     out << fixed;
     for (int seq1 = 0; seq1 < nseqs; seq1 ++)  {
         out.width(max_len);
diff --git a/alignmentpairwise.cpp b/alignmentpairwise.cpp
index efc3c80..38b30a2 100644
--- a/alignmentpairwise.cpp
+++ b/alignmentpairwise.cpp
@@ -141,7 +141,7 @@ double AlignmentPairwise::computeFunction(double value) {
         for (cat = 0; cat < ncat; cat++) {
             tree->getModelFactory()->computeTransMatrix(value*site_rate->getRate(cat), trans_mat);
             double *pair_pos = pair_freq + cat*trans_size;
-            for (i = 0; i < trans_size; i++) if (pair_pos[i] > 1e-6) {
+            for (i = 0; i < trans_size; i++) if (pair_pos[i] > Params::getInstance().min_branch_length) {
                     if (trans_mat[i] <= 0) throw "Negative transition probability";
                     lh -= pair_pos[i] * log(trans_mat[i]);
                 }
@@ -270,7 +270,7 @@ void AlignmentPairwise::computeFuncDerv(double value, double &df, double &ddf) {
             sum_derv2[i] += trans_derv2[i] * rate_sqr;
         }
     }
-    for (i = 0; i < trans_size; i++) if (pair_freq[i] > 1e-6) {
+    for (i = 0; i < trans_size; i++) if (pair_freq[i] > Params::getInstance().min_branch_length) {
 //            lh -= pair_freq[i] * log(sum_trans[i]);
             double d1 = sum_derv1[i] / sum_trans[i];
             df -= pair_freq[i] * d1;
@@ -298,9 +298,9 @@ double AlignmentPairwise::optimizeDist(double initial_dist, double &d2l) {
 
     double negative_lh, ferror;
     if (tree->optimize_by_newton) // Newton-Raphson method
-        dist = minimizeNewton(1e-6, dist, MAX_GENETIC_DIST, 1e-6, d2l);
+        dist = minimizeNewton(Params::getInstance().min_branch_length, dist, MAX_GENETIC_DIST, Params::getInstance().min_branch_length, d2l);
     else // Brent method
-        dist = minimizeOneDimen(1e-6, dist, MAX_GENETIC_DIST, 1e-6, &negative_lh, &ferror);
+        dist = minimizeOneDimen(Params::getInstance().min_branch_length, dist, MAX_GENETIC_DIST, Params::getInstance().min_branch_length, &negative_lh, &ferror);
 
     return dist;
 }
diff --git a/bionj.h b/bionj.h
index 1552550..dafb032 100644
--- a/bionj.h
+++ b/bionj.h
@@ -523,21 +523,21 @@ void Finish(float **delta, int n, POINTERS *trees, FILE *output)
   fprintf(output,":");
 /*   gcvt(length,PREC, str); */
 /*   fprintf(output,"%s,",str); */
-  fprintf(output,"%f,",length);
+  fprintf(output,"%10.8f,",length);
 
   length=Finish_branch_length(last[1],last[0],last[2],delta);
   Print_output(last[1],trees,output);
   fprintf(output,":");
 /*   gcvt(length,PREC, str); */
 /*   fprintf(output,"%s,",str); */
-  fprintf(output,"%f,",length);
+  fprintf(output,"%10.8f,",length);
 
   length=Finish_branch_length(last[2],last[1],last[0],delta);
   Print_output(last[2],trees,output);
   fprintf(output,":");
 /*   gcvt(length,PREC,str); */
 /*   fprintf(output,"%s",str); */
-  fprintf(output,"%f",length);
+  fprintf(output,"%10.8f",length);
   fprintf(output,");");
   fprintf(output,"\n");
 
@@ -738,7 +738,7 @@ int create(const char *inputFile, const char *outputFile) {
 	  strcpy(chain1,"");                     /* to the NEWSWICK format   */
 	  strcat(chain1,":");
 
-	  sprintf(chain1+strlen(chain1),"%f",la);
+	  sprintf(chain1+strlen(chain1),"%10.8f",la);
 /* 	  gcvt(la,PREC, chain2); */
 /* 	  strcat(chain1, chain2); */
 
@@ -749,7 +749,7 @@ int create(const char *inputFile, const char *outputFile) {
 	  strcpy(chain1,"");
 	  strcat(chain1,":");
 
-	  sprintf(chain1+strlen(chain1),"%f",lb);
+	  sprintf(chain1+strlen(chain1),"%10.8f",lb);
 /* 	  gcvt(lb, PREC, chain2); */
 /* 	  strcat(chain1, chain2); */
 	  strcat(chain1,")");
diff --git a/eigendecomposition.cpp b/eigendecomposition.cpp
index 4b61549..528e617 100644
--- a/eigendecomposition.cpp
+++ b/eigendecomposition.cpp
@@ -60,6 +60,14 @@ void EigenDecomposition::eigensystem(
 
 	/* get relative transition matrix and frequencies */
 	memcpy(forg, state_freq, num_state * sizeof(double));
+    // BQM 2015-09-07: normalize state frequencies to 1
+    double sum = 0.0;
+    for (i = 0; i < num_state; i++) 
+        sum += forg[i];
+    sum = 1.0/sum;
+    for (i = 0; i < num_state; i++) 
+        forg[i] *= sum;
+
 	for (i = 0; i < num_state; i++)
 		memcpy(a[i], rate_params[i], num_state * sizeof(double)); 
 
@@ -178,6 +186,15 @@ void EigenDecomposition::eigensystem_sym(double **rate_params, double *state_fre
 	/* get relative transition matrix and frequencies */
 	memcpy(forg, state_freq, num_state * sizeof(double));
     
+    // BQM 2015-09-07: normalize state frequencies to 1
+    double sum = 0.0;
+    for (i = 0; i < num_state; i++) 
+        sum += forg[i];
+    sum = 1.0/sum;
+    for (i = 0; i < num_state; i++) 
+        forg[i] *= sum;
+
+    
 	for (i = 0; i < num_state; i++)
 		memcpy(a[i], rate_params[i], num_state * sizeof(double)); 
 
@@ -257,7 +274,7 @@ void EigenDecomposition::eigensystem_sym(double **rate_params, double *state_fre
 //			for (j = 0; j < num_state; j++) cout << rate_params[i][j] << " ";
 //			cout << endl;
 //		}
-		cout << "State frequencies: " << endl;
+		cout << "State frequencies (might be un-normalized): " << endl;
 		for (i = 0; i < num_state; i++) cout << state_freq[i] << " ";
 		cout << endl;
 	}
diff --git a/example/example.nex b/example/example.nex
new file mode 100644
index 0000000..620068c
--- /dev/null
+++ b/example/example.nex
@@ -0,0 +1,9 @@
+#nexus
+
+begin sets;
+	charset part1 = 1-99\3 2-99\3;
+	charset part2 = 3-99\3;
+	charset part3 = 100-384;
+
+	charpartition mine = HKY:part1, GTR+G:part2, GTR+G: part3;
+end;
diff --git a/example/example.phy b/example/example.phy
new file mode 100644
index 0000000..8637b06
--- /dev/null
+++ b/example/example.phy
@@ -0,0 +1,46 @@
+ 44 384 
+FL-1-103     atgcgcatcacccaaggc---------------------accttctccttcctgcccgacctcacggcggcccaggtcaaggcccagatccagtatgcgctggaccagaactgggcggtctcggtggagtacacggacgatccc------------------------------------------------------catccccggaacacctattgggagatgtggggcctgcccatgttcgacctgcgcgatgccgccggcgtctatggcgaggtcgaggcctgccgcaccgcccatcccggcaagtatgtgcgggtgaacgccttcgactccaatcgcgggtgggagacggtgcgcctctccttcatcgtccagcgtccg
+OSH-1-103    atgcgcatcacccaaggc---------------------tgcttctcgttcctgcccgacctgaccgacgagcagatctcggcgcaggtggactattgcctcggccgcggctgggccgtgagcctcgaacataccgacgacccg------------------------------------------------------catccccggaacacctactgggaaatgtggggcatgccgatgttcgacctgcgcgaccccaagggcgtgatgatcgagctggacgagtgccgcaaggcctggcccggccgctacatccgcatcaatgccttcgattccacccgcggcttcgagacggtcacgatgagcttcatcgtcaaccgcccc
+CEU-1-103    atgcgcatcactcaaggc---------------------actttttccttcctgcccgaactgaccgacgagcagatcaccaaacagctcgaatactgcctgaaccagggctgggcggtcggcctcgaatacaccgacgacccg------------------------------------------------------cacccgcgcaacacgtactgggagatgttcgggctgccgatgttcgacctgcgcgatgccgccggcatcctgatggaaatcaacaacgcgcggaacaccttccccaaccactacatccgcgtcacggccttcgattcgacgcatacggtggagtcggtggtgatgtcgttcatcgtcaatcgtccc
+TH-1-103     atgagacttacacaaggc---------------------gcattttcgttcttacctgacttaacagatgagcaaatcgtaaaacaaattcaatatgctatcagcaaaaactgggctttaaacgttgaatggacagatgatccg------------------------------------------------------caccctcgcaacgcatactgggatttatggggattaccattatttggtattaaagatccagcggctgtaatgtttgaaatcaatgcttgccgtaaagctaaaccagcttgttacgtaaaagtaaatgcgtttgataactcacgtggtgtagaaagctgctgcttatcttttatcgttcaacgtcct
+CAa1-103     atgaaactaacacaagga---------------------gctttctcatttcttcctgacttaactgatgcgcaagtaactaagcaaatccagtacgctttaaataagagttgggctatttcgattgaatatactgatgatccg------------------------------------------------------cacccacgtaacagttactgggagatgtggggccttcctctattcgatgttaaggatccagctgcgattcttttcgaaatcaacatggctcgtaaggctaagcctaactactaccttaaaatagcttgttttgataacacacgtggtatcgaaagttgtgtactttctttcattgtacaacgtcct
+CAb1-103     gtgagagttacacaagga---------------------acattttcttttctaccagacctgacaaatgatcaaatcagaaaacaaattcaatatgccataaataaaggatgggcattgagtgtagaatatacagatgaccct------------------------------------------------------cacccacggaattcttactgggaaatgtggggactgcctttatttgatgtcaaagaccctgcggcaattatgtttgaagttgaagcttgtcgaaaagagaaaagcaactattatattaagctattagcttttgattcaaccaaaggagttgaaagtacagcaatgtcctttatggtcaataggcct
+SI-1-103     atgagagttacacaagga---------------------tgtttttcgtttttaccagatttaagtgatgatcaaattaaacaacaagtttcttacgctatgagcaaaggttgggcggttagtgtagaatggacagatgatcca------------------------------------------------------catccacgtaactcatattgggaattatggggtcttcctttatttgatgttaaagatccagctgcagttatgtatgaacttgctgaatgtagaaaagttaacccagaaggttatattaaaattaatgctttcgatgctagtattggtacagaaagttgtgtaatgtcttttattgtacaacgtcct
+LU-1-103     gtgagacttacacaagga---------------------gctttttcttatttaccagatttaactgatgcacaaatcatcaaacaaattgactactgcttaagcagaggttggtctgttggtgttgaatggactgatgatcca------------------------------------------------------cacccacgtaacgcttactgggaactatggggtcttccattatttgacgtaaaagattcttcagcaattttatacgaagttaatgaatgtcgtcgtttaaaccctgaaggttacattaaattagttgctttcaacgcagcacgtggtactgaaagtagtgcatctgcttttattgtacaacgtcca
+SU-1-103     gtgagaataactcaaggt---------------------accttttcttttttgccggacttgactgatgaacaaatcaaaaaacaaattgattatatgatatctaaaaaattagctataggtattgaatatactaacgacata------------------------------------------------------catcctagaaattcattttgggaaatgtggggattacctctatttgaggtcacagatccagctccagtattatttgaaattaatgcttgtcgtaaagcaaaaagtaatttctatatcaaggtagtaggattttcttctgaaagaggtatagaaagtacaataatttcatttattgtaaatagacca
+RP-56-175    atgcaggtgtggccaccagttggcaagaagaagtttgagaccctttcataccttccacccctcactgatgagcaattgcttaaggaagtagagtatcttctaaggaagggatgggttccatgtgttgaatttgagttggagaaa------------------ggatttgtccaccgtcagtacaacagttcaccaggatactatgatggacgttactggacaatgtggaggttgccattgtttggaaccactgatgctgctcaggtgttgaaggaagttgctgaatgtaaagcagaatacccagaagctttcatccgtatcatcggatttgacaacgttcgt------caagtgcaatgcattagtttcattgcaagcacaccc
+A-14-133     atgcaggtgtggcctccaattggaaagaagaagtttgagactctttcctatttgccaccattgacgagagatcaattgttgaaagaagttgaataccttctgaggaagggatgggttccatgcttggaatttgagttgctcaaa------------------ggatttgtgtacggtgagcacaacaagtcaccaagatactatgatggaagatactggacaatgtggaagcttcctatgtttggcaccactgatcctgctcaagtcgtgaaggaggttgatgaagttgttgccgcttaccccgaagctttcgttcgtgtcatcggtttcaacaacgttcgt------caagttcaatgcatcagtttcattgcacacacacca
+PR-57-176    atgcaggtgtggccaccacgtaatttgaagaagtttgagaccctatcataccttccaactctttccgaggagtcattgttgaaggagatcaactaccttctaatcaagggatgggttccttgccttgagttcgaagttggaccg------------------gcacatgtataccgtgagaacaacaagtcaccaggatactatgacggaaggtactggacaatgtggaagctacccatgttcggatgcactgacgcatcccaagttgcagctgaggtggtcgagtgcaagaacgcttaccctgatgcccacgtcagaatcattggattcgacaacaagcgt------caagtccagtgcatcagtttcattgcctacaaacct
+PY-61-180    atgcaggtgtggcctccactcggactgaagaagttcgagaccctctcttaccttcctcccctttcttccgagtccttggccaaggaagttgactacctcctccgcaagaactgggttccctgcttggaatttgagttggagact------------------ggattcgtgtaccgtgagaaccacaggtccccaggatactatgatggaaggtactggacaatgtggaagctgcccatgttcggatgcaccgactcttcccaggtgttgaaggagctggaagaggccaagaaggcttacccccagtccttcatccgtatcatcggattcgacaatgtccgt------caagtgcagtgcatcagtttcatcgcttacaagcct
+MGI-58-176   atgcaggtgtggccgccggagggcctgaagaagttcgagaccctctcctacctcccccctctctccgtcgaggacctcgccaaggaggtggactacctcctccgcaacgactgggttccctgcatcgagttctccaaggaa---------------------gggttcgtgtaccgcgagaaccacgcgtcgcccgggtactacgacgggcggtactggacgatgtggaagctgcccatgttcggctgcaccgacgccagccaggtgatcgccgaggtggaggaggccaagaaggcctaccccgagtacttcgtcagaatcatcggcttcgacaacaagcgc------caagtccagtgcatcagcttcatcgcctacaagccc
+SCR-58-177   tgcatggtgtggccaccactaggaatgaagaagtttgagactctgtcttacctgccccctctatccgaagagtcattgttgaaggaggtccaataccttctcaacaatggatgggttccctgcttggaattcgagcccactcac------------------ggatttgtgtaccgtgagcacggaaacacaccaggatactacgatggacgttactggacaatgtggaagttgcccatgttcggttgcactgacccatcccaggttgttgctgagctcgaggaggccaagaaggcttaccctgaggccttcatccgtatcataggattcgacaacgtgcgt------caagtccagtgtgtcagtttcatcgcctacaagccc
+SA-60-179    atgaaggtgtggccaccacttggattgaggaagttcgagactctttcttacctgcctgatatgagtaacgaacaattgtcaaaggaatgtgactaccttctcaggaatggatgggttccctgcgttgaattcgacatcggaagc------------------ggattcgtgtaccgtgagaaccacaggtcaccaggattctacgatggacgttactggaccatgtggaagctccctatgtttggctgcaccgactcatctcaggtgattcaggagattgaggaggctaagaaggaataccccgacgcattcatcagggttattggctttgacaacgtccgt------caagtccagtgcatcagtttcatcgcctacaagccc
+BR-60-179    atgcaggtatggccaccacgtgggaagaagttctacgagactctctcataccttccaccccttacaagggagcaattggccaaggaagttgaataccttcttcgcaagggatgggttccttgcttggaattcgagttggagcat------------------ggaaccgtgtaccgtgagtaccacagatcaccagggtactatgatggtcgttactggaccatgtggaagctgcccatgtttggttgcacagatgcagtgcaggtgttgcaggagcttgatgagatgattaaagcttacccagattgctatggtaggatcattggtttcgacaatgttcgc------caagtccagtgcattagtttccttgcctacaagcct
+CPL-58-177   atgcaggtgtggccaccaattaacaagaagaagtacgagactctctcatacctccctgatttgagccaagagcaattgcttagcgaaattgagtaccttttgaaaagtggatgggttccttgcttggaattcgaaactgagcgc------------------ggatttgtctaccgtgaacaccaccattcaccaggatactatgacggcaggtactggaccatgtggaagctacctatgttcggatgcactgatgccacccaagtgttggctgaggtggaagaggcgaagaaggcatacccacaggcctgggtccgtattattggattcgacaacgtgcgt------caagtgcagtgcatcagtttcattgcctacaagcca
+LTU-59-178   atgcaggtgtggccaccaattaacatgaagaaatacgagacattgtcataccttcctgacttgtccgatgagcaattgctcaaggaagttgagtaccttttgaaaaatggatgggttccttgcttggaattcgagactgagcac------------------ggatttgtgtaccgtgagcacaacagctcaccaggatactacgatggtagatactggaccatgtggaagttgcctatgtttgggtgcactgacggaacccaggtgttggctgaggttcaagaggccaagaatgcgtacccacaggcctggatccgtattatcggattcgacaacgttcgt------caagtgcagtgcatcagtttcattgcctacaagcca
+TSP-58-177   atgcaggtgtggcccccatatggcaagaagaagtacgagactctctcataccttcctgatttaaccgacgagcaattgctcaaggagattgagtaccttttgaacaagggatgggttccttgcttggaatttgagactgagcac------------------ggatttgtctaccgtgaataccacgcctcacctagatactatgatggaaggtactggaccatgtggaagttgcccatgtttgggtgcactgatgcaactcaggtgttgggtgagctccaagaggccaagaaggcttaccctaatgcatggatcagaatcatcggattcgacaacgtccgt------caagtgcaatgcatcagtttcattgcctacaagcca
+YBN-56-175   atgcaggtgtggccaccagttggcaagaagaagtttgagactctttcctacctgccagaccttgatgatgcacaattggcaaaggaagtagaataccttcttaggaagggatggattccttgcttggaattcgagttggagcac------------------ggtttcgtgtaccgtgagcacaacaggtcactaggatactacgatggacgctactggaccatgtggaagctgcctatgtttggttgcactgatgcttctcaggtgttgaaggagcttcaagaggctaagactgcataccccaacggcttcatccgtatcatcggattcgacaacgttcgc------caagtgcagtgcatcagcttcatcgcctacaagccc
+AN-56-175    atgaaggtgtggccaccacttggattgaagaagtacgagactctctcatacttaccaccactaactgaaactcagttggctaaggaagtcgactacttgctccgcaaaaaatgggttccttgtttggaattcgagttggagcac------------------ggttttgtctaccgtgagaacgccagatcccccggatactatgacggaagatactggacaatgtggaaattgcctatgttcggttgcaccgactcagcccaagtgatgaaggagcttgctgaatgcaagaaggagtacccccaggcctggatccgtatcatcggatttgacaatgttcgt------caagttcaatgtatcatgttcattgcttccaggcca
+HI-60-179    atgcaggtgtggcctcctcttgggaagaagaagttcgagacactctcatacctccccgatcttacacccgtacagttggctaaggaagtagattaccttcttcgctctaaatggattccttgcttggaattcgaattagaggag------------------ggattcgtgcaccgtaagtactcgagcttacccacgtactacgatggacgctactggaccatgtggaaactgcccatgtttgggtgcactgactcggctcaggtgttggaggagcttgagaattgcaagaaggaataccccaatgcattcattagaatcattgggttcgacaacgttcgt------caagtgcagtgcattagtttcattgcctacaagcct
+ANA-56-175   atgaaggtgtggccaccagttggaaagaagaagtttgagaccctctcttaccttcctgaccttaccgaagttgaattgggtaaggaagtcgactaccttctccgcaacaagtggattccttgtgttgaattcgagttggagcac------------------gggtttgtttaccgtgagcacggaagcacccccggatactacgatggccgttactggacaatgtggaagcttcccttgttcggatgcactgactctgctcaagtgttgaaggaagtccaagaatgcaaaacggagtaccctaacgctttcatcaggatcatcggattcgacaacaaccgt------caggtccagtgcatcagtttcatcgcctacaagcca
+ZE-48-166    atgcaggtgtggccggcctacggcaacaagaagttcgagacgctgtcgtacctgccgccgctgtcgacggacgacctgctgaagcaggtggactacctgctgcgcaacggctggataccctgcctcgagttcagcaaggtc---------------------ggcttcgtgtaccgcgagaactccacctccccgtgctactacgacggccgctactggaccatgtggaagctgcccatgttcggctgcaacgacgccacccaggtgtacaaggagctgcaggaggccatcaaatcctacccggacgccttccaccgcgtcatcggcttcgacaacatcaag------cagacgcagtgcgtcagcttcatcgcctacaagccc
+EAT-48-166   atgcaggtgtggccaattgagggcatcaagaagttcgagaccctgtcttacttgccacccctctccacggaggccctcttgaagcaggtcgactacttgatccgctccaagtgggtgccctgcctcgagttcagcaaggtt---------------------ggcttcgtcttccgtgagcacaacagctcccccgggtactacgacggtcgatactggacaatgtggaagctgcctatgttcgggtgcaccgacgccacacaggtgctcaacgaggtggaggaggttaagaaggagtaccctgatgcgtatgtccgcgtcatcggtttcgacaacatgcgc------caggtgcaatgcgtcagcttcattgccttcaggcca
+YSA-46-164   atgcaggtgtggccgattgagggcatcaagaagttcgagaccctctcctacctgccaccgctcaccgtggaggacctcctgaagcagatcgagtacctagctccgttccaagtggtgccctgcctcgagttcagcaaggtc---------------------ggatttgtctaccgtgagaaccacaagtcccctggatactacgacggcaggtactggaccatgtggaagctgcccatgttcgggtgcaccgacgccacccaggtcgtcaaggagctcgaggaggccaagaaggcgtaccctgatgcattcgtccgtatcatcggcttcgacaacgttagg------caggtgcagctcatcagcttcatcgcctacaacccg
+TH-52-170    atgcaggtgtggcctccattcggaaaccccaagtttgagactctgtcctacctccctacgctaaccgaggagcagctggtgaaggaggttgagtacttgttgaggaacaagtgggtgccttgtctagagtttgatctggaa---------------------ggatccatctcgaggaagtataataggagcccggggtactacgatgggagatactgggtgatgtggaagttgccgatgtttgggtgcacagaggcatctcaggtgataaacgaggtgagagagtgtgccaaggcataccccaaagccttcatccgtgtcattggctttgacaacgtccgc------caagtgcagtgcatctccttcatcgtccacaagccc
+LA-68-186    atgcaggtgtggcctccttacgcgaataaaaagtttgagactctgtcgtatctccctcgcttgaccccggagcaactggtgaaggaggtggagtacctgctgaagaacaagtgggtgccctgcctggaattcgaggaggat---------------------ggtgaaataaagagagtgtatgggaatagcccagggtactacgacgggagatactgggtgatgtggaagctgcctatgttcggatgcacagaggcatcgcaggtgttgaacgaggtgaacgagtgtgcgaaggcataccccaacgccttcatccgcgtcatcggattcgacaacgtccgc------caagtgcagtgcatctccttcatcgtccacaagcct
+GR-854-978   atgaaggtgtggaaccccgtcaacaacaagaagttcgagaccttctcctacctgccccccctgtctgacgcccagatcgccaagcaggtggacatgatcattgccaaggggctctccccctgcctggagttcgccgccccggagaacagcttcatcgccaatgacaacactgtgcgcttcagcggcaccgctgcaggctactatgacaaccggtactggaccatgtggaagctgcccatgttcggctgcacggacgccagccaggtgctgcgtgagatctccgagtgccgcagggcctacccccagtgctacgtccgc---ctggccttcgactccgtcaag------caggtgcaggtgatctcgttcgtggtgcagcgcccc
+MO-29-154    ttcaaggtctggcagcccgtgaacaacaagcagtacgagaccttctcctacctgccccccctgaccaaccagaagatcggccgtcaggtcgactacatcatcaacaacggctggaccccctgcttggagttcgctgacccctccacctccttcgtcagcaacgcgaacgccgtgcgcctccagggtgtctccgctggctactacgacaacaggtactggaccatgtggaagctgcccatgttcggctgcactgaccccagccaggtgctgcgcgaggtgtccgcctgccaggtggccttccccaacgtgtacatccgcctggttgccttcgacaacgtcaag------caggtgcagtgcatgggcttcctagtgcagcgcccc
+OE-36-161    atgatggtatggtagccctttaacaataagttctttgagaccttctcgtacttgccccctctcactgacgaccaaatcaccaagcaagtggactacatcttgagaaacaattggactccttgtctggagtttgcgggatccgaccaagcgtatgtgacccacgacaacacggtaagaatgggagattgtgcatccacttatcaggacaacagatattggaccatgtggaaattgcctatgttcggttgcattgatggatcgcaagtgttgaccgaaatttcagcttgcactaaggcctttcctgatgcctacatccgtttggtgtgttttgatgcaaatagg------caagtccaaatttccggctttttggtacataggccc
+EME-43-168   atgatggtttggtagcccttcaacaacaaaatgtttgaaactttttccttcttgcctcccttgactgatgaacaaattagcaaacaagtggactacatcttggccaactcctggaccccctgtcttgaatttgcagcttctgatcaagcttatgctggcaatgaaaattgcatcagaatgggacctgtggcttctacctaccaagacaatagatattggacaatgtggaagctacctatgtttggatgcacagacggctctcaagtgttgagcgagatccaagcatgcacaaatgctttccccgatgcttacatcagattggtttgttttgacgcaaacaga------taggtgtaaatttctggatttttggtgcacagacct
+LRE-46-171   atgatggtctggaccccggtcaacaacaagatgttcgagaccttctcctacctgccccccctgagcgacgagcagatcgccgcccaggtcgactacattgtcgccaacggctggatcccctgcctggagttcgctgagtcggacaaggcctacgtgtccaacgagtcggccatccgcttcggcagcgtgtcttgcctgtactacgacaaccgctactggaccatgtggaagctgcccatgttcggctgccgcgaccccatgcaggtgctgcgcgagatcgtcgcctgcaccaaggccttccccgatgcctacgtgcgcctggtggccttcgacaaccagaag------caggtgcagatcatgggcttcctggtccagcgcccc
+P6-2-107     atgaaaactctgcccaaa------gagcgtcgtttcgagactttctcgtacctgcctcccctcagcgatcgccaaatcgctgcacaaatcgagtacatgatcgagcaaggcttccaccccttgatcgagttcaacgagcac------------------------------------------------------tcgaatccggaagagttctactggacgatgtggaagctccccctgtttgactgcaagagccctcagcaagtcctcgatgaagtgcgtgagtgccgcagcgaatacggtgattgctacatccgtgtcgctggcttcgacaacatcaag------cagtgccaaaccgtgagcttcatcgttcatcgtccc
+HO-1-106     atgaaaactctgcccaaa------gagcgtcgctacgaaaccctttcctacctgccccccctgagcgatcagcaaattgctcgccagattgagtacatggtgcgcgaaggctatattcccgccgtggaattcaacgaagat------------------------------------------------------tccgacgcgaccacctgctactggaccatgtggaagttgcccctgttccacgccacttctacccaagaagtgttgggcgaagtgcgcgagtgccgcaccgaataccccaactgctacatccgcgtagttggtttcgacaacatcaag------cagtgtcagtccgtgagcttcatcgttcacaagccc
+SP-1-106     atgcaaaccttaccaaaa------gagcgtcgttacgaaaccctttcttacttaccccccctcaccgacgttcaaatcgaaaagcaagtccagtacattctgagccaaggctacattccagccgttgagttcaacgaagtt------------------------------------------------------tctgaacctaccgaactttattggacactgtggaagctacctttgtttggtgctaaaacatcccgtgaagtattggcagaagttcaatcttgccgttctcaatatcctggtcactacatccgtgttgtaggatttgacaatattaag------cagtgccaaatcctgagcttcatcgttcacaaaccc
+PA-1-105     ---atgcaacttagagta------gaacgtaagttcgaaactttttcttatttaccaccattaaacgaccaacagattgcgcgtcaattacaatacgcactttccaatggttatagcccagcaatcgaattcagttttaca------------------------------------------------------ggtaaagctgaagacttagtatggactttatggaaattacctttatttggtgcacaatctcctgaagaagtacttagcgaaattcaagcttgtaaacaacagttccctaatgcttacattcgtgttgtagcatttgactctatcaga------caagttcaaactttaatgttcttagtttacaaacca
+NE-2-109     gctgaaatgcaggattacaagcaaagcctcaaatatgagactttctcttatcttccacccatgaacgcggaacgcatccgcgctcaaatcaagtacgcaattgctcaaggctggagccccggcattgagcacgtagaagtgaaa------------------------------------------------------aactccatgaaccaatattggtacatgtggaaacttcccttcttcggcgaacaaaatgtcgacaacgtgttggctgaaattgaagcgtgtcgtagtgcgtatccaacacaccaggtcaaactggtggcttatgacaactatgcg------caaagcttaggtctggccttcgtggtctaccgcggc
+IFE-2-109    gctgacattcaggactacaactcaacacccaagtacgaaaccttctcttatttgccggcaatgggaccggaaaaaatgcgccgtcagatcgcctatctcatcaatcagggctggaaccccggcatcgagcatgtggaacctgaa------------------------------------------------------cgcgcatcaacatactactggtacatgtggaagttacccatgttcggcgaacagtcggtggacaccgtgatcatggagttggaagcatgccatcgcgctcaccccggccatcacgtgcgcttggtcgggtatgacaattactcg------cagagccagggcagcgcttttgtggtgtttcgcggg
+HS-9-115     ---tcgagcgtcagcgatccgtcgagccgcaagttcgagaccttctcctacctgcccgaactcggcgtggaaaagatccgcaagcaggtcgagtacatcgtcagcaagggctggaacccggccgtcgagcacaccgagccggag------------------------------------------------------aacgccttcgaccactactggtacatgtggaagctgccgatgttcggcgaaaccgacgtggacgccatcctggccgaggccgaggcatgccacaaggcgcatccctcgcatcacgtgcgcctgatcggctacgacaactatgcc------cagtcgcaaggcactgccatggtgatcttccgcggc
+RVI-7-114    agttccagcctcgaagacgtcaacagccgcaagttcgagaccttctcctacctgccgcgcatggatgccgaccgcatccgcaagcaggtcgagtacatcgtctccaagggctggaacccggccatcgagcacaccgagccggaa------------------------------------------------------aacgccttcgatcactactggtacatgtggaagctgccgatgttcggcgagaccgacatcgacaccatcctcaaggaggccgaagcctgccacaaggcgcaccccaacaatcacgtgcgtctgatcggcttcgacaactatgcc------cagtccaagggcgccgagatggtggtctatcgcggc
+IFE-8-115    aaatcccgtctctccgacccggcgagcgcgaagttcgagacactgtcttacctgcccgccctgaccgcggacgagatccgtcaacaggttgcgtatattgtttccaagggctggaatccggcggtagaacataccgaaccggaa------------------------------------------------------aacgccttcggcaactactggtatatgtggaagttgcccatgttcggcgaaacggacgtggacaccattctgaaagaagcggaacgctgccataagcggaatccccataaccacgtccgtatcgtcggctatgataacttcaag------cagtcccagggtacttccctggtagtctatcggggc
+RVI-5-112    agcagcatgggcgatcacgccaccatcggccgctacgagaccttttcctatctgccgccgctcaaccgcgaggagatcctggagcagatcctctacatcctcgacaacggctggaacgcctcgctggagcacgagcatccggat------------------------------------------------------cgcgccttcgagtattactggccgatgtggaagatgcccttcttcggcgaacaggatccgaacgtgatcctgaccgagatcgagtcctgccggcgcagctatccggaccatcacgtccggctggtcggctacgacacctacgcc------cagagcaagggacattccttcctggcgcaccgcccg
+
diff --git a/model/models.nex b/example/models.nex
similarity index 100%
rename from model/models.nex
rename to example/models.nex
diff --git a/examples/example.phy b/examples/example.phy
new file mode 100644
index 0000000..8637b06
--- /dev/null
+++ b/examples/example.phy
@@ -0,0 +1,46 @@
+ 44 384 
+FL-1-103     atgcgcatcacccaaggc---------------------accttctccttcctgcccgacctcacggcggcccaggtcaaggcccagatccagtatgcgctggaccagaactgggcggtctcggtggagtacacggacgatccc------------------------------------------------------catccccggaacacctattgggagatgtggggcctgcccatgttcgacctgcgcgatgccgccggcgtctatggcgaggtcgaggcctgccgcaccgcccatcccggcaagtatgtgcgggtgaacgccttcgactccaatcgcgggtgggagacggtgcgcctctccttcatcgtccagcgtccg
+OSH-1-103    atgcgcatcacccaaggc---------------------tgcttctcgttcctgcccgacctgaccgacgagcagatctcggcgcaggtggactattgcctcggccgcggctgggccgtgagcctcgaacataccgacgacccg------------------------------------------------------catccccggaacacctactgggaaatgtggggcatgccgatgttcgacctgcgcgaccccaagggcgtgatgatcgagctggacgagtgccgcaaggcctggcccggccgctacatccgcatcaatgccttcgattccacccgcggcttcgagacggtcacgatgagcttcatcgtcaaccgcccc
+CEU-1-103    atgcgcatcactcaaggc---------------------actttttccttcctgcccgaactgaccgacgagcagatcaccaaacagctcgaatactgcctgaaccagggctgggcggtcggcctcgaatacaccgacgacccg------------------------------------------------------cacccgcgcaacacgtactgggagatgttcgggctgccgatgttcgacctgcgcgatgccgccggcatcctgatggaaatcaacaacgcgcggaacaccttccccaaccactacatccgcgtcacggccttcgattcgacgcatacggtggagtcggtggtgatgtcgttcatcgtcaatcgtccc
+TH-1-103     atgagacttacacaaggc---------------------gcattttcgttcttacctgacttaacagatgagcaaatcgtaaaacaaattcaatatgctatcagcaaaaactgggctttaaacgttgaatggacagatgatccg------------------------------------------------------caccctcgcaacgcatactgggatttatggggattaccattatttggtattaaagatccagcggctgtaatgtttgaaatcaatgcttgccgtaaagctaaaccagcttgttacgtaaaagtaaatgcgtttgataactcacgtggtgtagaaagctgctgcttatcttttatcgttcaacgtcct
+CAa1-103     atgaaactaacacaagga---------------------gctttctcatttcttcctgacttaactgatgcgcaagtaactaagcaaatccagtacgctttaaataagagttgggctatttcgattgaatatactgatgatccg------------------------------------------------------cacccacgtaacagttactgggagatgtggggccttcctctattcgatgttaaggatccagctgcgattcttttcgaaatcaacatggctcgtaaggctaagcctaactactaccttaaaatagcttgttttgataacacacgtggtatcgaaagttgtgtactttctttcattgtacaacgtcct
+CAb1-103     gtgagagttacacaagga---------------------acattttcttttctaccagacctgacaaatgatcaaatcagaaaacaaattcaatatgccataaataaaggatgggcattgagtgtagaatatacagatgaccct------------------------------------------------------cacccacggaattcttactgggaaatgtggggactgcctttatttgatgtcaaagaccctgcggcaattatgtttgaagttgaagcttgtcgaaaagagaaaagcaactattatattaagctattagcttttgattcaaccaaaggagttgaaagtacagcaatgtcctttatggtcaataggcct
+SI-1-103     atgagagttacacaagga---------------------tgtttttcgtttttaccagatttaagtgatgatcaaattaaacaacaagtttcttacgctatgagcaaaggttgggcggttagtgtagaatggacagatgatcca------------------------------------------------------catccacgtaactcatattgggaattatggggtcttcctttatttgatgttaaagatccagctgcagttatgtatgaacttgctgaatgtagaaaagttaacccagaaggttatattaaaattaatgctttcgatgctagtattggtacagaaagttgtgtaatgtcttttattgtacaacgtcct
+LU-1-103     gtgagacttacacaagga---------------------gctttttcttatttaccagatttaactgatgcacaaatcatcaaacaaattgactactgcttaagcagaggttggtctgttggtgttgaatggactgatgatcca------------------------------------------------------cacccacgtaacgcttactgggaactatggggtcttccattatttgacgtaaaagattcttcagcaattttatacgaagttaatgaatgtcgtcgtttaaaccctgaaggttacattaaattagttgctttcaacgcagcacgtggtactgaaagtagtgcatctgcttttattgtacaacgtcca
+SU-1-103     gtgagaataactcaaggt---------------------accttttcttttttgccggacttgactgatgaacaaatcaaaaaacaaattgattatatgatatctaaaaaattagctataggtattgaatatactaacgacata------------------------------------------------------catcctagaaattcattttgggaaatgtggggattacctctatttgaggtcacagatccagctccagtattatttgaaattaatgcttgtcgtaaagcaaaaagtaatttctatatcaaggtagtaggattttcttctgaaagaggtatagaaagtacaataatttcatttattgtaaatagacca
+RP-56-175    atgcaggtgtggccaccagttggcaagaagaagtttgagaccctttcataccttccacccctcactgatgagcaattgcttaaggaagtagagtatcttctaaggaagggatgggttccatgtgttgaatttgagttggagaaa------------------ggatttgtccaccgtcagtacaacagttcaccaggatactatgatggacgttactggacaatgtggaggttgccattgtttggaaccactgatgctgctcaggtgttgaaggaagttgctgaatgtaaagcagaatacccagaagctttcatccgtatcatcggatttgacaacgttcgt------caagtgcaatgcattagtttcattgcaagcacaccc
+A-14-133     atgcaggtgtggcctccaattggaaagaagaagtttgagactctttcctatttgccaccattgacgagagatcaattgttgaaagaagttgaataccttctgaggaagggatgggttccatgcttggaatttgagttgctcaaa------------------ggatttgtgtacggtgagcacaacaagtcaccaagatactatgatggaagatactggacaatgtggaagcttcctatgtttggcaccactgatcctgctcaagtcgtgaaggaggttgatgaagttgttgccgcttaccccgaagctttcgttcgtgtcatcggtttcaacaacgttcgt------caagttcaatgcatcagtttcattgcacacacacca
+PR-57-176    atgcaggtgtggccaccacgtaatttgaagaagtttgagaccctatcataccttccaactctttccgaggagtcattgttgaaggagatcaactaccttctaatcaagggatgggttccttgccttgagttcgaagttggaccg------------------gcacatgtataccgtgagaacaacaagtcaccaggatactatgacggaaggtactggacaatgtggaagctacccatgttcggatgcactgacgcatcccaagttgcagctgaggtggtcgagtgcaagaacgcttaccctgatgcccacgtcagaatcattggattcgacaacaagcgt------caagtccagtgcatcagtttcattgcctacaaacct
+PY-61-180    atgcaggtgtggcctccactcggactgaagaagttcgagaccctctcttaccttcctcccctttcttccgagtccttggccaaggaagttgactacctcctccgcaagaactgggttccctgcttggaatttgagttggagact------------------ggattcgtgtaccgtgagaaccacaggtccccaggatactatgatggaaggtactggacaatgtggaagctgcccatgttcggatgcaccgactcttcccaggtgttgaaggagctggaagaggccaagaaggcttacccccagtccttcatccgtatcatcggattcgacaatgtccgt------caagtgcagtgcatcagtttcatcgcttacaagcct
+MGI-58-176   atgcaggtgtggccgccggagggcctgaagaagttcgagaccctctcctacctcccccctctctccgtcgaggacctcgccaaggaggtggactacctcctccgcaacgactgggttccctgcatcgagttctccaaggaa---------------------gggttcgtgtaccgcgagaaccacgcgtcgcccgggtactacgacgggcggtactggacgatgtggaagctgcccatgttcggctgcaccgacgccagccaggtgatcgccgaggtggaggaggccaagaaggcctaccccgagtacttcgtcagaatcatcggcttcgacaacaagcgc------caagtccagtgcatcagcttcatcgcctacaagccc
+SCR-58-177   tgcatggtgtggccaccactaggaatgaagaagtttgagactctgtcttacctgccccctctatccgaagagtcattgttgaaggaggtccaataccttctcaacaatggatgggttccctgcttggaattcgagcccactcac------------------ggatttgtgtaccgtgagcacggaaacacaccaggatactacgatggacgttactggacaatgtggaagttgcccatgttcggttgcactgacccatcccaggttgttgctgagctcgaggaggccaagaaggcttaccctgaggccttcatccgtatcataggattcgacaacgtgcgt------caagtccagtgtgtcagtttcatcgcctacaagccc
+SA-60-179    atgaaggtgtggccaccacttggattgaggaagttcgagactctttcttacctgcctgatatgagtaacgaacaattgtcaaaggaatgtgactaccttctcaggaatggatgggttccctgcgttgaattcgacatcggaagc------------------ggattcgtgtaccgtgagaaccacaggtcaccaggattctacgatggacgttactggaccatgtggaagctccctatgtttggctgcaccgactcatctcaggtgattcaggagattgaggaggctaagaaggaataccccgacgcattcatcagggttattggctttgacaacgtccgt------caagtccagtgcatcagtttcatcgcctacaagccc
+BR-60-179    atgcaggtatggccaccacgtgggaagaagttctacgagactctctcataccttccaccccttacaagggagcaattggccaaggaagttgaataccttcttcgcaagggatgggttccttgcttggaattcgagttggagcat------------------ggaaccgtgtaccgtgagtaccacagatcaccagggtactatgatggtcgttactggaccatgtggaagctgcccatgtttggttgcacagatgcagtgcaggtgttgcaggagcttgatgagatgattaaagcttacccagattgctatggtaggatcattggtttcgacaatgttcgc------caagtccagtgcattagtttccttgcctacaagcct
+CPL-58-177   atgcaggtgtggccaccaattaacaagaagaagtacgagactctctcatacctccctgatttgagccaagagcaattgcttagcgaaattgagtaccttttgaaaagtggatgggttccttgcttggaattcgaaactgagcgc------------------ggatttgtctaccgtgaacaccaccattcaccaggatactatgacggcaggtactggaccatgtggaagctacctatgttcggatgcactgatgccacccaagtgttggctgaggtggaagaggcgaagaaggcatacccacaggcctgggtccgtattattggattcgacaacgtgcgt------caagtgcagtgcatcagtttcattgcctacaagcca
+LTU-59-178   atgcaggtgtggccaccaattaacatgaagaaatacgagacattgtcataccttcctgacttgtccgatgagcaattgctcaaggaagttgagtaccttttgaaaaatggatgggttccttgcttggaattcgagactgagcac------------------ggatttgtgtaccgtgagcacaacagctcaccaggatactacgatggtagatactggaccatgtggaagttgcctatgtttgggtgcactgacggaacccaggtgttggctgaggttcaagaggccaagaatgcgtacccacaggcctggatccgtattatcggattcgacaacgttcgt------caagtgcagtgcatcagtttcattgcctacaagcca
+TSP-58-177   atgcaggtgtggcccccatatggcaagaagaagtacgagactctctcataccttcctgatttaaccgacgagcaattgctcaaggagattgagtaccttttgaacaagggatgggttccttgcttggaatttgagactgagcac------------------ggatttgtctaccgtgaataccacgcctcacctagatactatgatggaaggtactggaccatgtggaagttgcccatgtttgggtgcactgatgcaactcaggtgttgggtgagctccaagaggccaagaaggcttaccctaatgcatggatcagaatcatcggattcgacaacgtccgt------caagtgcaatgcatcagtttcattgcctacaagcca
+YBN-56-175   atgcaggtgtggccaccagttggcaagaagaagtttgagactctttcctacctgccagaccttgatgatgcacaattggcaaaggaagtagaataccttcttaggaagggatggattccttgcttggaattcgagttggagcac------------------ggtttcgtgtaccgtgagcacaacaggtcactaggatactacgatggacgctactggaccatgtggaagctgcctatgtttggttgcactgatgcttctcaggtgttgaaggagcttcaagaggctaagactgcataccccaacggcttcatccgtatcatcggattcgacaacgttcgc------caagtgcagtgcatcagcttcatcgcctacaagccc
+AN-56-175    atgaaggtgtggccaccacttggattgaagaagtacgagactctctcatacttaccaccactaactgaaactcagttggctaaggaagtcgactacttgctccgcaaaaaatgggttccttgtttggaattcgagttggagcac------------------ggttttgtctaccgtgagaacgccagatcccccggatactatgacggaagatactggacaatgtggaaattgcctatgttcggttgcaccgactcagcccaagtgatgaaggagcttgctgaatgcaagaaggagtacccccaggcctggatccgtatcatcggatttgacaatgttcgt------caagttcaatgtatcatgttcattgcttccaggcca
+HI-60-179    atgcaggtgtggcctcctcttgggaagaagaagttcgagacactctcatacctccccgatcttacacccgtacagttggctaaggaagtagattaccttcttcgctctaaatggattccttgcttggaattcgaattagaggag------------------ggattcgtgcaccgtaagtactcgagcttacccacgtactacgatggacgctactggaccatgtggaaactgcccatgtttgggtgcactgactcggctcaggtgttggaggagcttgagaattgcaagaaggaataccccaatgcattcattagaatcattgggttcgacaacgttcgt------caagtgcagtgcattagtttcattgcctacaagcct
+ANA-56-175   atgaaggtgtggccaccagttggaaagaagaagtttgagaccctctcttaccttcctgaccttaccgaagttgaattgggtaaggaagtcgactaccttctccgcaacaagtggattccttgtgttgaattcgagttggagcac------------------gggtttgtttaccgtgagcacggaagcacccccggatactacgatggccgttactggacaatgtggaagcttcccttgttcggatgcactgactctgctcaagtgttgaaggaagtccaagaatgcaaaacggagtaccctaacgctttcatcaggatcatcggattcgacaacaaccgt------caggtccagtgcatcagtttcatcgcctacaagcca
+ZE-48-166    atgcaggtgtggccggcctacggcaacaagaagttcgagacgctgtcgtacctgccgccgctgtcgacggacgacctgctgaagcaggtggactacctgctgcgcaacggctggataccctgcctcgagttcagcaaggtc---------------------ggcttcgtgtaccgcgagaactccacctccccgtgctactacgacggccgctactggaccatgtggaagctgcccatgttcggctgcaacgacgccacccaggtgtacaaggagctgcaggaggccatcaaatcctacccggacgccttccaccgcgtcatcggcttcgacaacatcaag------cagacgcagtgcgtcagcttcatcgcctacaagccc
+EAT-48-166   atgcaggtgtggccaattgagggcatcaagaagttcgagaccctgtcttacttgccacccctctccacggaggccctcttgaagcaggtcgactacttgatccgctccaagtgggtgccctgcctcgagttcagcaaggtt---------------------ggcttcgtcttccgtgagcacaacagctcccccgggtactacgacggtcgatactggacaatgtggaagctgcctatgttcgggtgcaccgacgccacacaggtgctcaacgaggtggaggaggttaagaaggagtaccctgatgcgtatgtccgcgtcatcggtttcgacaacatgcgc------caggtgcaatgcgtcagcttcattgccttcaggcca
+YSA-46-164   atgcaggtgtggccgattgagggcatcaagaagttcgagaccctctcctacctgccaccgctcaccgtggaggacctcctgaagcagatcgagtacctagctccgttccaagtggtgccctgcctcgagttcagcaaggtc---------------------ggatttgtctaccgtgagaaccacaagtcccctggatactacgacggcaggtactggaccatgtggaagctgcccatgttcgggtgcaccgacgccacccaggtcgtcaaggagctcgaggaggccaagaaggcgtaccctgatgcattcgtccgtatcatcggcttcgacaacgttagg------caggtgcagctcatcagcttcatcgcctacaacccg
+TH-52-170    atgcaggtgtggcctccattcggaaaccccaagtttgagactctgtcctacctccctacgctaaccgaggagcagctggtgaaggaggttgagtacttgttgaggaacaagtgggtgccttgtctagagtttgatctggaa---------------------ggatccatctcgaggaagtataataggagcccggggtactacgatgggagatactgggtgatgtggaagttgccgatgtttgggtgcacagaggcatctcaggtgataaacgaggtgagagagtgtgccaaggcataccccaaagccttcatccgtgtcattggctttgacaacgtccgc------caagtgcagtgcatctccttcatcgtccacaagccc
+LA-68-186    atgcaggtgtggcctccttacgcgaataaaaagtttgagactctgtcgtatctccctcgcttgaccccggagcaactggtgaaggaggtggagtacctgctgaagaacaagtgggtgccctgcctggaattcgaggaggat---------------------ggtgaaataaagagagtgtatgggaatagcccagggtactacgacgggagatactgggtgatgtggaagctgcctatgttcggatgcacagaggcatcgcaggtgttgaacgaggtgaacgagtgtgcgaaggcataccccaacgccttcatccgcgtcatcggattcgacaacgtccgc------caagtgcagtgcatctccttcatcgtccacaagcct
+GR-854-978   atgaaggtgtggaaccccgtcaacaacaagaagttcgagaccttctcctacctgccccccctgtctgacgcccagatcgccaagcaggtggacatgatcattgccaaggggctctccccctgcctggagttcgccgccccggagaacagcttcatcgccaatgacaacactgtgcgcttcagcggcaccgctgcaggctactatgacaaccggtactggaccatgtggaagctgcccatgttcggctgcacggacgccagccaggtgctgcgtgagatctccgagtgccgcagggcctacccccagtgctacgtccgc---ctggccttcgactccgtcaag------caggtgcaggtgatctcgttcgtggtgcagcgcccc
+MO-29-154    ttcaaggtctggcagcccgtgaacaacaagcagtacgagaccttctcctacctgccccccctgaccaaccagaagatcggccgtcaggtcgactacatcatcaacaacggctggaccccctgcttggagttcgctgacccctccacctccttcgtcagcaacgcgaacgccgtgcgcctccagggtgtctccgctggctactacgacaacaggtactggaccatgtggaagctgcccatgttcggctgcactgaccccagccaggtgctgcgcgaggtgtccgcctgccaggtggccttccccaacgtgtacatccgcctggttgccttcgacaacgtcaag------caggtgcagtgcatgggcttcctagtgcagcgcccc
+OE-36-161    atgatggtatggtagccctttaacaataagttctttgagaccttctcgtacttgccccctctcactgacgaccaaatcaccaagcaagtggactacatcttgagaaacaattggactccttgtctggagtttgcgggatccgaccaagcgtatgtgacccacgacaacacggtaagaatgggagattgtgcatccacttatcaggacaacagatattggaccatgtggaaattgcctatgttcggttgcattgatggatcgcaagtgttgaccgaaatttcagcttgcactaaggcctttcctgatgcctacatccgtttggtgtgttttgatgcaaatagg------caagtccaaatttccggctttttggtacataggccc
+EME-43-168   atgatggtttggtagcccttcaacaacaaaatgtttgaaactttttccttcttgcctcccttgactgatgaacaaattagcaaacaagtggactacatcttggccaactcctggaccccctgtcttgaatttgcagcttctgatcaagcttatgctggcaatgaaaattgcatcagaatgggacctgtggcttctacctaccaagacaatagatattggacaatgtggaagctacctatgtttggatgcacagacggctctcaagtgttgagcgagatccaagcatgcacaaatgctttccccgatgcttacatcagattggtttgttttgacgcaaacaga------taggtgtaaatttctggatttttggtgcacagacct
+LRE-46-171   atgatggtctggaccccggtcaacaacaagatgttcgagaccttctcctacctgccccccctgagcgacgagcagatcgccgcccaggtcgactacattgtcgccaacggctggatcccctgcctggagttcgctgagtcggacaaggcctacgtgtccaacgagtcggccatccgcttcggcagcgtgtcttgcctgtactacgacaaccgctactggaccatgtggaagctgcccatgttcggctgccgcgaccccatgcaggtgctgcgcgagatcgtcgcctgcaccaaggccttccccgatgcctacgtgcgcctggtggccttcgacaaccagaag------caggtgcagatcatgggcttcctggtccagcgcccc
+P6-2-107     atgaaaactctgcccaaa------gagcgtcgtttcgagactttctcgtacctgcctcccctcagcgatcgccaaatcgctgcacaaatcgagtacatgatcgagcaaggcttccaccccttgatcgagttcaacgagcac------------------------------------------------------tcgaatccggaagagttctactggacgatgtggaagctccccctgtttgactgcaagagccctcagcaagtcctcgatgaagtgcgtgagtgccgcagcgaatacggtgattgctacatccgtgtcgctggcttcgacaacatcaag------cagtgccaaaccgtgagcttcatcgttcatcgtccc
+HO-1-106     atgaaaactctgcccaaa------gagcgtcgctacgaaaccctttcctacctgccccccctgagcgatcagcaaattgctcgccagattgagtacatggtgcgcgaaggctatattcccgccgtggaattcaacgaagat------------------------------------------------------tccgacgcgaccacctgctactggaccatgtggaagttgcccctgttccacgccacttctacccaagaagtgttgggcgaagtgcgcgagtgccgcaccgaataccccaactgctacatccgcgtagttggtttcgacaacatcaag------cagtgtcagtccgtgagcttcatcgttcacaagccc
+SP-1-106     atgcaaaccttaccaaaa------gagcgtcgttacgaaaccctttcttacttaccccccctcaccgacgttcaaatcgaaaagcaagtccagtacattctgagccaaggctacattccagccgttgagttcaacgaagtt------------------------------------------------------tctgaacctaccgaactttattggacactgtggaagctacctttgtttggtgctaaaacatcccgtgaagtattggcagaagttcaatcttgccgttctcaatatcctggtcactacatccgtgttgtaggatttgacaatattaag------cagtgccaaatcctgagcttcatcgttcacaaaccc
+PA-1-105     ---atgcaacttagagta------gaacgtaagttcgaaactttttcttatttaccaccattaaacgaccaacagattgcgcgtcaattacaatacgcactttccaatggttatagcccagcaatcgaattcagttttaca------------------------------------------------------ggtaaagctgaagacttagtatggactttatggaaattacctttatttggtgcacaatctcctgaagaagtacttagcgaaattcaagcttgtaaacaacagttccctaatgcttacattcgtgttgtagcatttgactctatcaga------caagttcaaactttaatgttcttagtttacaaacca
+NE-2-109     gctgaaatgcaggattacaagcaaagcctcaaatatgagactttctcttatcttccacccatgaacgcggaacgcatccgcgctcaaatcaagtacgcaattgctcaaggctggagccccggcattgagcacgtagaagtgaaa------------------------------------------------------aactccatgaaccaatattggtacatgtggaaacttcccttcttcggcgaacaaaatgtcgacaacgtgttggctgaaattgaagcgtgtcgtagtgcgtatccaacacaccaggtcaaactggtggcttatgacaactatgcg------caaagcttaggtctggccttcgtggtctaccgcggc
+IFE-2-109    gctgacattcaggactacaactcaacacccaagtacgaaaccttctcttatttgccggcaatgggaccggaaaaaatgcgccgtcagatcgcctatctcatcaatcagggctggaaccccggcatcgagcatgtggaacctgaa------------------------------------------------------cgcgcatcaacatactactggtacatgtggaagttacccatgttcggcgaacagtcggtggacaccgtgatcatggagttggaagcatgccatcgcgctcaccccggccatcacgtgcgcttggtcgggtatgacaattactcg------cagagccagggcagcgcttttgtggtgtttcgcggg
+HS-9-115     ---tcgagcgtcagcgatccgtcgagccgcaagttcgagaccttctcctacctgcccgaactcggcgtggaaaagatccgcaagcaggtcgagtacatcgtcagcaagggctggaacccggccgtcgagcacaccgagccggag------------------------------------------------------aacgccttcgaccactactggtacatgtggaagctgccgatgttcggcgaaaccgacgtggacgccatcctggccgaggccgaggcatgccacaaggcgcatccctcgcatcacgtgcgcctgatcggctacgacaactatgcc------cagtcgcaaggcactgccatggtgatcttccgcggc
+RVI-7-114    agttccagcctcgaagacgtcaacagccgcaagttcgagaccttctcctacctgccgcgcatggatgccgaccgcatccgcaagcaggtcgagtacatcgtctccaagggctggaacccggccatcgagcacaccgagccggaa------------------------------------------------------aacgccttcgatcactactggtacatgtggaagctgccgatgttcggcgagaccgacatcgacaccatcctcaaggaggccgaagcctgccacaaggcgcaccccaacaatcacgtgcgtctgatcggcttcgacaactatgcc------cagtccaagggcgccgagatggtggtctatcgcggc
+IFE-8-115    aaatcccgtctctccgacccggcgagcgcgaagttcgagacactgtcttacctgcccgccctgaccgcggacgagatccgtcaacaggttgcgtatattgtttccaagggctggaatccggcggtagaacataccgaaccggaa------------------------------------------------------aacgccttcggcaactactggtatatgtggaagttgcccatgttcggcgaaacggacgtggacaccattctgaaagaagcggaacgctgccataagcggaatccccataaccacgtccgtatcgtcggctatgataacttcaag------cagtcccagggtacttccctggtagtctatcggggc
+RVI-5-112    agcagcatgggcgatcacgccaccatcggccgctacgagaccttttcctatctgccgccgctcaaccgcgaggagatcctggagcagatcctctacatcctcgacaacggctggaacgcctcgctggagcacgagcatccggat------------------------------------------------------cgcgccttcgagtattactggccgatgtggaagatgcccttcttcggcgaacaggatccgaacgtgatcctgaccgagatcgagtcctgccggcgcagctatccggaccatcacgtccggctggtcggctacgacacctacgcc------cagagcaagggacattccttcctggcgcaccgcccg
+
diff --git a/gss.cpp b/gss.cpp
index b6d54a2..23b33e8 100644
--- a/gss.cpp
+++ b/gss.cpp
@@ -319,7 +319,7 @@ void GSSNetwork::findPD(Params &params, vector<SplitSet> &taxa_set, vector<int>
         taxa_set[index].push_back(pd_set);
     }
     cout << endl;
-    delete variables;
+    delete [] variables;
     // call the leaving function
     leaveFindPD(taxa_set);
 }
diff --git a/gzstream.h b/gzstream.h
index f7f971f..9116b02 100644
--- a/gzstream.h
+++ b/gzstream.h
@@ -32,7 +32,8 @@
 // standard C++ with new header file names and std:: namespace
 #include <iostream>
 #include <fstream>
-#include "zlib-1.2.7/zlib.h"
+//#include "zlib-1.2.7/zlib.h"
+#include <zlib.h>
 
 #ifdef GZSTREAM_NAMESPACE
 namespace GZSTREAM_NAMESPACE {
diff --git a/iqtree.cpp b/iqtree.cpp
index 22bf83e..6678274 100644
--- a/iqtree.cpp
+++ b/iqtree.cpp
@@ -61,7 +61,7 @@ void IQTree::init() {
     max_candidate_trees = 0;
     logl_cutoff = 0.0;
     len_scale = 10000;
-    save_all_br_lens = false;
+//    save_all_br_lens = false;
     duplication_counter = 0;
     //boot_splits = new SplitGraph;
     pll2iqtree_pattern_index = NULL;
@@ -161,8 +161,8 @@ void IQTree::initSettings(Params &params) {
             save_all_trees = 2;
         }
     }
-    if (params.gbo_replicates > 0 && params.do_compression)
-        save_all_br_lens = true;
+//    if (params.gbo_replicates > 0 && params.do_compression)
+//        save_all_br_lens = true;
     print_tree_lh = params.print_tree_lh;
     max_candidate_trees = params.max_candidate_trees;
     if (max_candidate_trees == 0)
@@ -194,7 +194,9 @@ void IQTree::initSettings(Params &params) {
         	boot_samples[i] = mem + i*nptn;
 
         boot_logl.resize(params.gbo_replicates, -DBL_MAX);
-        boot_trees.resize(params.gbo_replicates, -1);
+        boot_trees.resize(params.gbo_replicates, "");
+        if (params.print_ufboot_trees == 2)
+        	boot_trees_brlen.resize(params.gbo_replicates);
         boot_counts.resize(params.gbo_replicates, 0);
         VerboseMode saved_mode = verbose_mode;
         verbose_mode = VB_QUIET;
@@ -235,43 +237,23 @@ void IQTree::initSettings(Params &params) {
     }
 }
 
-void myPartitionsDestroy(partitionList *pl) {
-	int i;
-	for (i = 0; i < pl->numberOfPartitions; i++) {
-		rax_free(pl->partitionData[i]->partitionName);
-		rax_free(pl->partitionData[i]);
-	}
-	rax_free(pl->partitionData);
-	rax_free(pl);
-}
-
 IQTree::~IQTree() {
     //if (bonus_values)
     //delete bonus_values;
     //bonus_values = NULL;
-    if (dist_matrix)
-        delete[] dist_matrix;
-    dist_matrix = NULL;
 
-    if (var_matrix)
-        delete[] var_matrix;
-    var_matrix = NULL;
-
-    for (vector<double*>::reverse_iterator it = treels_ptnlh.rbegin(); it != treels_ptnlh.rend(); it++)
-        delete[] (*it);
-    treels_ptnlh.clear();
+//    for (vector<double*>::reverse_iterator it = treels_ptnlh.rbegin(); it != treels_ptnlh.rend(); it++)
+//        delete[] (*it);
+//    treels_ptnlh.clear();
     for (vector<SplitGraph*>::reverse_iterator it2 = boot_splits.rbegin(); it2 != boot_splits.rend(); it2++)
         delete (*it2);
+    boot_splits.clear();
     //if (boot_splits) delete boot_splits;
-    if (pllPartitions)
-    	myPartitionsDestroy(pllPartitions);
-    if (pllAlignment)
-    	pllAlignmentDataDestroy(pllAlignment);
-    if (pllInst)
-        pllDestroyInstance(pllInst);
-
-    if (!boot_samples.empty())
+
+    if (!boot_samples.empty()) {
     	aligned_free(boot_samples[0]); // free memory
+        boot_samples.clear();
+    }
 }
 
 extern const char *aa_model_names_rax[];
@@ -280,38 +262,72 @@ void IQTree::createPLLPartition(Params &params, ostream &pllPartitionFileHandle)
     if (isSuperTree()) {
         PhyloSuperTree *siqtree = (PhyloSuperTree*) this;
         // additional check for PLL hard limit
-        if (siqtree->size() > PLL_NUM_BRANCHES)
-        	outError("Number of partitions exceeds PLL limit, please increase PLL_NUM_BRANCHES constant in pll.h");
-        int i = 0;
-        int startPos = 1;
-        for (PhyloSuperTree::iterator it = siqtree->begin(); it != siqtree->end(); it++) {
-            i++;
-            int curLen = ((*it))->getAlnNSite();
-            if ((*it)->aln->seq_type == SEQ_DNA) {
-                pllPartitionFileHandle << "DNA";
-            } else if ((*it)->aln->seq_type == SEQ_PROTEIN) {
-            	if (siqtree->part_info[i-1].model_name != "" && siqtree->part_info[i-1].model_name.substr(0, 4) != "TEST") {
-                    string modelStr = siqtree->part_info[i - 1].model_name.
-                            substr(0, siqtree->part_info[i - 1].model_name.find_first_of("+{"));
-                    if (modelStr == "LG4")
-                        modelStr = "LG4M";
-                    bool name_ok = false;
-                    for (int j = 0; j < 18; j++)
-                        if (modelStr == aa_model_names_rax[j]) {
-                            name_ok = true;
-                            break;
+        if (params.pll) {
+            if (siqtree->size() > PLL_NUM_BRANCHES)
+                outError("Number of partitions exceeds PLL limit, please increase PLL_NUM_BRANCHES constant in pll.h");
+            int i = 0;
+            int startPos = 1;
+            
+            // prepare proper partition file 
+            for (PhyloSuperTree::iterator it = siqtree->begin(); it != siqtree->end(); it++) {
+                i++;
+                int curLen = ((*it))->getAlnNSite();
+                if ((*it)->aln->seq_type == SEQ_DNA) {
+                    pllPartitionFileHandle << "DNA";
+                } else if ((*it)->aln->seq_type == SEQ_PROTEIN) {
+                    if (siqtree->part_info[i-1].model_name != "" && siqtree->part_info[i-1].model_name.substr(0, 4) != "TEST") {
+                        string modelStr = siqtree->part_info[i - 1].model_name.
+                                substr(0, siqtree->part_info[i - 1].model_name.find_first_of("+{"));
+                        if (modelStr == "LG4")
+                            modelStr = "LG4M";
+                        bool name_ok = false;
+                        for (int j = 0; j < 18; j++)
+                            if (modelStr == aa_model_names_rax[j]) {
+                                name_ok = true;
+                                break;
+                            }
+                        if (name_ok)
+                            pllPartitionFileHandle << modelStr;
+                        else
+                            pllPartitionFileHandle << "WAG";                    
+                    } else {
+                        pllPartitionFileHandle << "WAG";
+                    }
+                } else
+                    outError("PLL only works with DNA/protein alignments");
+                pllPartitionFileHandle << ", p" << i << " = " << startPos << "-" << startPos + curLen - 1 << endl;
+                startPos = startPos + curLen;
+            }
+        } else {
+            // only prepare partition file for computing parsimony trees
+            SeqType datatype[] = {SEQ_DNA, SEQ_PROTEIN};
+            PhyloSuperTree::iterator it;
+            
+            for (int i = 0; i < sizeof(datatype)/sizeof(SeqType); i++) {
+                bool first = true;
+                int startPos = 1;
+                for (it = siqtree->begin(); it != siqtree->end(); it++) 
+                    if ((*it)->aln->seq_type == datatype[i]) {
+                        if (first) {
+                        if (datatype[i] == SEQ_DNA)
+                            pllPartitionFileHandle << "DNA";
+                        else
+                            pllPartitionFileHandle << "WAG";
                         }
-                    if (name_ok)
-                        pllPartitionFileHandle << modelStr;
-                    else
-                        pllPartitionFileHandle << "WAG";                    
-                } else {
-                    pllPartitionFileHandle << "WAG";
-                }
-            } else
-            	outError("PLL only works with DNA/protein alignments");
-            pllPartitionFileHandle << ", p" << i << " = " << startPos << "-" << startPos + curLen - 1 << endl;
-            startPos = startPos + curLen;
+                        int curLen = (*it)->getAlnNSite();                    
+                        if (first) 
+                            pllPartitionFileHandle << ", p" << i << " = ";
+                        else
+                            pllPartitionFileHandle << ", ";
+                            
+                        pllPartitionFileHandle << startPos << "-" << startPos + curLen - 1;
+                        startPos = startPos + curLen;
+                        first = false;
+                    } else {
+                        startPos = startPos + (*it)->getAlnNSite();
+                    }
+                if (!first) pllPartitionFileHandle << endl;
+            }
         }
     } else {
         /* create a partition file */
@@ -380,6 +396,7 @@ void IQTree::computeInitialTree(string &dist_file, LikelihoodKernel kernel) {
 	    wrapperFixNegativeBranch(false);
 
         break;
+    case STT_RANDOM_TREE:
     case STT_PLL_PARSIMONY:
         cout << endl;
         cout << "Create initial parsimony tree by phylogenetic likelihood library (PLL)... ";
@@ -404,11 +421,11 @@ void IQTree::computeInitialTree(string &dist_file, LikelihoodKernel kernel) {
         else
         	fixed_number = wrapperFixNegativeBranch(false);
 		break;
-    case STT_RANDOM_TREE:
-        cout << "Generate random initial Yule-Harding tree..." << endl;
-        generateRandomTree(YULE_HARDING);
-        wrapperFixNegativeBranch(true);
-        break;
+//    case STT_RANDOM_TREE:
+//        cout << "Generate random initial Yule-Harding tree..." << endl;
+//        generateRandomTree(YULE_HARDING);
+//        wrapperFixNegativeBranch(true);
+//        break;
     }
 
     if (fixed_number) {
@@ -430,27 +447,29 @@ void IQTree::computeInitialTree(string &dist_file, LikelihoodKernel kernel) {
 }
 
 void IQTree::initCandidateTreeSet(int nParTrees, int nNNITrees) {
-    cout << "--------------------------------------------------------------------" << endl;
-    cout << "|             INITIALIZING CANDIDATE TREE SET                      |" << endl;
-    cout << "--------------------------------------------------------------------" << endl;
 
-    cout << "Generating " << nParTrees  << " parsimony trees... ";
-    cout.flush();
+    if (nParTrees > 0) {
+        if (params->start_tree == STT_RANDOM_TREE)
+            cout << "Generating " << nParTrees  << " random trees... ";
+        else
+            cout << "Generating " << nParTrees  << " parsimony trees... ";
+        cout.flush();
+    }
     double startTime = getRealTime();
     int numDupPars = 0;
 #ifdef _OPENMP
     StrVector pars_trees;
-    if (params->start_tree == STT_PARSIMONY && nParTrees > 1) {
-        pars_trees.resize(nParTrees-1);
+    if (params->start_tree == STT_PARSIMONY && nParTrees >= 1) {
+        pars_trees.resize(nParTrees);
         #pragma omp parallel
         {
             PhyloTree tree;
             tree.setParams(params);
             tree.setParsimonyKernel(params->SSE);
             #pragma omp for
-            for (int i = 1; i < nParTrees; i++) {
+            for (int i = 0; i < nParTrees; i++) {
                 tree.computeParsimonyTree(NULL, aln);
-                pars_trees[i-1] = tree.getTreeString();
+                pars_trees[i] = tree.getTreeString();
             }
         }
     }
@@ -470,7 +489,11 @@ void IQTree::initCandidateTreeSet(int nParTrees, int nNNITrees) {
 			PhyloTree::readTreeString(curParsTree);
 			wrapperFixNegativeBranch(true);
 			curParsTree = getTreeString();
-        } else {
+        } else if (params->start_tree == STT_RANDOM_TREE) {
+            generateRandomTree(YULE_HARDING);
+            wrapperFixNegativeBranch(true);
+			curParsTree = getTreeString();
+        } else if (params->start_tree == STT_PARSIMONY) {
             /********* Create parsimony tree using IQ-TREE *********/
 #ifdef _OPENMP
             curParsTree = pars_trees[treeNr-1];
@@ -478,6 +501,8 @@ void IQTree::initCandidateTreeSet(int nParTrees, int nNNITrees) {
             computeParsimonyTree(NULL, aln);
             curParsTree = getTreeString();
 #endif
+        } else {
+            assert(0);
         }
 
         if (candidateTrees.treeExist(curParsTree)) {
@@ -497,15 +522,19 @@ void IQTree::initCandidateTreeSet(int nParTrees, int nNNITrees) {
         	candidateTrees.update(curParsTree, -DBL_MAX, false);
         }
     }
+    
+
     double parsTime = getRealTime() - startTime;
-    cout << parsTime << " seconds ";
-    cout << candidateTrees.size() << " distinct starting trees" << endl;
+    if (nParTrees > 0) {
+        cout << parsTime << " seconds ";
+        cout << candidateTrees.size() << " distinct starting trees" << endl;
+    }
 
     /****************************************************************************************
                       Compute logl of all parsimony trees
     *****************************************************************************************/
 
-    cout << "Computing log-likelihood of parsimony trees ... ";
+    cout << "Computing log-likelihood of " << candidateTrees.size() << " initial trees ... ";
     startTime = getRealTime();
 //    CandidateSet candTrees = candidateTrees.getBestCandidateTrees(candidateTrees.size());
     CandidateSet candTrees = candidateTrees;
@@ -539,13 +568,13 @@ void IQTree::initCandidateTreeSet(int nParTrees, int nNNITrees) {
     CandidateSet initParsimonyTrees = candidateTrees.getBestCandidateTrees(nNNITrees);
     candidateTrees.clear();
 
-    cout << "Optimizing top parsimony trees with NNI..." << endl;
+    cout << "Optimizing top " << initParsimonyTrees.size() << " initial trees with NNI..." << endl;
     startTime = getCPUTime();
     /*********** START: Do NNI on the best parsimony trees ************************************/
     CandidateSet::reverse_iterator rit;
-    stop_rule.setCurIt(1);
-    for (rit = initParsimonyTrees.rbegin(); rit != initParsimonyTrees.rend(); ++rit, stop_rule.setCurIt(
-            stop_rule.getCurIt() + 1)) {
+    stop_rule.setCurIt(0);
+    for (rit = initParsimonyTrees.rbegin(); rit != initParsimonyTrees.rend(); ++rit) {
+        stop_rule.setCurIt(stop_rule.getCurIt() + 1);
     	int nniCount, nniStep;
         double initLogl, nniLogl;
         string tree;
@@ -642,8 +671,7 @@ void IQTree::initializePLL(Params &params) {
 }
 
 
-void IQTree::initializeModel(Params &params) {
-	ModelsBlock *models_block = readModelsDefinition(params);
+void IQTree::initializeModel(Params &params, ModelsBlock *models_block) {
     try {
         if (!getModelFactory()) {
             if (isSuperTree()) {
@@ -672,7 +700,6 @@ void IQTree::initializeModel(Params &params) {
         	outError("non GTR model for DNA is not yet supported by PLL.");
         pllInitModel(pllInst, pllPartitions);
     }
-    delete models_block;
 }
 double IQTree::getProbDelete() {
     return (double) k_delete / leafNum;
@@ -1719,11 +1746,11 @@ double IQTree::doTreeSearch() {
 	/*====================================================
 	 * MAIN LOOP OF THE IQ-TREE ALGORITHM
 	 *====================================================*/
-    for (; !stop_rule.meetStopCondition(stop_rule.getCurIt(), cur_correlation); stop_rule.setCurIt(
-            stop_rule.getCurIt() + 1)) {
+    while(!stop_rule.meetStopCondition(stop_rule.getCurIt(), cur_correlation)) {
+        stop_rule.setCurIt(stop_rule.getCurIt() + 1);
         searchinfo.curIter = stop_rule.getCurIt();
         // estimate logl_cutoff for bootstrap
-        if (params->avoid_duplicated_trees && max_candidate_trees > 0 && treels_logl.size() > 1000) {
+        if (/*params->avoid_duplicated_trees &&*/ max_candidate_trees > 0 && treels_logl.size() > 1000) {
         	int predicted_iteration = ((stop_rule.getCurIt()+params->step_iterations-1)/params->step_iterations)*params->step_iterations;
             int num_entries = floor(max_candidate_trees * ((double) stop_rule.getCurIt() / predicted_iteration));
             if (num_entries < treels_logl.size() * 0.9) {
@@ -1734,11 +1761,11 @@ double IQTree::doTreeSearch() {
                 logl_cutoff = 0.0;
             if (verbose_mode >= VB_MED) {
                 if (stop_rule.getCurIt() % 10 == 0) {
-                    cout << treels.size() << " trees, " << treels_logl.size() << " logls, logl_cutoff= " << logl_cutoff;
-                    if (params->store_candidate_trees)
-                        cout << " duplicates= " << duplication_counter << " ("
-                                << (int) round(100 * ((double) duplication_counter / treels_logl.size())) << "%)" << endl;
-                    else
+                    cout << treels_logl.size() << " logls, logl_cutoff= " << logl_cutoff;
+//                    if (params->store_candidate_trees)
+//                        cout << " duplicates= " << duplication_counter << " ("
+//                                << (int) round(100 * ((double) duplication_counter / treels_logl.size())) << "%)" << endl;
+//                    else
                         cout << endl;
                 }
             }
@@ -1798,6 +1825,7 @@ double IQTree::doTreeSearch() {
             }
 
             computeLogL();
+            perturbScore = curScore;
         }
 
     	/*----------------------------------------
@@ -1907,6 +1935,7 @@ double IQTree::doTreeSearch() {
 
        //if (params->partition_type)
        // 	((PhyloSuperTreePlen*)this)->printNNIcasesNUM();
+       
     }
 
     readTreeString(candidateTrees.getTopTrees()[0]);
@@ -1959,6 +1988,7 @@ double IQTree::optimizeNNI(int &nni_count, int &nni_steps) {
     int numNNIs = 0; // number of NNI to be applied in each step
     const int MAXSTEPS = aln->getNSeq(); // maximum number of NNI steps
     NodeVector nodes1, nodes2;
+    DoubleVector lenvec;
     for (nni_steps = 1; nni_steps <= MAXSTEPS; nni_steps++) {
         double oldScore = curScore;
         if (!rollBack) { // tree get improved and was not rollbacked
@@ -1973,10 +2003,11 @@ double IQTree::optimizeNNI(int &nni_count, int &nni_steps) {
             }
 
             nonConfNNIs.clear(); // Vector containing non-conflicting positive NNIs
-            optBrans.clear(); // Vector containing branch length of the positive NNIs
-            orgBrans.clear(); // Vector containing all current branch of the tree
+//            optBrans.clear(); // Vector containing branch length of the positive NNIs
+//            orgBrans.clear(); // Vector containing all current branch of the tree
             plusNNIs.clear(); // Vector containing all positive NNIs
-            saveBranches(); // save all current branch lengths
+//            saveBranches(); // save all current branch lengths
+            saveBranchLengths(lenvec);
             initPartitionInfo(); // for super tree
             int numRemoved;
             if (nodes1.size() == 0) {
@@ -2081,7 +2112,8 @@ double IQTree::optimizeNNI(int &nni_count, int &nni_steps) {
             for (int i = 0; i < numNNIs; i++)
                 doNNI(nonConfNNIs.at(i));
             // restore the branch lengths
-            restoreAllBrans();
+//            restoreAllBrans();
+            restoreBranchLengths(lenvec);
             // This is important because after restoring the branch lengths, all partial
             // likelihood need to be cleared.
 //            if (params->lh_mem_save == LM_PER_NODE) {
@@ -2205,10 +2237,10 @@ void IQTree::pllInitUFBootData(){
             if(!pllUFBootDataPtr->treels_logl) outError("Not enough dynamic memory!");
             //memset(pllUFBootDataPtr->treels_logl, 0, max_candidate_trees * (sizeof(double)));
 
-            pllUFBootDataPtr->treels_newick =
-                (char **) malloc(max_candidate_trees * (sizeof(char *)));
-            if(!pllUFBootDataPtr->treels_newick) outError("Not enough dynamic memory!");
-            memset(pllUFBootDataPtr->treels_newick, 0, max_candidate_trees * (sizeof(char *)));
+//            pllUFBootDataPtr->treels_newick =
+//                (char **) malloc(max_candidate_trees * (sizeof(char *)));
+//            if(!pllUFBootDataPtr->treels_newick) outError("Not enough dynamic memory!");
+//            memset(pllUFBootDataPtr->treels_newick, 0, max_candidate_trees * (sizeof(char *)));
 
 
             pllUFBootDataPtr->treels_ptnlh =
@@ -2244,16 +2276,16 @@ void IQTree::pllInitUFBootData(){
             if(!pllUFBootDataPtr->boot_counts) outError("Not enough dynamic memory!");
             memset(pllUFBootDataPtr->boot_counts, 0, params->gbo_replicates * (sizeof(int)));
 
-            pllUFBootDataPtr->boot_trees =
-                (int *) malloc(params->gbo_replicates * (sizeof(int)));
-            if(!pllUFBootDataPtr->boot_trees) outError("Not enough dynamic memory!");
-
+//            pllUFBootDataPtr->boot_trees =
+//                (int *) malloc(params->gbo_replicates * (sizeof(int)));
+//            if(!pllUFBootDataPtr->boot_trees) outError("Not enough dynamic memory!");
+            pllUFBootDataPtr->boot_trees.resize(params->gbo_replicates, "");
             pllUFBootDataPtr->duplication_counter = 0;
         }
     }
     pllUFBootDataPtr->max_candidate_trees = max_candidate_trees;
     pllUFBootDataPtr->save_all_trees = save_all_trees;
-    pllUFBootDataPtr->save_all_br_lens = save_all_br_lens;
+//    pllUFBootDataPtr->save_all_br_lens = save_all_br_lens;
     pllUFBootDataPtr->logl_cutoff = logl_cutoff;
     pllUFBootDataPtr->n_patterns = pllAlignment->sequenceLength;
 }
@@ -2269,10 +2301,10 @@ void IQTree::pllDestroyUFBootData(){
 
         free(pllUFBootDataPtr->treels_logl);
 
-        for(int i = 0; i < pllUFBootDataPtr->candidate_trees_count; i++)
-            if(pllUFBootDataPtr->treels_newick[i])
-                free(pllUFBootDataPtr->treels_newick[i]);
-        free(pllUFBootDataPtr->treels_newick);
+//        for(int i = 0; i < pllUFBootDataPtr->candidate_trees_count; i++)
+//            if(pllUFBootDataPtr->treels_newick[i])
+//                free(pllUFBootDataPtr->treels_newick[i]);
+//        free(pllUFBootDataPtr->treels_newick);
 
         for(int i = 0; i < pllUFBootDataPtr->treels_size; i++)
             if(pllUFBootDataPtr->treels_ptnlh[i])
@@ -2287,7 +2319,7 @@ void IQTree::pllDestroyUFBootData(){
 
         free(pllUFBootDataPtr->boot_counts);
 
-        free(pllUFBootDataPtr->boot_trees);
+//        free(pllUFBootDataPtr->boot_trees);
     }
     free(pllUFBootDataPtr);
     pllUFBootDataPtr = NULL;
@@ -2303,6 +2335,9 @@ void IQTree::doNNIs(int nni2apply, bool changeBran) {
             changeNNIBrans(nonConfNNIs.at(i));
         }
     }
+    // 2015-10-14: has to reset this pointer when read in
+    current_it = current_it_back = NULL;
+    
 //    if (params->lh_mem_save == LM_PER_NODE) {
 //        initializeAllPartialLh();
 //    }
@@ -2386,52 +2421,6 @@ void IQTree::setDelete(int _delete) {
     k_delete = _delete;
 }
 
-void IQTree::changeBranLen(PhyloNode *node1, PhyloNode *node2, double newlen) {
-    node1->findNeighbor(node2)->length = newlen;
-    node2->findNeighbor(node1)->length = newlen;
-    node1->clearReversePartialLh(node2);
-    node2->clearReversePartialLh(node1);
-}
-
-double IQTree::getBranLen(PhyloNode *node1, PhyloNode *node2) {
-    return  node1->findNeighbor(node2)->length;
-}
-
-void IQTree::saveBranches(PhyloNode *node, PhyloNode *dad) {
-    if (!node) {
-        node = (PhyloNode*) root;
-    }
-    if (dad) {
-        double len = getBranLen(node, dad);
-        string key = getBranchID(node, dad);
-        orgBrans.insert(mapString2Double::value_type(key, len));
-    }
-
-    FOR_NEIGHBOR_IT(node, dad, it){
-    saveBranches((PhyloNode*) (*it)->node, node);
-}
-}
-
-void IQTree::restoreAllBrans(PhyloNode *node, PhyloNode *dad) {
-    if (!node) {
-        node = (PhyloNode*) root;
-    }
-    if (dad) {
-        string key = getBranchID(node, dad);
-        Neighbor* bran_it = node->findNeighbor(dad);
-        assert(bran_it);
-        Neighbor* bran_it_back = dad->findNeighbor(node);
-        assert(bran_it_back);
-        assert(orgBrans.count(key));
-        bran_it->length = orgBrans[key];
-        bran_it_back->length = orgBrans[key];
-    }
-
-    FOR_NEIGHBOR_IT(node, dad, it){
-    restoreAllBrans((PhyloNode*) (*it)->node, node);
-}
-}
-
 void IQTree::evalNNIs(PhyloNode *node, PhyloNode *dad) {
     if (!node) {
         node = (PhyloNode*) root;
@@ -2564,48 +2553,47 @@ void IQTree::estimateNNICutoff(Params* params) {
 }
 
 void IQTree::saveCurrentTree(double cur_logl) {
-    ostringstream ostr;
-    string tree_str;
-    StringIntMap::iterator it = treels.end();
-    if (params->store_candidate_trees) {
-        printTree(ostr, WT_TAXON_ID | WT_SORT_TAXA);
-        tree_str = ostr.str();
-        it = treels.find(tree_str);
-    }
-    int tree_index = -1;
-    if (it != treels.end()) { // already in treels
-        duplication_counter++;
-        tree_index = it->second;
-        if (cur_logl <= treels_logl[it->second] + 1e-4) {
-            if (cur_logl < treels_logl[it->second] - 5.0)
-                if (verbose_mode >= VB_MED)
-                    cout << "Current lh " << cur_logl << " is much worse than expected " << treels_logl[it->second]
-                            << endl;
-            return;
-        }
-        if (verbose_mode >= VB_MAX)
-            cout << "Updated logl " << treels_logl[it->second] << " to " << cur_logl << endl;
-        treels_logl[it->second] = cur_logl;
-        if (save_all_br_lens) {
-            ostr.seekp(ios::beg);
-            printTree(ostr, WT_TAXON_ID | WT_SORT_TAXA | WT_BR_LEN | WT_BR_SCALE | WT_BR_LEN_ROUNDING);
-            treels_newick[it->second] = ostr.str();
-        }
-        if (boot_samples.empty()) {
-            computePatternLikelihood(treels_ptnlh[it->second], &cur_logl);
-            return;
-        }
-        if (verbose_mode >= VB_MAX)
-            cout << "Update treels_logl[" << tree_index << "] := " << cur_logl << endl;
-    } else {
+//    StringIntMap::iterator it = treels.end();
+//    if (params->store_candidate_trees) {
+//        printTree(ostr, WT_TAXON_ID | WT_SORT_TAXA);
+//        tree_str = ostr.str();
+//        it = treels.find(tree_str);
+//    }
+//    int tree_index = -1;
+//    if (it != treels.end()) { // already in treels
+//        duplication_counter++;
+//        tree_index = it->second;
+//        if (cur_logl <= treels_logl[it->second] + 1e-4) {
+//            if (cur_logl < treels_logl[it->second] - 5.0)
+//                if (verbose_mode >= VB_MED)
+//                    cout << "Current lh " << cur_logl << " is much worse than expected " << treels_logl[it->second]
+//                            << endl;
+//            return;
+//        }
+//        if (verbose_mode >= VB_MAX)
+//            cout << "Updated logl " << treels_logl[it->second] << " to " << cur_logl << endl;
+//        treels_logl[it->second] = cur_logl;
+////        if (save_all_br_lens) {
+////            ostr.seekp(ios::beg);
+////            printTree(ostr, WT_TAXON_ID | WT_SORT_TAXA | WT_BR_LEN | WT_BR_SCALE | WT_BR_LEN_ROUNDING);
+////            treels_newick[it->second] = ostr.str();
+////        }
+//        if (boot_samples.empty()) {
+////            computePatternLikelihood(treels_ptnlh[it->second], &cur_logl);
+//            return;
+//        }
+//        if (verbose_mode >= VB_MAX)
+//            cout << "Update treels_logl[" << tree_index << "] := " << cur_logl << endl;
+//    } else
+    {
         if (logl_cutoff != 0.0 && cur_logl <= logl_cutoff + 1e-4)
             return;
-        tree_index = treels_logl.size();
-        if (params->store_candidate_trees)
-            treels[tree_str] = tree_index;
+//        tree_index = treels_logl.size();
+//        if (params->store_candidate_trees)
+//            treels[tree_str] = tree_index;
         treels_logl.push_back(cur_logl);
-        if (verbose_mode >= VB_MAX)
-            cout << "Add    treels_logl[" << tree_index << "] := " << cur_logl << endl;
+//        if (verbose_mode >= VB_MAX)
+//            cout << "Add    treels_logl[" << tree_index << "] := " << cur_logl << endl;
     }
 
     if (write_intermediate_trees)
@@ -2631,16 +2619,26 @@ void IQTree::saveCurrentTree(double cur_logl) {
 
     if (boot_samples.empty()) {
         // for runGuidedBootstrap
-#ifdef BOOT_VAL_FLOAT
-        treels_ptnlh.push_back(pattern_lh_orig);
-#else
-        treels_ptnlh.push_back(pattern_lh);
-#endif
+//#ifdef BOOT_VAL_FLOAT
+//        treels_ptnlh.push_back(pattern_lh_orig);
+//#else
+//        treels_ptnlh.push_back(pattern_lh);
+//#endif
     } else {
         // online bootstrap
-        int ptn;
+//        int ptn;
 //        int updated = 0;
         int nsamples = boot_samples.size();
+        ostringstream ostr;
+        string tree_str, tree_str_brlen;
+        printTree(ostr, WT_TAXON_ID);
+        tree_str = ostr.str();
+        if (params->print_ufboot_trees == 2) {
+            ostringstream ostr_brlen;
+			printTree(ostr_brlen, WT_BR_LEN);
+			tree_str_brlen = ostr_brlen.str();
+        }
+        double rand_double = random_double();
 
         #ifdef _OPENMP
         #pragma omp parallel for
@@ -2648,13 +2646,14 @@ void IQTree::saveCurrentTree(double cur_logl) {
         for (int sample = 0; sample < nsamples; sample++) {
             double rell = 0.0;
 
-            if (false) {
-            	BootValType *boot_sample = boot_samples[sample];
-            	BootValType rellll = 0.0;
-				for (ptn = 0; ptn < nptn; ptn++)
-					rellll += pattern_lh[ptn] * boot_sample[ptn];
-				rell = (double)rellll;
-            } else {
+//            if (false) {
+//            	BootValType *boot_sample = boot_samples[sample];
+//            	BootValType rellll = 0.0;
+//				for (ptn = 0; ptn < nptn; ptn++)
+//					rellll += pattern_lh[ptn] * boot_sample[ptn];
+//				rell = (double)rellll;
+//            } else 
+            {
             	// SSE optimized version of the above loop
 				BootValType *boot_sample = boot_samples[sample];
 
@@ -2665,34 +2664,37 @@ void IQTree::saveCurrentTree(double cur_logl) {
 
             bool better = rell > boot_logl[sample] + params->ufboot_epsilon;
             if (!better && rell > boot_logl[sample] - params->ufboot_epsilon) {
-                #ifdef _OPENMP
-                #pragma omp critical
-                #endif
-                better = random_double() <= 1.0 / (boot_counts[sample] + 1);
+//                #ifdef _OPENMP
+//                #pragma omp critical
+//                #endif
+                better = (rand_double <= 1.0 / (boot_counts[sample] + 1));
             }
             if (better) {
-                if (tree_str == "") 
-                #ifdef _OPENMP
-                #pragma omp critical
-                #endif
-                {
-                    printTree(ostr, WT_TAXON_ID | WT_SORT_TAXA);
-                    tree_str = ostr.str();
-                    it = treels.find(tree_str);
-                    if (it != treels.end()) {
-                        tree_index = it->second;
-                    } else {
-                        tree_index = treels.size();
-                        treels[tree_str] = tree_index;
-                    }
-                }
+//                if (tree_str == "") 
+//                #ifdef _OPENMP
+//                #pragma omp critical
+//                #endif
+//                {
+//                    printTree(ostr, WT_TAXON_ID | WT_SORT_TAXA);
+//                    tree_str = ostr.str();
+//                    it = treels.find(tree_str);
+//                    if (it != treels.end()) {
+//                        tree_index = it->second;
+//                    } else {
+//                        tree_index = treels.size();
+//                        treels[tree_str] = tree_index;
+//                    }
+//                }
                 if (rell <= boot_logl[sample] + params->ufboot_epsilon) {
                     boot_counts[sample]++;
                 } else {
                     boot_counts[sample] = 1;
                 }
                 boot_logl[sample] = max(boot_logl[sample], rell);
-                boot_trees[sample] = tree_index;
+                boot_trees[sample] = tree_str;
+                if (params->print_ufboot_trees == 2) {
+                	boot_trees_brlen[sample] = tree_str_brlen;
+                }
 //                updated++;
             } /*else if (verbose_mode >= VB_MED && rell > boot_logl[sample] - 0.01) {
              cout << "Info: multiple RELL score trees detected" << endl;
@@ -2707,11 +2709,11 @@ void IQTree::saveCurrentTree(double cur_logl) {
          summarizeBootstrap(*boot_splits);
          }*/
     }
-    if (save_all_br_lens) {
-        ostr.seekp(ios::beg);
-        printTree(ostr, WT_TAXON_ID | WT_SORT_TAXA | WT_BR_LEN | WT_BR_SCALE | WT_BR_LEN_ROUNDING);
-        treels_newick.push_back(ostr.str());
-    }
+//    if (save_all_br_lens) {
+//        ostr.seekp(ios::beg);
+//        printTree(ostr, WT_TAXON_ID | WT_SORT_TAXA | WT_BR_LEN | WT_BR_SCALE | WT_BR_LEN_ROUNDING);
+//        treels_newick.push_back(ostr.str());
+//    }
     if (print_tree_lh) {
         out_treelh << cur_logl;
         double prob;
@@ -2880,55 +2882,70 @@ void IQTree::summarizeBootstrap(Params &params, MTreeSet &trees) {
 
 void IQTree::writeUFBootTrees(Params &params) {
     MTreeSet trees;
-    IntVector tree_weights;
+//    IntVector tree_weights;
     int sample, i, j;
-    tree_weights.resize(treels_logl.size(), 0);
-    for (sample = 0; sample < boot_trees.size(); sample++)
-        tree_weights[boot_trees[sample]]++;
-    trees.init(treels, rooted, tree_weights);
 	string filename = params.out_prefix;
 	filename += ".ufboot";
 	ofstream out(filename.c_str());
-	for (i = 0; i < trees.size(); i++) {
-		NodeVector taxa;
-		// change the taxa name from ID to real name
-		trees[i]->getOrderedTaxa(taxa);
-		for (j = 0; j < taxa.size(); j++)
-			taxa[j]->name = aln->getSeqName(taxa[j]->id);
-		if (removed_seqs.size() > 0) {
-			// reinsert removed seqs into each tree
-			trees[i]->insertTaxa(removed_seqs, twin_seqs);
+
+	if (params.print_ufboot_trees == 1) {
+		// print trees without branch lengths
+//		tree_weights.resize(treels_logl.size(), 0);
+//		for (sample = 0; sample < boot_trees.size(); sample++)
+//			tree_weights[boot_trees[sample]]++;
+//		trees.init(treels, rooted, tree_weights);
+        trees.init(boot_trees, rooted);
+		for (i = 0; i < trees.size(); i++) {
+			NodeVector taxa;
+			// change the taxa name from ID to real name
+			trees[i]->getOrderedTaxa(taxa);
+			for (j = 0; j < taxa.size(); j++)
+				taxa[j]->name = aln->getSeqName(taxa[j]->id);
+			if (removed_seqs.size() > 0) {
+				// reinsert removed seqs into each tree
+				trees[i]->insertTaxa(removed_seqs, twin_seqs);
+			}
+			// now print to file
+			for (j = 0; j < trees.tree_weights[i]; j++)
+				if (params.print_ufboot_trees == 1)
+					trees[i]->printTree(out, WT_NEWLINE);
+				else
+					trees[i]->printTree(out, WT_NEWLINE + WT_BR_LEN);
 		}
-		// now print to file
-		for (j = 0; j < trees.tree_weights[i]; j++)
-			trees[i]->printTree(out, WT_NEWLINE);
+		cout << "UFBoot trees printed to " << filename << endl;
+	} else {
+		// with branch lengths
+		for (sample = 0; sample < boot_trees_brlen.size(); sample++)
+			out << boot_trees_brlen[sample] << endl;
+		cout << "UFBoot trees with branch lengths printed to " << filename << endl;
 	}
 	out.close();
-	cout << "UFBoot trees printed to " << filename << endl;
 }
 
 void IQTree::summarizeBootstrap(Params &params) {
 	setRootNode(params.root);
-	if (verbose_mode >= VB_MED)
-		cout << "Summarizing from " << treels.size() << " candidate trees..." << endl;
+//	if (verbose_mode >= VB_MED)
+//		cout << "Summarizing from " << treels.size() << " candidate trees..." << endl;
     MTreeSet trees;
-    IntVector tree_weights;
-    int sample;
-    tree_weights.resize(treels_logl.size(), 0);
-    for (sample = 0; sample < boot_trees.size(); sample++)
-        tree_weights[boot_trees[sample]]++;
-    trees.init(treels, rooted, tree_weights);
+//    IntVector tree_weights;
+//    int sample;
+//    tree_weights.resize(treels_logl.size(), 0);
+//    for (sample = 0; sample < boot_trees.size(); sample++)
+//        tree_weights[boot_trees[sample]]++;
+//    trees.init(treels, rooted, tree_weights);
+    trees.init(boot_trees, rooted);
     summarizeBootstrap(params, trees);
 }
 
 void IQTree::summarizeBootstrap(SplitGraph &sg) {
     MTreeSet trees;
-    IntVector tree_weights;
-    tree_weights.resize(treels_logl.size(), 0);
-    for (int sample = 0; sample < boot_trees.size(); sample++)
-        tree_weights[boot_trees[sample]]++;
-    trees.init(treels, rooted, tree_weights);
+//    IntVector tree_weights;
+//    tree_weights.resize(treels_logl.size(), 0);
+//    for (int sample = 0; sample < boot_trees.size(); sample++)
+//        tree_weights[boot_trees[sample]]++;
+//    trees.init(treels, rooted, tree_weights);
     //SplitGraph sg;
+    trees.init(boot_trees, rooted);
     SplitIntMap hash_ss;
     // make the taxa name
     vector<string> taxname;
@@ -2957,19 +2974,19 @@ void IQTree::pllConvertUFBootData2IQTree(){
         boot_trees.push_back(pllUFBootDataPtr->boot_trees[i]);
 
     //treels
-    treels.clear();
-    if(pllUFBootDataPtr->candidate_trees_count > 0){
-        struct pllHashItem * hItem;
-        struct pllHashTable * hTable = pllUFBootDataPtr->treels;
-        for (int i = 0; i < hTable->size; ++ i){
-            hItem = hTable->Items[i];
-            while (hItem){
-                string k(hItem->str);
-                treels[k] = *((int *)hItem->data);
-                hItem = hItem->next;
-            }
-        }
-    }
+//    treels.clear();
+//    if(pllUFBootDataPtr->candidate_trees_count > 0){
+//        struct pllHashItem * hItem;
+//        struct pllHashTable * hTable = pllUFBootDataPtr->treels;
+//        for (int i = 0; i < hTable->size; ++ i){
+//            hItem = hTable->Items[i];
+//            while (hItem){
+//                string k(hItem->str);
+//                treels[k] = *((int *)hItem->data);
+//                hItem = hItem->next;
+//            }
+//        }
+//    }
 }
 
 double computeCorrelation(IntVector &ix, IntVector &iy) {
@@ -3130,46 +3147,47 @@ void IQTree::printIntermediateTree(int brtype) {
     bool duplicated_tree = false;
     double *pattern_lh = NULL;
     double logl = curScore;
-    if (params->avoid_duplicated_trees) {
-        // estimate logl_cutoff
-        stringstream ostr;
-        printTree(ostr, WT_TAXON_ID | WT_SORT_TAXA);
-        string tree_str = ostr.str();
-        StringIntMap::iterator it = treels.find(tree_str);
-        if (it != treels.end()) { // already in treels
-            duplicated_tree = true;
-            if (curScore > treels_logl[it->second] + 1e-4) {
-                if (verbose_mode >= VB_MAX)
-                    cout << "Updated logl " << treels_logl[it->second] << " to " << curScore << endl;
-                treels_logl[it->second] = curScore;
-                computeLikelihood(treels_ptnlh[it->second]);
-                if (save_all_br_lens) {
-                    ostr.seekp(ios::beg);
-                    printTree(ostr, WT_TAXON_ID | WT_SORT_TAXA | WT_BR_LEN | WT_BR_SCALE | WT_BR_LEN_ROUNDING);
-                    treels_newick[it->second] = ostr.str();
-                }
-            }
-            //pattern_lh = treels_ptnlh[treels[tree_str]];
-        } else {
-            //cout << __func__ << ": new tree" << endl;
-            if (logl_cutoff != 0.0 && curScore <= logl_cutoff + 1e-4)
-                duplicated_tree = true;
-            else {
-                treels[tree_str] = treels_ptnlh.size();
-                pattern_lh = new double[getAlnNPattern()];
-//                computePatternLikelihood(pattern_lh, &logl);
-                computePatternLikelihood(pattern_lh);
-                treels_ptnlh.push_back(pattern_lh);
-                treels_logl.push_back(logl);
-                if (save_all_br_lens) {
-                    ostr.seekp(ios::beg);
-                    printTree(ostr, WT_TAXON_ID | WT_SORT_TAXA | WT_BR_LEN | WT_BR_SCALE | WT_BR_LEN_ROUNDING);
-                    treels_newick.push_back(ostr.str());
-                }
-            }
-        }
-        //cout << tree_str << endl;
-    } else {
+//    if (params->avoid_duplicated_trees) {
+//        // estimate logl_cutoff
+//        stringstream ostr;
+//        printTree(ostr, WT_TAXON_ID | WT_SORT_TAXA);
+//        string tree_str = ostr.str();
+//        StringIntMap::iterator it = treels.find(tree_str);
+//        if (it != treels.end()) { // already in treels
+//            duplicated_tree = true;
+//            if (curScore > treels_logl[it->second] + 1e-4) {
+//                if (verbose_mode >= VB_MAX)
+//                    cout << "Updated logl " << treels_logl[it->second] << " to " << curScore << endl;
+//                treels_logl[it->second] = curScore;
+////                computeLikelihood(treels_ptnlh[it->second]);
+////                if (save_all_br_lens) {
+////                    ostr.seekp(ios::beg);
+////                    printTree(ostr, WT_TAXON_ID | WT_SORT_TAXA | WT_BR_LEN | WT_BR_SCALE | WT_BR_LEN_ROUNDING);
+////                    treels_newick[it->second] = ostr.str();
+////                }
+//            }
+//            //pattern_lh = treels_ptnlh[treels[tree_str]];
+//        } else {
+//            //cout << __func__ << ": new tree" << endl;
+//            if (logl_cutoff != 0.0 && curScore <= logl_cutoff + 1e-4)
+//                duplicated_tree = true;
+//            else {
+////                treels[tree_str] = treels_ptnlh.size();
+//                pattern_lh = new double[getAlnNPattern()];
+////                computePatternLikelihood(pattern_lh, &logl);
+//                computePatternLikelihood(pattern_lh);
+////                treels_ptnlh.push_back(pattern_lh);
+//                treels_logl.push_back(logl);
+////                if (save_all_br_lens) {
+////                    ostr.seekp(ios::beg);
+////                    printTree(ostr, WT_TAXON_ID | WT_SORT_TAXA | WT_BR_LEN | WT_BR_SCALE | WT_BR_LEN_ROUNDING);
+////                    treels_newick.push_back(ostr.str());
+////                }
+//            }
+//        }
+//        //cout << tree_str << endl;
+//    } else
+    {
         if (params->print_tree_lh) {
             pattern_lh = new double[getAlnNPattern()];
             computePatternLikelihood(pattern_lh, &logl);
@@ -3191,7 +3209,7 @@ void IQTree::printIntermediateTree(int brtype) {
             for (int i = 0; i < aln->getNSite(); i++)
                 out_sitelh << "\t" << pattern_lh[aln->getPatternID(i)];
             out_sitelh << endl;
-            if (!params->avoid_duplicated_trees)
+//            if (!params->avoid_duplicated_trees)
                 delete[] pattern_lh;
         }
     }
diff --git a/iqtree.h b/iqtree.h
index 94e8fe9..fd3ed13 100644
--- a/iqtree.h
+++ b/iqtree.h
@@ -87,7 +87,7 @@ public:
 
     IQTree(Alignment *aln);
 
-    EIGEN_MAKE_ALIGNED_OPERATOR_NEW
+//    EIGEN_MAKE_ALIGNED_OPERATOR_NEW
 
     /**
             destructor
@@ -105,7 +105,7 @@ public:
 
     void initializePLL(Params &params);
 
-    void initializeModel(Params &params);
+    void initializeModel(Params &params, ModelsBlock *models_block);
 
     /**
             print tree to .treefile
@@ -362,12 +362,12 @@ public:
     /**
      * 	Save all the current branch lengths
      */
-    void saveBranches(PhyloNode *node = NULL, PhyloNode *dad = NULL);
+//    void saveBranches(PhyloNode *node = NULL, PhyloNode *dad = NULL);
 
     /**
      * 	 Restore the branch lengths from the saved values
      */
-    virtual void restoreAllBrans(PhyloNode *node = NULL, PhyloNode *dad = NULL);
+//    virtual void restoreAllBrans(PhyloNode *node = NULL, PhyloNode *dad = NULL);
 
     /**
      * Get the branch length of the branch node1-node2
@@ -375,7 +375,7 @@ public:
      * @param node2
      * @return the branch length
      */
-    double getBranLen(PhyloNode *node1, PhyloNode *node2);
+//    double getBranLen(PhyloNode *node1, PhyloNode *node2);
 
 
     /**
@@ -384,7 +384,7 @@ public:
             @param node1 the first node of the branch
             @param node2 the second node of the branch
      */
-    void changeBranLen(PhyloNode *node1, PhyloNode *node2, double branLen);
+//    void changeBranLen(PhyloNode *node1, PhyloNode *node2, double branLen);
 
     /**
      * Estimate the 95% quantile of the distribution of N (see paper for more d
@@ -557,7 +557,7 @@ protected:
     /**
         Optimal branch lengths
      */
-    mapString2Double optBrans;
+//    mapString2Double optBrans;
 
     /**
      *  @brief get branches, on which NNIs are evaluated for the next NNI step.
@@ -575,7 +575,7 @@ protected:
     /**
             Original branch lengths
      */
-    mapString2Double orgBrans;
+//    mapString2Double orgBrans;
 
     int k_delete, k_delete_min, k_delete_max, k_delete_stay;
 
@@ -620,22 +620,22 @@ public:
     /****** following variables are for ultra-fast bootstrap *******/
 
     /** TRUE to save also branch lengths into treels_newick */
-    bool save_all_br_lens;
+//    bool save_all_br_lens;
 
     /**
         this keeps the list of intermediate trees.
         it will be activated if params.avoid_duplicated_trees is TRUE.
      */
-    StringIntMap treels;
+//    StringIntMap treels;
 
     /** pattern log-likelihood vector for each treels */
-    vector<double* > treels_ptnlh;
+//    vector<double* > treels_ptnlh;
 
     /** tree log-likelihood for each treels */
     DoubleVector treels_logl;
 
     /** NEWICK string for each treels */
-    StrVector treels_newick;
+//    StrVector treels_newick;
 
     /** maximum number of distinct candidate trees (tau parameter) */
     int max_candidate_trees;
@@ -647,7 +647,10 @@ public:
     vector<BootValType* > boot_samples;
 
     /** newick string of corresponding bootstrap trees */
-    IntVector boot_trees;
+    StrVector boot_trees;
+
+    /** bootstrap tree strings with branch lengths, for -wbtl option */
+    StrVector boot_trees_brlen;
 
 	/** number of multiple optimal trees per replicate */
 	IntVector boot_counts;
diff --git a/iqtree_config.h.in b/iqtree_config.h.in
index f19b326..9d3e17c 100644
--- a/iqtree_config.h.in
+++ b/iqtree_config.h.in
@@ -1,6 +1,6 @@
 #define iqtree_VERSION_MAJOR @iqtree_VERSION_MAJOR@
 #define iqtree_VERSION_MINOR @iqtree_VERSION_MINOR@
-#define iqtree_VERSION_PATCH @iqtree_VERSION_PATCH@
+#define iqtree_VERSION_PATCH "@iqtree_VERSION_PATCH@"
 
 /* does the platform provide gettimeofday functions? */
 #cmakedefine HAVE_GETTIMEOFDAY
@@ -11,4 +11,4 @@
 /* does the platform provide pclose functions? */
 /*#cmakedefine HAVE_PCLOSE*/
 /* does the platform provide GlobalMemoryStatusEx functions? */
-#cmakedefine HAVE_GLOBALMEMORYSTATUSEX
\ No newline at end of file
+#cmakedefine HAVE_GLOBALMEMORYSTATUSEX
diff --git a/mexttree.cpp b/mexttree.cpp
index e4df396..d9ee2f7 100644
--- a/mexttree.cpp
+++ b/mexttree.cpp
@@ -496,3 +496,25 @@ void MExtTree::createCluster(int clu_num, Node *node, Node *dad) {
 	}
 }
 
+
+void MExtTree::collapseLowBranchSupport(DoubleVector &minsup, Node *node, Node *dad) {
+    if (!node) node = root;
+    FOR_NEIGHBOR_IT(node, dad, it) {
+        collapseLowBranchSupport(minsup, (*it)->node, node);
+    }
+    if (!node->isLeaf() && dad && node->name != "") {
+        DoubleVector vec;
+        convert_double_vec(node->name.c_str(), vec, '/');
+        if (vec.size() != minsup.size()) {
+            cout << "Branch with name " << node->name << " ignored" << endl;
+            return;
+        }
+        for (int i = 0; i < vec.size(); i++)
+            if (vec[i] < minsup[i]) {
+                // support smaller than threshold, mark this branch for deletion
+                dad->findNeighbor(node)->length = 0.0;
+                node->findNeighbor(dad)->length = 0.0;
+                break;
+            }
+    }
+}
diff --git a/mexttree.h b/mexttree.h
index c45caed..b7d501c 100644
--- a/mexttree.h
+++ b/mexttree.h
@@ -155,6 +155,15 @@ public:
 	*/
 	void createCluster(int clu_num, Node *node, Node *dad);
 
+/********************************************************
+	Miscellaneous
+********************************************************/
+
+    /**
+        collapse all branches with support lower than minsup (support values in slash-separated format, e.g. "a/b")
+        implemented for Ricardo
+    */
+    void collapseLowBranchSupport(DoubleVector &minsup, Node *node = NULL, Node *dad = NULL);
 
 };
 
diff --git a/model/modelcodon.cpp b/model/modelcodon.cpp
index 0da735b..27b034b 100644
--- a/model/modelcodon.cpp
+++ b/model/modelcodon.cpp
@@ -774,40 +774,49 @@ double ModelCodon::computeEmpiricalOmega() {
     
 
 
-void ModelCodon::getVariables(double *variables) {
-	int i, j;
+bool ModelCodon::getVariables(double *variables) {
+	int j;
+    bool changed = false;
     if (num_params > 0) {
         j = 1;
-        if (!fix_omega)
+        if (!fix_omega) {
+            changed |= (omega != variables[j]);
             omega = variables[j++];
-        if (!fix_kappa)
+        }
+        if (!fix_kappa) {
+            changed |= (kappa != variables[j]);
             kappa = variables[j++];
-        if (!fix_kappa2)
+        }
+        if (!fix_kappa2) {
+            changed |= (kappa2 != variables[j]);
             kappa2 = variables[j++];
+        }
         assert(j == num_params+1);
     }
 	if (freq_type == FREQ_ESTIMATE) {
-//		int ndim = getNDim();
-//		memcpy(state_freq, variables+(ndim-num_states+2), (num_states-1)*sizeof(double));
+        // 2015-09-07: relax the sum of state_freq to be 1, this will be done at the end of optimization
+		int ndim = getNDim();
+		changed |= memcmpcpy(state_freq, variables+(ndim-num_states+2), (num_states-1)*sizeof(double));
 //		double sum = 0;
 //		for (i = 0; i < num_states-1; i++)
 //			sum += state_freq[i];
 //		state_freq[num_states-1] = 1.0 - sum;
 
         // BUG FIX 2015.08.28
-        int nrate = getNDim();
-        if (freq_type == FREQ_ESTIMATE) nrate -= (num_states-1);
-		double sum = 1.0;
-//		int i, j;
-		for (i = 1; i < num_states; i++)
-			sum += variables[nrate+i];
-		for (i = 0, j = 1; i < num_states; i++)
-			if (i != highest_freq_state) {
-				state_freq[i] = variables[nrate+j] / sum;
-				j++;
-			}
-		state_freq[highest_freq_state] = 1.0/sum;
+//        int nrate = getNDim();
+//        if (freq_type == FREQ_ESTIMATE) nrate -= (num_states-1);
+//		double sum = 1.0;
+////		int i, j;
+//		for (i = 1; i < num_states; i++)
+//			sum += variables[nrate+i];
+//		for (i = 0, j = 1; i < num_states; i++)
+//			if (i != highest_freq_state) {
+//				state_freq[i] = variables[nrate+j] / sum;
+//				j++;
+//			}
+//		state_freq[highest_freq_state] = 1.0/sum;
 	}
+    return changed;
 }
 
 void ModelCodon::setVariables(double *variables) {
@@ -824,18 +833,19 @@ void ModelCodon::setVariables(double *variables) {
 		assert(j == num_params+1);
 	}
 	if (freq_type == FREQ_ESTIMATE) {
-//		int ndim = getNDim();
-//		memcpy(variables+(ndim-num_states+2), state_freq, (num_states-1)*sizeof(double));
+        // 2015-09-07: relax the sum of state_freq to be 1, this will be done at the end of optimization
+		int ndim = getNDim();
+		memcpy(variables+(ndim-num_states+2), state_freq, (num_states-1)*sizeof(double));
 
         // BUG FIX 2015.08.28
-        int nrate = getNDim();
-        if (freq_type == FREQ_ESTIMATE) nrate -= (num_states-1);
-		int i, j;
-		for (i = 0, j = 1; i < num_states; i++)
-			if (i != highest_freq_state) {
-				variables[nrate+j] = state_freq[i] / state_freq[highest_freq_state];
-				j++;
-			}
+//        int nrate = getNDim();
+//        if (freq_type == FREQ_ESTIMATE) nrate -= (num_states-1);
+//		int i, j;
+//		for (i = 0, j = 1; i < num_states; i++)
+//			if (i != highest_freq_state) {
+//				variables[nrate+j] = state_freq[i] / state_freq[highest_freq_state];
+//				j++;
+//			}
 	}
 }
 
diff --git a/model/modelcodon.h b/model/modelcodon.h
index c6f064e..3d617fd 100644
--- a/model/modelcodon.h
+++ b/model/modelcodon.h
@@ -171,8 +171,9 @@ protected:
 		this function is served for the multi-dimension optimization. It should assign the model parameters
 		from a vector of variables that is index from 1 (NOTE: not from 0)
 		@param variables vector of variables, indexed from 1
+		@return TRUE if parameters are changed, FALSE otherwise (2015-10-20)
 	*/
-	virtual void getVariables(double *variables);
+	virtual bool getVariables(double *variables);
 
 };
 
diff --git a/model/modeldna.cpp b/model/modeldna.cpp
index 7053119..8fd890c 100644
--- a/model/modeldna.cpp
+++ b/model/modeldna.cpp
@@ -243,6 +243,7 @@ string ModelDNA::getNameParams() {
 		}
 	}
 	retname << '}';
+    getNameParamsFreq(retname);
 	return retname.str();
 }
 
@@ -348,8 +349,9 @@ void ModelDNA::writeParameters(ostream &out) {
 }
 
 
-void ModelDNA::getVariables(double *variables) {
+bool ModelDNA::getVariables(double *variables) {
 	int i;
+    bool changed = false;
 	if (num_params > 0) {
 		int num_all = param_spec.length();
 		if (verbose_mode >= VB_MAX) {
@@ -358,31 +360,34 @@ void ModelDNA::getVariables(double *variables) {
 		}
 		for (i = 0; i < num_all; i++)
 			if (!param_fixed[param_spec[i]]) {
+                changed |= (rates[i] != variables[(int)param_spec[i]]);
 				rates[i] = variables[(int)param_spec[i]];
 			}
 	}
 	if (freq_type == FREQ_ESTIMATE) {
-//		int ndim = getNDim();
-//		memcpy(state_freq, variables+(ndim-num_states+2), (num_states-1)*sizeof(double));
+        // 2015-09-07: relax the sum of state_freq to be 1, this will be done at the end of optimization
+		int ndim = getNDim();
+		changed |= memcmpcpy(state_freq, variables+(ndim-num_states+2), (num_states-1)*sizeof(double));
 //		double sum = 0;
 //		for (i = 0; i < num_states-1; i++) 
 //			sum += state_freq[i];
 //		state_freq[num_states-1] = 1.0 - sum;
 
         // BUG FIX 2015.08.28
-        int nrate = getNDim();
-        if (freq_type == FREQ_ESTIMATE) nrate -= (num_states-1);
-		double sum = 1.0;
-		int i, j;
-		for (i = 1; i < num_states; i++)
-			sum += variables[nrate+i];
-		for (i = 0, j = 1; i < num_states; i++)
-			if (i != highest_freq_state) {
-				state_freq[i] = variables[nrate+j] / sum;
-				j++;
-			}
-		state_freq[highest_freq_state] = 1.0/sum;
+//        int nrate = getNDim();
+//        if (freq_type == FREQ_ESTIMATE) nrate -= (num_states-1);
+//		double sum = 1.0;
+//		int i, j;
+//		for (i = 1; i < num_states; i++)
+//			sum += variables[nrate+i];
+//		for (i = 0, j = 1; i < num_states; i++)
+//			if (i != highest_freq_state) {
+//				state_freq[i] = variables[nrate+j] / sum;
+//				j++;
+//			}
+//		state_freq[highest_freq_state] = 1.0/sum;
 	}
+    return changed;
 }
 
 void ModelDNA::setVariables(double *variables) {
@@ -393,17 +398,18 @@ void ModelDNA::setVariables(double *variables) {
 				variables[(int)param_spec[i]] = rates[i];
 	}
 	if (freq_type == FREQ_ESTIMATE) {
-//		int ndim = getNDim();
-//		memcpy(variables+(ndim-num_states+2), state_freq, (num_states-1)*sizeof(double));
+        // 2015-09-07: relax the sum of state_freq to be 1, this will be done at the end of optimization
+		int ndim = getNDim();
+		memcpy(variables+(ndim-num_states+2), state_freq, (num_states-1)*sizeof(double));
 
         // BUG FIX 2015.08.28
-        int nrate = getNDim();
-        if (freq_type == FREQ_ESTIMATE) nrate -= (num_states-1);
-		int i, j;
-		for (i = 0, j = 1; i < num_states; i++)
-			if (i != highest_freq_state) {
-				variables[nrate+j] = state_freq[i] / state_freq[highest_freq_state];
-				j++;
-			}
+//        int nrate = getNDim();
+//        if (freq_type == FREQ_ESTIMATE) nrate -= (num_states-1);
+//		int i, j;
+//		for (i = 0, j = 1; i < num_states; i++)
+//			if (i != highest_freq_state) {
+//				variables[nrate+j] = state_freq[i] / state_freq[highest_freq_state];
+//				j++;
+//			}
 	}
 }
diff --git a/model/modeldna.h b/model/modeldna.h
index 3cf3664..9f91d29 100644
--- a/model/modeldna.h
+++ b/model/modeldna.h
@@ -105,8 +105,9 @@ protected:
 		this function is served for the multi-dimension optimization. It should assign the model parameters 
 		from a vector of variables that is index from 1 (NOTE: not from 0)
 		@param variables vector of variables, indexed from 1
+		@return TRUE if parameters are changed, FALSE otherwise (2015-10-20)
 	*/
-	virtual void getVariables(double *variables);
+	virtual bool getVariables(double *variables);
 
 	/**
 		rate parameter specification, a string of 6 characters
diff --git a/model/modelfactory.cpp b/model/modelfactory.cpp
index 1220d26..a14cf00 100644
--- a/model/modelfactory.cpp
+++ b/model/modelfactory.cpp
@@ -39,15 +39,17 @@
 #include <string>
 #include "timeutil.h"
 #include "myreader.h"
+#include <sstream>
 
 ModelsBlock *readModelsDefinition(Params &params) {
 
 	ModelsBlock *models_block = new ModelsBlock;
 
-	if (true)
+	try
 	{
 		// loading internal model definitions
-		istringstream in(builtin_mixmodels_definition);
+		stringstream in(builtin_mixmodels_definition);
+        assert(in && "stringstream is OK");
 		NxsReader nexus;
 		nexus.Add(models_block);
 	    MyToken token(in);
@@ -56,7 +58,9 @@ ModelsBlock *readModelsDefinition(Params &params) {
 //	    for (ModelsBlock::iterator it = models_block->begin(); it != models_block->end(); it++)
 //	    	if ((*it).flag & NM_FREQ) num_freq++; else num_model++;
 //	    cout << num_model << " models and " << num_freq << " frequency vectors loaded" << endl;
-	}
+	} catch (...) {
+        assert(0 && "predefined mixture models initialized");
+    }
 
 	if (params.model_def_file) {
 		cout << "Reading model definition file " << params.model_def_file << " ... ";
@@ -553,13 +557,7 @@ ModelFactory::ModelFactory(Params &params, PhyloTree *tree, ModelsBlock *models_
 }
 
 int ModelFactory::getNParameters() {
-	int df = model->getNDim() + site_rate->getNDim() + site_rate->phylo_tree->branchNum;
-	if (model->freq_type == FREQ_EMPIRICAL) 
-        df += model->num_states-1;
-	else if (model->freq_type == FREQ_CODON_1x4) 
-        df += 3;
-	else if (model->freq_type == FREQ_CODON_3x4 || model->freq_type == FREQ_CODON_3x4C) 
-        df += 9;
+	int df = model->getNDim() + model->getNDimFreq() + site_rate->getNDim() + site_rate->phylo_tree->branchNum;
 	return df;
 }
 void ModelFactory::readSiteFreq(Alignment *aln, char* site_freq_file, IntVector &site_model, vector<double*> &freq_vec)
@@ -771,6 +769,10 @@ double ModelFactory::optimizeParameters(bool fixed_len, bool write_info,
 	assert(tree);
 
 	stopStoringTransMatrix();
+        // modified by Thomas Wong on Sept 11, 15
+        // no optimization of branch length in the first round
+        cur_lh = tree->computeLikelihood();
+        /*
 	if (fixed_len || tree->params->num_param_iterations == 0)
 		cur_lh = tree->computeLikelihood();
 	else {
@@ -779,6 +781,7 @@ double ModelFactory::optimizeParameters(bool fixed_len, bool write_info,
         else
             cur_lh = tree->computeLikelihood();
 	}
+        */
     tree->setCurScore(cur_lh);
 	if (verbose_mode >= VB_MED || write_info) 
 		cout << "1. Initial log-likelihood: " << cur_lh << endl;
@@ -816,6 +819,10 @@ double ModelFactory::optimizeParameters(bool fixed_len, bool write_info,
             Params::getInstance().fai = false;
         }
 
+                // changed to optimise edge length first, and then Q,W,R inside the loop by Thomas on Sept 11, 15
+		if (!fixed_len)
+			new_lh = tree->optimizeAllBranches(min(i,3), logl_epsilon);  // loop only 3 times in total (previously in v0.9.6 5 times)
+
         new_lh = optimizeParametersOnly(gradient_epsilon);
 
 		if (new_lh == 0.0) {
@@ -826,16 +833,17 @@ double ModelFactory::optimizeParameters(bool fixed_len, bool write_info,
 			model->writeInfo(cout);
 			site_rate->writeInfo(cout);
 		}
-		if (!fixed_len)
-			new_lh = tree->optimizeAllBranches(min(i,3), logl_epsilon);  // loop only 3 times in total (previously in v0.9.6 5 times)
 		if (new_lh > cur_lh + logl_epsilon) {
-            if (Params::getInstance().testAlpha && i == 3) {
-                double newEpsilon = (new_lh - cur_lh) * 0.01;
-                if (newEpsilon > defaultEpsilon) {
-                    logl_epsilon = newEpsilon;
-                    cout << "Estimate model parameters with new epsilon = " << logl_epsilon << endl;
-                }
-            }
+			if (Params::getInstance().testAlpha && Params::getInstance().testAlphaEpsAdaptive) {
+				if (i == 3) {
+					double newEpsilon = (new_lh - cur_lh) * 0.01;
+					if (newEpsilon > defaultEpsilon) {
+						logl_epsilon = newEpsilon;
+						cout << "Estimate model parameters with new epsilon = " << logl_epsilon << endl;
+					}
+				}
+			}
+
 //			if (gradient_epsilon > (new_lh - cur_lh) * logl_epsilon)
 //				gradient_epsilon = (new_lh - cur_lh) * logl_epsilon;
 			cur_lh = new_lh;
@@ -1018,8 +1026,9 @@ void ModelFactory::setVariables(double *variables) {
 	site_rate->setVariables(variables + model->getNDim());
 }
 
-void ModelFactory::getVariables(double *variables) {
-	model->getVariables(variables);
-	site_rate->getVariables(variables + model->getNDim());
+bool ModelFactory::getVariables(double *variables) {
+	bool changed = model->getVariables(variables);
+	changed |= site_rate->getVariables(variables + model->getNDim());
+    return changed;
 }
 
diff --git a/model/modelfactory.h b/model/modelfactory.h
index 47b8bb3..7a0d455 100644
--- a/model/modelfactory.h
+++ b/model/modelfactory.h
@@ -229,8 +229,9 @@ protected:
 		this function is served for the multi-dimension optimization. It should assign the model parameters
 		from a vector of variables that is index from 1 (NOTE: not from 0)
 		@param variables vector of variables, indexed from 1
+		@return TRUE if parameters are changed, FALSE otherwise (2015-10-20)
 	*/
-	virtual void getVariables(double *variables);
+	virtual bool getVariables(double *variables);
 
 };
 
diff --git a/model/modelgtr.cpp b/model/modelgtr.cpp
index 7417f1b..1728f8a 100644
--- a/model/modelgtr.cpp
+++ b/model/modelgtr.cpp
@@ -68,10 +68,28 @@ void ModelGTR::setTree(PhyloTree *tree) {
 	phylo_tree = tree;
 }
 
+string ModelGTR::getName() { // returns `name` plus a frequency-type suffix; never modifies `name`
+	if (getFreqType() == FREQ_EMPIRICAL)
+		return name + "+F";
+	else if (getFreqType() == FREQ_CODON_1x4)
+		return name + "+F1X4"; // fixed: was `name += ...`, which appended the suffix to the member on every call
+	else if (getFreqType() == FREQ_CODON_3x4)
+		return name + "+F3X4";
+	else if (getFreqType() == FREQ_CODON_3x4C)
+		return name + "+F3X4C";
+	else if (getFreqType() == FREQ_ESTIMATE && phylo_tree->aln->seq_type != SEQ_DNA) // no suffix for DNA
+		return name + "+FO";
+	else if (getFreqType() == FREQ_EQUAL && phylo_tree->aln->seq_type != SEQ_DNA) // no suffix for DNA
+		return name + "+FQ";
+    else
+        return name; // every other frequency type: plain model name
+}
+
 string ModelGTR::getNameParams() {
+
 	ostringstream retname;
-	retname << "GTR";
-	if (num_states != 4) retname << num_states;
+	retname << name;
+//	if (num_states != 4) retname << num_states;
 	retname << '{';
 	int nrates = getNumRateEntries();
 	for (int i = 0; i < nrates; i++) {
@@ -79,7 +97,31 @@ string ModelGTR::getNameParams() {
 		retname << rates[i];
 	}
 	retname << '}';
-	return retname.str();
+    getNameParamsFreq(retname);
+    return retname.str();    
+}
+    
+void ModelGTR::getNameParamsFreq(ostream &retname) { // appends the frequency-type suffix to retname only
+	if (getFreqType() == FREQ_EMPIRICAL || (getFreqType() == FREQ_USER_DEFINED && phylo_tree->aln->seq_type == SEQ_DNA)) {
+		retname << "+F";
+        retname << "{" << state_freq[0]; // comma-separated state frequencies in braces
+        for (int i = 1; i < num_states; i++)
+            retname << "," << state_freq[i];
+        retname << "}";
+	} else if (getFreqType() == FREQ_CODON_1x4)
+		retname << "+F1X4";
+	else if (getFreqType() == FREQ_CODON_3x4)
+		retname << "+F3X4";
+	else if (getFreqType() == FREQ_CODON_3x4C)
+		retname << "+F3X4C"; // fixed: was `name += ...`, which skipped the stream and grew the member `name`
+	else if (getFreqType() == FREQ_ESTIMATE) {
+		retname << "+FO";
+        retname << "{" << state_freq[0]; // comma-separated state frequencies in braces
+        for (int i = 1; i < num_states; i++)
+            retname << "," << state_freq[i];
+        retname << "}";
+    } else if (getFreqType() == FREQ_EQUAL && phylo_tree->aln->seq_type != SEQ_DNA)
+		retname << "+FQ";
+}
 
 void ModelGTR::init(StateFreqType type) {
@@ -315,6 +357,12 @@ void ModelGTR::getStateFrequency(double *freq) {
 	assert(state_freq);
 	assert(freq_type != FREQ_UNKNOWN);
 	memcpy(freq, state_freq, sizeof(double) * num_states);
+    // 2015-09-07: relax the sum of state_freq to be 1, this will be done at the end of optimization
+    double sum = 0.0;
+    int i;
+    for (i = 0; i < num_states; i++) sum += freq[i];
+    sum = 1.0/sum;
+    for (i = 0; i < num_states; i++) freq[i] *= sum;
 }
 
 void ModelGTR::setStateFrequency(double* freq)
@@ -356,6 +404,16 @@ int ModelGTR::getNDim() {
 	return ndim;
 }
 
+int ModelGTR::getNDimFreq() {
+    // dimensions attributed to the state frequencies, selected by freq_type
+    switch (freq_type) {
+    case FREQ_EMPIRICAL:  return num_states-1;
+    case FREQ_CODON_1x4:  return 3;
+    case FREQ_CODON_3x4:
+    case FREQ_CODON_3x4C: return 9;
+    default:              return 0; // every other frequency type contributes none
+    }
+}
 
 void ModelGTR::scaleStateFreq(bool sum_one) {
 	int i;
@@ -379,25 +437,41 @@ void ModelGTR::setVariables(double *variables) {
 	if (nrate > 0)
 		memcpy(variables+1, rates, nrate*sizeof(double));
 	if (freq_type == FREQ_ESTIMATE) {
-		int i, j;
-		for (i = 0, j = 1; i < num_states; i++)
-			if (i != highest_freq_state) {
-				variables[nrate+j] = state_freq[i] / state_freq[highest_freq_state];
-				j++;
-			}
+        // 2015-09-07: relax the sum of state_freq to be 1, this will be done at the end of optimization
+		int ndim = getNDim();
+		memcpy(variables+(ndim-num_states+2), state_freq, (num_states-1)*sizeof(double));
+        
+//		int i, j;
+//		for (i = 0, j = 1; i < num_states; i++)
+//			if (i != highest_freq_state) {
+//				variables[nrate+j] = state_freq[i] / state_freq[highest_freq_state];
+//				j++;
+//			}
 		//scaleStateFreq(false);
 //		memcpy(variables+nrate+1, state_freq, (num_states-1)*sizeof(double));
 		//scaleStateFreq(true);
 	}
 }
 
-void ModelGTR::getVariables(double *variables) {
+bool ModelGTR::getVariables(double *variables) {
 	int nrate = getNDim();
+	int i;
+	bool changed = false;
 	if (freq_type == FREQ_ESTIMATE) nrate -= (num_states-1);
-	if (nrate > 0)
+	if (nrate > 0) {
+		for (i = 0; i < nrate; i++)
+			changed |= (rates[i] != variables[i+1]);
 		memcpy(rates, variables+1, nrate * sizeof(double));
+	}
 
 	if (freq_type == FREQ_ESTIMATE) {
+        // 2015-09-07: relax the sum of state_freq to be 1, this will be done at the end of optimization
+        // variables[] is indexed from 1: the num_states-1 frequency entries start at index ndim-num_states+2
+		int ndim = getNDim();
+		for (i = 0; i < num_states-1; i++)
+			changed |= (state_freq[i] != variables[i+ndim-num_states+2]);
+		memcpy(state_freq, variables+(ndim-num_states+2), (num_states-1)*sizeof(double));
+
 //		memcpy(state_freq, variables+nrate+1, (num_states-1)*sizeof(double));
 		//state_freq[num_states-1] = 0.1;
 		//scaleStateFreq(true);
@@ -406,25 +480,28 @@ void ModelGTR::getVariables(double *variables) {
 //		for (int i = 0; i < num_states-1; i++)
 //			sum += state_freq[i];
 //		state_freq[num_states-1] = 1.0 - sum;
-		double sum = 1.0;
-		int i, j;
-		for (i = 1; i < num_states; i++)
-			sum += variables[nrate+i];
-		for (i = 0, j = 1; i < num_states; i++)
-			if (i != highest_freq_state) {
-				state_freq[i] = variables[nrate+j] / sum;
-				j++;
-			}
-		state_freq[highest_freq_state] = 1.0/sum;
+//		double sum = 1.0;
+//		int i, j;
+//		for (i = 1; i < num_states; i++)
+//			sum += variables[nrate+i];
+//		for (i = 0, j = 1; i < num_states; i++)
+//			if (i != highest_freq_state) {
+//				state_freq[i] = variables[nrate+j] / sum;
+//				j++;
+//			}
+//		state_freq[highest_freq_state] = 1.0/sum;
 	}
+	return changed;
 }
 
 double ModelGTR::targetFunk(double x[]) {
-	getVariables(x);
+	bool changed = getVariables(x);
 	if (state_freq[num_states-1] < 1e-4) return 1.0e+12;
-	decomposeRateMatrix();
-	assert(phylo_tree);
-	phylo_tree->clearAllPartialLH();
+	if (changed) {
+		decomposeRateMatrix();
+		assert(phylo_tree);
+		phylo_tree->clearAllPartialLH();
+	}
 	return -phylo_tree->computeLikelihood();
 }
 
@@ -453,10 +530,11 @@ void ModelGTR::setBounds(double *lower_bound, double *upper_bound, bool *bound_c
 
 	if (freq_type == FREQ_ESTIMATE) {
 		for (i = ndim-num_states+2; i <= ndim; i++) {
-            lower_bound[i] = MIN_FREQUENCY/state_freq[highest_freq_state];
+//            lower_bound[i] = MIN_FREQUENCY/state_freq[highest_freq_state];
 //			upper_bound[i] = state_freq[highest_freq_state]/MIN_FREQUENCY;
-//            lower_bound[i]  = MIN_FREQUENCY;
-            upper_bound[i] = 100.0;
+            lower_bound[i]  = MIN_FREQUENCY;
+//            upper_bound[i] = 100.0;
+            upper_bound[i] = 1.0;
             bound_check[i] = false;
         }
 	}
@@ -487,15 +565,21 @@ double ModelGTR::optimizeParameters(double gradient_epsilon) {
 	setVariables(variables);
 	setBounds(lower_bound, upper_bound, bound_check);
 	//packData(variables, lower_bound, upper_bound, bound_check);
-    if (phylo_tree->params->optimize_alg.find("BFGS-B") == string::npos)
+//    if (phylo_tree->params->optimize_alg.find("BFGS-B") == string::npos)
         score = -minimizeMultiDimen(variables, ndim, lower_bound, upper_bound, bound_check, max(gradient_epsilon, TOL_RATE));
-    else
-        score = -L_BFGS_B(ndim, variables+1, lower_bound+1, upper_bound+1, max(gradient_epsilon, TOL_RATE));
-
-	getVariables(variables);
-	//if (freq_type == FREQ_ESTIMATE) scaleStateFreq(true);
-	decomposeRateMatrix();
-	phylo_tree->clearAllPartialLH();
+//    else
+//        score = -L_BFGS_B(ndim, variables+1, lower_bound+1, upper_bound+1, max(gradient_epsilon, TOL_RATE));
+
+	bool changed = getVariables(variables);
+    // BQM 2015-09-07: normalize state_freq
+	if (freq_type == FREQ_ESTIMATE) { 
+        scaleStateFreq(true);
+        changed = true;
+    }
+    if (changed) {
+        decomposeRateMatrix();
+        phylo_tree->clearAllPartialLH();
+    }
 	
 	delete [] bound_check;
 	delete [] lower_bound;
@@ -702,8 +786,8 @@ void ModelGTR::readStateFreq(string str) throw(const char*) {
 			outError("State frequency must be in [0,1] in ", str);
 		if (i == num_states-1 && end_pos < str.length())
 			outError("Unexpected end of string ", str);
-		if (end_pos < str.length() && str[end_pos] != ',')
-			outError("Comma to separate state frequencies not found in ", str);
+		if (end_pos < str.length() && str[end_pos] != ',' && str[end_pos] != ' ')
+			outError("Comma/Space to separate state frequencies not found in ", str);
 		end_pos++;
 	}
 	double sum = 0.0;
diff --git a/model/modelgtr.h b/model/modelgtr.h
index 1be91ba..b73007a 100644
--- a/model/modelgtr.h
+++ b/model/modelgtr.h
@@ -71,10 +71,21 @@ public:
     virtual ~ModelGTR();
 
 	/**
+	 * @return model name
+	 */
+	virtual string getName();
+
+	/**
 	 * @return model name with parameters in form of e.g. GTR{a,b,c,d,e,f}
 	 */
 	virtual string getNameParams();
 
+    /**
+        internal function: return string for frequency
+        @param retname output stream
+    */
+    void getNameParamsFreq(ostream &retname);
+
 	/**
 		set the associated tree
 		@param tree the associated tree
@@ -220,6 +231,11 @@ public:
 		@return the number of dimensions
 	*/
 	virtual int getNDim();
+
+	/**
+		@return the number of dimensions corresponding to state frequencies
+	*/
+	virtual int getNDimFreq();
 	
 
 	/**
@@ -292,8 +308,9 @@ protected:
 		this function is served for the multi-dimension optimization. It should assign the model parameters 
 		from a vector of variables that is index from 1 (NOTE: not from 0)
 		@param variables vector of variables, indexed from 1
+		@return TRUE if parameters are changed, FALSE otherwise (2015-10-20)
 	*/
-	virtual void getVariables(double *variables);
+	virtual bool getVariables(double *variables);
 
 	virtual void freeMem();
 
diff --git a/model/modelmixture.cpp b/model/modelmixture.cpp
index d84cf9a..b4239de 100644
--- a/model/modelmixture.cpp
+++ b/model/modelmixture.cpp
@@ -1035,10 +1035,29 @@ ModelSubst* createModel(string model_str, ModelsBlock *models_block, StateFreqTy
 	return model;
 }
 
+/**
+	Construct a mixture with no weight array yet: prop is NULL, fix_prop is true, optimizing_submodels is false.
+	@param tree associated tree for the model, forwarded together with count_rates to the ModelGTR constructor
+*/
+ModelMixture::ModelMixture(PhyloTree *tree, bool count_rates) : ModelGTR(tree, count_rates) {
+	optimizing_submodels = false;
+	fix_prop = true;
+	prop = NULL;
+}
+
 ModelMixture::ModelMixture(string orig_model_name, string model_name, string model_list, ModelsBlock *models_block,
 		StateFreqType freq, string freq_params, PhyloTree *tree, bool optimize_weights, bool count_rates)
 	: ModelGTR(tree, count_rates)
 {
+	prop = NULL;
+	fix_prop = true;
+	optimizing_submodels = false;
+	initMixture(orig_model_name, model_name, model_list, models_block, freq, freq_params, tree, optimize_weights, count_rates);
+}
+
+void ModelMixture::initMixture(string orig_model_name, string model_name, string model_list, ModelsBlock *models_block,
+		StateFreqType freq, string freq_params, PhyloTree *tree, bool optimize_weights, bool count_rates)
+{
 //	const int MAX_MODELS = 64;
 	size_t cur_pos;
 	int m;
@@ -1096,15 +1115,17 @@ ModelMixture::ModelMixture(string orig_model_name, string model_name, string mod
         for (m = 0; m < freq_weights.size(); m++)
             if (!freq_vec[m]) 
                 freq_weights[m] = sum_weights/freq_weights.size();
-		init(FREQ_USER_DEFINED);
+		ModelGTR::init(FREQ_USER_DEFINED);
 	} else {
 		if (freq_params != "")
 			readStateFreq(freq_params);
-		init(freq);
+		ModelGTR::init(freq);
 	}
 
 	DoubleVector weights;
-    name = orig_model_name;
+    name = orig_model_name.substr(0, orig_model_name.find_first_of("+*"));
+    if (!models_block->findMixModel(name))
+        name = "";
 	full_name = (string)"MIX" + OPEN_BRACKET;
 	if (model_list == "") model_list = model_name;
 	for (m = 0, cur_pos = 0; cur_pos < model_list.length(); m++) {
@@ -1172,6 +1193,8 @@ ModelMixture::ModelMixture(string orig_model_name, string model_name, string mod
 	full_name += CLOSE_BRACKET;
 
 	int nmixtures = size();
+	if (prop)
+		aligned_free(prop);
 	prop = aligned_alloc<double>(nmixtures);
 
 	double sum = 0.0;
@@ -1263,6 +1286,13 @@ int ModelMixture::getNDim() {
 	return dim;
 }
 
+int ModelMixture::getNDimFreq() {
+    int total = 0; // accumulates getNDimFreq() of every mixture component
+    for (iterator comp = begin(); comp != end(); ++comp)
+        total += (*comp)->getNDimFreq();
+    return total;
+}
+
 double ModelMixture::targetFunk(double x[]) {
 	getVariables(x);
 //	decomposeRateMatrix();
@@ -1281,83 +1311,213 @@ double ModelMixture::targetFunk(double x[]) {
 
 double ModelMixture::optimizeWeights() {
     // first compute _pattern_lh_cat
-    if (phylo_tree->getModelFactory()->fused_mix_rate) {
-        phylo_tree->computeMixrateLikelihoodBranchEigen((PhyloNeighbor*)phylo_tree->root->neighbors[0], (PhyloNode*)phylo_tree->root); 
-    } else {
-        phylo_tree->computeMixtureLikelihoodBranchEigen((PhyloNeighbor*)phylo_tree->root->neighbors[0], (PhyloNode*)phylo_tree->root); 
-    }
+    phylo_tree->computePatternLhCat(WSL_MIXTURE);
     size_t ptn, c;
     size_t nptn = phylo_tree->aln->getNPattern();
     size_t nmix = getNMixtures();
     
-    double *lk_ptn = aligned_alloc<double>(nptn);
     double *new_prop = aligned_alloc<double>(nmix);
-    
-        
+    double *ratio_prop = aligned_alloc<double>(nmix);
+
     // EM algorithm loop described in Wang, Li, Susko, and Roger (2008)
     for (int step = 0; step < nmix; step++) {
         // E-step
-        memset(lk_ptn, 0, nptn*sizeof(double));
-        if (step == 0) {
-            for (c = 0; c < nmix; c++) 
-                new_prop[c] = 1.0 / prop[c];
-            // decoupled weights (prop) from _pattern_lh_cat to obtain L_ci and compute pattern likelihood L_i
+
+        if (step > 0) {
+            // convert _pattern_lh_cat taking into account new weights
             for (ptn = 0; ptn < nptn; ptn++) {
                 double *this_lk_cat = phylo_tree->_pattern_lh_cat + ptn*nmix;
                 for (c = 0; c < nmix; c++) {
-                    lk_ptn[ptn] += this_lk_cat[c];
-                    this_lk_cat[c] *= new_prop[c];
+                    this_lk_cat[c] *= ratio_prop[c];
                 }
             } 
-        } else {
-            // update L_i according to (**)
-            for (ptn = 0; ptn < nptn; ptn++) {
-                double *this_lk_cat = phylo_tree->_pattern_lh_cat + ptn*nmix;
-                for (c = 0; c < nmix; c++) {
-                    lk_ptn[ptn] += this_lk_cat[c] * prop[c];
-                }
-            }        
         }
-        
-        // M-step, update weights according to (*)
         memset(new_prop, 0, nmix*sizeof(double));
         for (ptn = 0; ptn < nptn; ptn++) {
-            double inv_lk_ptn = phylo_tree->ptn_freq[ptn] / lk_ptn[ptn];
             double *this_lk_cat = phylo_tree->_pattern_lh_cat + ptn*nmix;
-            for (c = 0; c < nmix; c++)
-                new_prop[c] += this_lk_cat[c] * inv_lk_ptn;
-        }
-        
+            double lk_ptn = phylo_tree->ptn_invar[ptn];
+            for (c = 0; c < nmix; c++) {
+                lk_ptn += this_lk_cat[c];
+            }
+            lk_ptn = phylo_tree->ptn_freq[ptn] / lk_ptn;
+            for (c = 0; c < nmix; c++) {
+                new_prop[c] += this_lk_cat[c] * lk_ptn;
+            }
+        } 
         bool converged = true;
+        double new_pinvar = 0.0;    
         for (c = 0; c < nmix; c++) {
-            new_prop[c] = prop[c] * (new_prop[c] / phylo_tree->getAlnNSite());
+            new_prop[c] /= phylo_tree->getAlnNSite();
             // check for convergence
             converged = converged && (fabs(prop[c]-new_prop[c]) < 1e-4);
+            ratio_prop[c] = new_prop[c] / prop[c];
             prop[c] = new_prop[c];
+            new_pinvar += prop[c];
+        }
+        new_pinvar = 1.0 - new_pinvar;
+        if (new_pinvar != 0.0) {
+            converged = converged && (fabs(phylo_tree->getRate()->getPInvar()-new_pinvar) < 1e-4);
+            phylo_tree->getRate()->setPInvar(new_pinvar);
+            phylo_tree->getRate()->setOptimizePInvar(false);
+            phylo_tree->computePtnInvar();
+            
         }
         if (converged) break;
+
     }
     
+    aligned_free(ratio_prop);
     aligned_free(new_prop);
-    aligned_free(lk_ptn);
+//    aligned_free(lk_ptn);
     return phylo_tree->computeLikelihood();
 }
 
+double ModelMixture::optimizeWithEM(double gradient_epsilon) {
+    size_t ptn, c;
+    size_t nptn = phylo_tree->aln->getNPattern();
+    size_t nmix = size();
+    // Purpose: alternate between re-estimating the mixture weights (prop) and optimizing each component's parameters.
+    double *new_prop = aligned_alloc<double>(nmix);
+    PhyloTree *tree = new PhyloTree;
+    // gradient_epsilon is passed to each component's optimizeParameters(); the return value is phylo_tree->computeLikelihood().
+    // scratch tree: share phylo_tree's central buffers (to save space)
+    tree->central_partial_lh = phylo_tree->central_partial_lh;
+    tree->central_scale_num = phylo_tree->central_scale_num;
+    tree->central_partial_pars = phylo_tree->central_partial_pars;
+    
+    tree->copyPhyloTree(phylo_tree);
+    tree->optimize_by_newton = phylo_tree->optimize_by_newton;
+    tree->setLikelihoodKernel(phylo_tree->sse);
+    // build a model factory and a RateHeterogeneity object for the scratch tree
+    ModelFactory *model_fac = new ModelFactory();
+    model_fac->joint_optimize = phylo_tree->params->optimize_model_rate_joint;
+//    model_fac->unobserved_ptns = phylo_tree->getModelFactory()->unobserved_ptns;
+
+    RateHeterogeneity *site_rate = new RateHeterogeneity; 
+    tree->setRate(site_rate);
+    site_rate->setTree(tree);
+            
+    model_fac->site_rate = site_rate;
+    tree->model_factory = model_fac;
+    tree->setParams(phylo_tree->params);
+    double score;
+        
+    int num_steps = (getNDim()+1)*3;
+    
+    // EM algorithm loop described in Wang, Li, Susko, and Roger (2008); at most num_steps iterations
+    for (int step = 0; step < num_steps; step++) {
+        // refresh phylo_tree->_pattern_lh_cat (this score value is overwritten after the loop)
+        score = phylo_tree->computePatternLhCat(WSL_MIXTURE);
+        
+        memset(new_prop, 0, nmix*sizeof(double));
+                
+        // E-step
+        // per pattern: lk_ptn = ptn_invar + sum of the category entries, then it is replaced by the scale ptn_freq / lk_ptn
+        for (ptn = 0; ptn < nptn; ptn++) {
+            double *this_lk_cat = phylo_tree->_pattern_lh_cat + ptn*nmix;
+            double lk_ptn = phylo_tree->ptn_invar[ptn];
+            for (c = 0; c < nmix; c++) {
+                lk_ptn += this_lk_cat[c];
+            }
+            lk_ptn = phylo_tree->ptn_freq[ptn] / lk_ptn;
+            
+            // scale each category entry in place (pattern-frequency-weighted share of the category) and accumulate it per category
+            for (c = 0; c < nmix; c++) {
+                this_lk_cat[c] *= lk_ptn;
+                new_prop[c] += this_lk_cat[c];
+            }
+        } 
+        
+        // M-step: when weights are not fixed, new weight = accumulated value / number of alignment sites
+        // note: converged starts false when fix_prop is set, so the loop then runs all num_steps iterations
+        bool converged = !fix_prop;
+        
+        if (!fix_prop) {
+            double new_pinvar = 0.0;
+            for (c = 0; c < nmix; c++) {
+                new_prop[c] = new_prop[c] / phylo_tree->getAlnNSite();
+                // converged only if every weight moved by less than 1e-4
+                converged = converged && (fabs(prop[c]-new_prop[c]) < 1e-4);
+                prop[c] = new_prop[c];
+                new_pinvar += prop[c];
+            }
+            new_pinvar = 1.0 - new_pinvar;
+            if (new_pinvar != 0.0) {
+                converged = converged && (fabs(phylo_tree->getRate()->getPInvar()-new_pinvar) < 1e-4);
+                phylo_tree->getRate()->setPInvar(new_pinvar);
+                phylo_tree->getRate()->setOptimizePInvar(false);
+                phylo_tree->computePtnInvar();
+            }
+        }
+        
+        // optimize, one at a time, every component that has free parameters (getNDim() > 0)
+        for (c = 0; c < nmix; c++) if (at(c)->getNDim() > 0) {
+            tree->copyPhyloTree(phylo_tree);
+            ModelGTR *subst_model;
+            subst_model = at(c);
+            tree->setModel(subst_model);
+            subst_model->setTree(tree);
+            model_fac->model = subst_model;
+                        
+            // initialize likelihood
+            tree->initializeAllPartialLh();
+            // reset ptn_freq, then overwrite it with this component's column of _pattern_lh_cat (stride nmix)
+            tree->computePtnFreq();
+            double *this_lk_cat = phylo_tree->_pattern_lh_cat+c;
+            for (ptn = 0; ptn < nptn; ptn++)
+                tree->ptn_freq[ptn] = this_lk_cat[ptn*nmix];
+            subst_model->optimizeParameters(gradient_epsilon);
+            // detach the component from the scratch tree and point it back at phylo_tree
+            tree->setModel(NULL);
+            subst_model->setTree(phylo_tree);
+            
+        }
+        
+        phylo_tree->clearAllPartialLH();
+        if (converged) break;
+    }
+    
+    // detach the shared buffers before deleting the scratch tree (presumably so that deleting it does not free them)
+    tree->central_partial_lh = NULL;
+    tree->central_scale_num = NULL;
+    tree->central_partial_pars = NULL;
+    
+    delete tree;
+    aligned_free(new_prop);
+    score = phylo_tree->computeLikelihood();
+    phylo_tree->clearAllPartialLH();
+    return score;
+}
+
 double ModelMixture::optimizeParameters(double gradient_epsilon) {
 	optimizing_submodels = true;
-	double score = ModelGTR::optimizeParameters(gradient_epsilon);
-	optimizing_submodels = false;
-    if (!fix_prop)
+    
+    int dim = getNDim();
+    double score = 0.0;
+    
+    if (!phylo_tree->getModelFactory()->unobserved_ptns.empty())
+        outError("Mixture model +ASC is not supported yet. Contact author if needed.");
+    
+    if (dim > 0)
+        score = optimizeWithEM(gradient_epsilon);
+    else if (!fix_prop)
         score = optimizeWeights();
+    
+//	double score = ModelGTR::optimizeParameters(gradient_epsilon);
+	optimizing_submodels = false;
 	if (getNDim() == 0) return score;
 	// now rescale Q matrices to have proper interpretation of branch lengths
 	double sum;
 	int i, ncategory = size();
 	for (i = 0, sum = 0.0; i < ncategory; i++)
 		sum += prop[i]*at(i)->total_num_subst;
-	for (i = 0; i < ncategory; i++)
-		at(i)->total_num_subst /= sum;
-	decomposeRateMatrix();
+//    sum += phylo_tree->getRate()->getPInvar();
+    if (fabs(sum-1.0) > 1e-6) {
+        for (i = 0; i < ncategory; i++)
+            at(i)->total_num_subst /= sum;
+        decomposeRateMatrix();
+        phylo_tree->clearAllPartialLH();
+    }
 	return score;
 }
 
@@ -1393,10 +1553,11 @@ void ModelMixture::setVariables(double *variables) {
 
 }
 
-void ModelMixture::getVariables(double *variables) {
+bool ModelMixture::getVariables(double *variables) {
 	int dim = 0;
+    bool changed = false;
 	for (iterator it = begin(); it != end(); it++) {
-		(*it)->getVariables(&variables[dim]);
+		changed |= (*it)->getVariables(&variables[dim]);
 		dim += (*it)->getNDim();
 	}
 //	if (fix_prop) return;
@@ -1433,6 +1594,7 @@ void ModelMixture::getVariables(double *variables) {
 //	}
 //	delete [] y;
 
+    return changed;
 }
 
 void ModelMixture::setBounds(double *lower_bound, double *upper_bound, bool *bound_check) {
@@ -1468,14 +1630,27 @@ void ModelMixture::writeParameters(ostream &out) {
 	}
 }
 
+string ModelMixture::getName() {
+    if (!name.empty()) return name; // a non-empty stored name is reported as is
+    // otherwise compose the name from the components:
+    // "MIX", OPEN_BRACKET, comma-separated getName() of each component, CLOSE_BRACKET
+    string composed = "MIX";
+    composed += OPEN_BRACKET;
+    for (iterator comp = begin(); comp != end(); ++comp)
+        composed += (comp == begin() ? "" : ",") + (*comp)->getName();
+    composed += CLOSE_BRACKET;
+    return composed;
+}
+
 string ModelMixture::getNameParams() {
-//	ostringstream retname;
-//	retname << "MIX" << OPEN_BRACKET;
-//    for (iterator it=begin(); it != end(); it++) {
-//        if (it != begin()) retname << ",";
-//        retname << (*it)->ModelSubst::getNameParams();
-//    }
-//	retname << CLOSE_BRACKET;
-//	return retname.str();
-    return full_name;
+    if (full_name != "")
+        return full_name;
+    string retname = "MIX";
+    retname += OPEN_BRACKET;
+    for (iterator it = begin(); it != end(); it++) {
+        if (it != begin()) retname += ",";
+        retname += (*it)->getNameParams();
+    }
+    retname += CLOSE_BRACKET;
+    return retname;
 }
diff --git a/model/modelmixture.h b/model/modelmixture.h
index dd53df4..45abec9 100644
--- a/model/modelmixture.h
+++ b/model/modelmixture.h
@@ -46,6 +46,16 @@ public:
     ModelMixture(string orig_model_name, string model_name, string model_list, ModelsBlock *models_block,
     		StateFreqType freq, string freq_params, PhyloTree *tree, bool optimize_weights, bool count_rates = true);
 
+    void initMixture(string orig_model_name, string model_name, string model_list, ModelsBlock *models_block,
+    		StateFreqType freq, string freq_params, PhyloTree *tree, bool optimize_weights, bool count_rates = true);
+
+    /**
+		constructor
+		@param tree associated tree for the model
+	*/
+    ModelMixture(PhyloTree *tree, bool count_rates = true);
+
+
     virtual ~ModelMixture();
 
 
@@ -66,14 +76,31 @@ public:
 	virtual int getNDim();
 
 	/**
+		@return the number of dimensions corresponding to state frequencies
+	*/
+	virtual int getNDimFreq();
+	
+	/**
 		the target function which needs to be optimized
 		@param x the input vector x
 		@return the function value at x
 	*/
 	virtual double targetFunk(double x[]);
 
+    /** 
+        optimize mixture weights using EM algorithm 
+        @return log-likelihood of optimized weights
+    */
     double optimizeWeights();
 
+    /** 
+        optimize rate parameters using EM algorithm
+        @param gradient_epsilon gradient tolerance passed to each mixture component's optimizeParameters()
+        @return log-likelihood of optimized parameters
+    */
+    double optimizeWithEM(double gradient_epsilon);
+
+
 	/**
 		optimize model parameters
 		@return the best likelihood
@@ -103,6 +130,11 @@ public:
 	virtual void writeParameters(ostream &out);
 
 	/**
+	 * @return model name
+	 */
+	virtual string getName();
+
+	/**
 	 * @return model name with parameters in form of e.g. GTR{a,b,c,d,e,f}
 	 */
 	virtual string getNameParams();
@@ -137,8 +169,9 @@ protected:
 		this function is served for the multi-dimension optimization. It should assign the model parameters
 		from a vector of variables that is index from 1 (NOTE: not from 0)
 		@param variables vector of variables, indexed from 1
+		@return TRUE if parameters are changed, FALSE otherwise (2015-10-20)
 	*/
-	virtual void getVariables(double *variables);
+	virtual bool getVariables(double *variables);
 
 };
 
diff --git a/model/modelprotein.cpp b/model/modelprotein.cpp
index 762fe23..66379b4 100644
--- a/model/modelprotein.cpp
+++ b/model/modelprotein.cpp
@@ -3164,8 +3164,8 @@ void ModelProtein::init(const char *model_name, string model_params, StateFreqTy
 		readParameters(model_name);
 	}
 	if (freq_params != "") {
-		stringstream ss(freq_params);
-		readStateFreq(ss);
+//		stringstream ss(freq_params);
+		readStateFreq(freq_params);
 	}
 /*	if (name == "WAG") { model_str = model_WAG;}
 	else if (name == "cpREV") model_str = model_cpREV;
diff --git a/model/modelset.cpp b/model/modelset.cpp
index bbba7ce..a75e549 100644
--- a/model/modelset.cpp
+++ b/model/modelset.cpp
@@ -106,11 +106,13 @@ void ModelSet::decomposeRateMatrix()
 }
 
 
-void ModelSet::getVariables(double* variables)
+bool ModelSet::getVariables(double* variables)
 {
 	assert(size());
+    bool changed = false;
 	for (iterator it = begin(); it != end(); it++)
-		(*it)->getVariables(variables);
+		changed |= (*it)->getVariables(variables);
+    return changed;
 }
 
 void ModelSet::setVariables(double* variables)
diff --git a/model/modelset.h b/model/modelset.h
index 2a4829e..ba77370 100644
--- a/model/modelset.h
+++ b/model/modelset.h
@@ -173,8 +173,9 @@ protected:
 		this function is served for the multi-dimension optimization. It should assign the model parameters 
 		from a vector of variables that is index from 1 (NOTE: not from 0)
 		@param variables vector of variables, indexed from 1
+		@return TRUE if parameters are changed, FALSE otherwise (2015-10-20)
 	*/
-	virtual void getVariables(double *variables);
+	virtual bool getVariables(double *variables);
 
 	
 };
diff --git a/model/modelsubst.h b/model/modelsubst.h
index 8d3e8a2..e7a5442 100644
--- a/model/modelsubst.h
+++ b/model/modelsubst.h
@@ -41,6 +41,16 @@ public:
 	virtual int getNDim() { return 0; }
 
 	/**
+		@return the number of dimensions corresponding to state frequencies
+	*/
+	virtual int getNDimFreq() { return 0; }
+	
+	/**
+	 * @return model name
+	 */
+	virtual string getName() { return name; }
+
+	/**
 	 * @return model name with parameters in form of e.g. GTR{a,b,c,d,e,f}
 	 */
 	virtual string getNameParams() { return name; }
@@ -291,8 +301,9 @@ protected:
 		this function is served for the multi-dimension optimization. It should assign the model parameters
 		from a vector of variables that is index from 1 (NOTE: not from 0)
 		@param variables vector of variables, indexed from 1
+		@return TRUE if parameters are changed, FALSE otherwise (2015-10-20)
 	*/
-	virtual void getVariables(double *variables) {}
+	virtual bool getVariables(double *variables) { return false; }
 
 };
 
diff --git a/model/partitionmodel.cpp b/model/partitionmodel.cpp
index c4624f8..c1a8be0 100644
--- a/model/partitionmodel.cpp
+++ b/model/partitionmodel.cpp
@@ -19,10 +19,12 @@
  ***************************************************************************/
 #include "partitionmodel.h"
 #include "superalignment.h"
+#include "model/rategamma.h"
 
 PartitionModel::PartitionModel()
         : ModelFactory()
 {
+	linked_alpha = -1.0;
 }
 
 PartitionModel::PartitionModel(Params &params, PhyloSuperTree *tree, ModelsBlock *models_block)
@@ -32,6 +34,7 @@ PartitionModel::PartitionModel(Params &params, PhyloSuperTree *tree, ModelsBlock
 	is_storing = false;
 	joint_optimize = params.optimize_model_rate_joint;
 	fused_mix_rate = false;
+    linked_alpha = -1.0;
 
 	// create dummy model
 	model = new ModelSubst(tree->aln->num_states);
@@ -41,6 +44,10 @@ PartitionModel::PartitionModel(Params &params, PhyloSuperTree *tree, ModelsBlock
     string model_name = params.model_name;
     PhyloSuperTree::iterator it;
     int part;
+    if (params.link_alpha) {
+        params.gamma_shape = fabs(params.gamma_shape);
+        linked_alpha = params.gamma_shape;
+    }
     for (it = tree->begin(), part = 0; it != tree->end(); it++, part++) {
         assert(!((*it)->getModelFactory()));
         params.model_name = tree->part_info[part].model_name;
@@ -55,6 +62,7 @@ PartitionModel::PartitionModel(Params &params, PhyloSuperTree *tree, ModelsBlock
         	(*it)->aln->computeStateFreq((*it)->getModel()->state_freq, (*it)->aln->getNSite() * (tree->aln->getNSeq() - (*it)->aln->getNSeq()));
         	(*it)->getModel()->decomposeRateMatrix();
         }
+        
         //string taxa_set = ((SuperAlignment*)tree->aln)->getPattern(part);
         //(*it)->copyTree(tree, taxa_set);
         //(*it)->drawTree(cout);
@@ -67,9 +75,36 @@ int PartitionModel::getNParameters() {
     for (PhyloSuperTree::iterator it = tree->begin(); it != tree->end(); it++) {
     	df += (*it)->getModelFactory()->getNParameters();
     }
+    if (linked_alpha > 0)
+        df ++;
     return df;
 }
 
+double PartitionModel::computeFunction(double shape) {
+    // objective for minimizeOneDimen(): records shape in linked_alpha and sums computeFunction(shape) over Gamma-rate partitions
+    PhyloSuperTree *super_tree = (PhyloSuperTree*)site_rate->getTree();
+    linked_alpha = shape;
+    double total = 0.0;
+    for (PhyloSuperTree::iterator part = super_tree->begin(); part != super_tree->end(); ++part)
+        if ((*part)->getRate()->isGammaRate())
+            total += (*part)->getRate()->computeFunction(shape);
+    if (total == 0.0) // a sum of exactly zero is reported as "no Gamma partition"
+        outError("No partition has Gamma rate heterogeneity!");
+    return total;
+}
+
+/** Optimize the gamma shape linked across partitions, starting from linked_alpha.
+    @param write_info print progress messages; @param gradient_epsilon tolerance, floored at TOL_GAMMA_SHAPE
+    @return likelihood of the tree attached to site_rate, as given by computeLikelihood() */
+double PartitionModel::optimizeLinkedAlpha(bool write_info, double gradient_epsilon) {
+    if (write_info) cout << "Optimizing linked gamma shape..." << endl;
+    double negative_lh, ferror;
+    double optx = minimizeOneDimen(MIN_GAMMA_SHAPE, linked_alpha, MAX_GAMMA_SHAPE, max(gradient_epsilon, TOL_GAMMA_SHAPE), &negative_lh, &ferror);
+    // computeFunction() leaves linked_alpha at the last shape it was evaluated at; re-apply the returned optimum if that differs
+    if (linked_alpha != optx) computeFunction(optx);
+    if (write_info) cout << "Linked alpha across partitions: " << linked_alpha << endl;
+    return site_rate->getTree()->computeLikelihood();
+}
 
 double PartitionModel::optimizeParameters(bool fixed_len, bool write_info, double logl_epsilon, double gradient_epsilon) {
     PhyloSuperTree *tree = (PhyloSuperTree*)site_rate->getTree();
@@ -78,7 +113,7 @@ double PartitionModel::optimizeParameters(bool fixed_len, bool write_info, doubl
 
     if (tree->part_order.empty()) tree->computePartitionOrder();
 	#ifdef _OPENMP
-	#pragma omp parallel for reduction(+: tree_lh) schedule(dynamic)
+	#pragma omp parallel for reduction(+: tree_lh) schedule(dynamic) if(ntrees >= tree->params->num_threads)
 	#endif
     for (int i = 0; i < ntrees; i++) {
         int part = tree->part_order[i];
@@ -95,6 +130,10 @@ double PartitionModel::optimizeParameters(bool fixed_len, bool write_info, doubl
             logl_epsilon/min(ntrees,10), gradient_epsilon/min(ntrees,10));
     }
     //return ModelFactory::optimizeParameters(fixed_len, write_info);
+
+    if (tree->params->link_alpha) {
+        tree_lh = optimizeLinkedAlpha(write_info, gradient_epsilon);
+    }
     return tree_lh;
 }
 
diff --git a/model/partitionmodel.h b/model/partitionmodel.h
index ed36693..b3a72be 100644
--- a/model/partitionmodel.h
+++ b/model/partitionmodel.h
@@ -61,6 +61,19 @@ public:
 	 */
 	virtual bool isUnstableParameters();
 
+	/** optimize the linked alpha parameter over all partitions with Gamma rate */
+	double optimizeLinkedAlpha(bool write_info, double gradient_epsilon);
+
+	/**
+		override function from Optimization class, used by the minimizeOneDimen() to optimize
+		gamma shape parameter
+	*/
+	virtual double computeFunction(double shape);
+
+
+protected:
+	double linked_alpha;
+
 };
 
 #endif
diff --git a/model/ratefree.cpp b/model/ratefree.cpp
index 12a62c9..0acbf37 100644
--- a/model/ratefree.cpp
+++ b/model/ratefree.cpp
@@ -152,7 +152,7 @@ int RateFree::getNDim() {
     if (fix_params) return 0;
     if (optimizing_params == 0) return (2*ncategory-2); 
     if (optimizing_params == 1) // rates
-        return ncategory;
+        return ncategory-1;
     if (optimizing_params == 2) // proportions
         return ncategory-1;
     return 0;
@@ -183,7 +183,8 @@ double RateFree::optimizeParameters(double gradient_epsilon) {
 	if (verbose_mode >= VB_MED)
 		cout << "Optimizing " << name << " model parameters by " << optimize_alg << " algorithm..." << endl;
 
-    if (optimize_alg == "EM")
+    // TODO: turn off EM algorithm for +ASC model
+    if (optimize_alg.find("EM") != string::npos && phylo_tree->getModelFactory()->unobserved_ptns.empty())
         return optimizeWithEM();
 
 	//if (freq_type == FREQ_ESTIMATE) scaleStateFreq(false);
@@ -197,12 +198,13 @@ double RateFree::optimizeParameters(double gradient_epsilon) {
 //    score = optimizeWeights();
 
     int left = 1, right = 2;
-    if (optimize_alg.substr(0, 6) == "1-BFGS") {
+    if (optimize_alg.find("1-BFGS") != string::npos) {
         left = 0; 
         right = 0;
     }
 
-    for (optimizing_params = left; optimizing_params <= right; optimizing_params++) {
+    // changed by Thomas on Sept 11, 15: optimize weights (Wi) first, then rates (Ri)
+    for (optimizing_params = right; optimizing_params >= left; optimizing_params--) {
     
         ndim = getNDim();
         // by BFGS algorithm
@@ -212,7 +214,7 @@ double RateFree::optimizeParameters(double gradient_epsilon) {
 //        if (optimizing_params == 2 && optimize_alg.find("-EM") != string::npos)
 //            score = optimizeWeights();
 //        else 
-        if (optimize_alg.substr(optimize_alg.length()-2,2) == "-B")
+        if (optimize_alg.find("BFGS-B") != string::npos)
             score = -L_BFGS_B(ndim, variables+1, lower_bound+1, upper_bound+1, max(gradient_epsilon, TOL_FREE_RATE));
         else
             score = -minimizeMultiDimen(variables, ndim, lower_bound, upper_bound, bound_check, max(gradient_epsilon, TOL_FREE_RATE));
@@ -290,7 +292,7 @@ void RateFree::setVariables(double *variables) {
             variables[i+1] = prop[i] / prop[ncategory-1];
     } else if (optimizing_params == 1) {
         // rates
-        for (i = 0; i < ncategory; i++)
+        for (i = 0; i < ncategory-1; i++)
             variables[i+1] = rates[i];
     } else {
         // both rates and weights
@@ -302,10 +304,10 @@ void RateFree::setVariables(double *variables) {
 
 }
 
-void RateFree::getVariables(double *variables) {
-	if (getNDim() == 0) return;
+bool RateFree::getVariables(double *variables) {
+	if (getNDim() == 0) return false;
 	int i;
-
+    bool changed = false;
 	// Modified by Thomas on 13 May 2015
 	// --start--
 	/*
@@ -340,21 +342,47 @@ void RateFree::getVariables(double *variables) {
             sum += variables[i+1];
         }
         for (i = 0; i < ncategory-1; i++) {
+            changed |= (prop[i] != variables[i+1] / sum);
             prop[i] = variables[i+1] / sum;
         }
+        changed |= (prop[ncategory-1] != 1.0 / sum);
         prop[ncategory-1] = 1.0 / sum;
+        // added by Thomas on Sept 10, 15
+        // update the values of rates, in order to
+        // maintain the sum of prop[i]*rates[i] = 1
+//        sum = 0;
+//        for (i = 0; i < ncategory; i++) {
+//            sum += prop[i] * rates[i];
+//        }
+//        for (i = 0; i < ncategory; i++) {
+//            rates[i] = rates[i] / sum;
+//        }
     } else if (optimizing_params == 1) {
         // rates
-        for (i = 0; i < ncategory; i++)
+        for (i = 0; i < ncategory-1; i++) {
+            changed |= (rates[i] != variables[i+1]);
             rates[i] = variables[i+1];
+        }
+        // added by Thomas on Sept 10, 15
+        // need to normalize the values of rates, in order to
+        // maintain the sum of prop[i]*rates[i] = 1
+//        sum = 0;
+//        for (i = 0; i < ncategory; i++) {
+//            sum += prop[i] * rates[i];
+//        }
+//        for (i = 0; i < ncategory; i++) {
+//            rates[i] = rates[i] / sum;
+//        }
     } else {
         // both weights and rates
         for (i = 0; i < ncategory-1; i++) {
             sum += variables[i+1];
         }
         for (i = 0; i < ncategory-1; i++) {
+            changed |= (prop[i] != variables[i+1] / sum);
             prop[i] = variables[i+1] / sum;
         }
+        changed |= (prop[ncategory-1] != 1.0 / sum);
         prop[ncategory-1] = 1.0 / sum;
         
         // then rates
@@ -363,12 +391,14 @@ void RateFree::getVariables(double *variables) {
     		sum += prop[i] * variables[i+ncategory];
     	}
     	for (i = 0; i < ncategory-1; i++) {
+            changed |= (rates[i] != variables[i+ncategory] / sum);
     		rates[i] = variables[i+ncategory] / sum;
     	}
+        changed |= (rates[ncategory-1] != 1.0 / sum);
     	rates[ncategory-1] = 1.0 / sum;
     }
 	// --end--
-
+    return changed;
 }
 
 /**
@@ -406,6 +436,7 @@ double RateFree::optimizeWithEM() {
     // initialize model
     ModelFactory *model_fac = new ModelFactory();
     model_fac->joint_optimize = phylo_tree->params->optimize_model_rate_joint;
+//    model_fac->unobserved_ptns = phylo_tree->getModelFactory()->unobserved_ptns;
 
     RateHeterogeneity *site_rate = new RateHeterogeneity; 
     tree->setRate(site_rate);
@@ -419,14 +450,7 @@ double RateFree::optimizeWithEM() {
     for (int step = 0; step < ncategory; step++) {
         // first compute _pattern_lh_cat
         double score;
-        if (!phylo_tree->getModel()->isMixture())
-            score = phylo_tree->computeLikelihoodBranchEigen((PhyloNeighbor*)phylo_tree->root->neighbors[0], (PhyloNode*)phylo_tree->root); 
-        else if (phylo_tree->getModelFactory()->fused_mix_rate) {
-            score = phylo_tree->computeMixrateLikelihoodBranchEigen((PhyloNeighbor*)phylo_tree->root->neighbors[0], (PhyloNode*)phylo_tree->root); 
-        } else {
-            outError("Mixture model does not work with FreeRate model!");
-            score = phylo_tree->computeMixtureLikelihoodBranchEigen((PhyloNeighbor*)phylo_tree->root->neighbors[0], (PhyloNode*)phylo_tree->root); 
-        }
+        score = phylo_tree->computePatternLhCat(WSL_RATECAT);
         memset(new_prop, 0, nmix*sizeof(double));
                 
         // E-step
@@ -462,13 +486,16 @@ double RateFree::optimizeWithEM() {
         for (c = 0; c < nmix; c++) {
             tree->copyPhyloTree(phylo_tree);
             ModelGTR *subst_model;
-            if (phylo_tree->getModel()->isMixture())
+            if (phylo_tree->getModel()->isMixture() && phylo_tree->getModelFactory()->fused_mix_rate)
                 subst_model = ((ModelMixture*)phylo_tree->getModel())->at(c);
             else
                 subst_model = (ModelGTR*)phylo_tree->getModel();
             tree->setModel(subst_model);
             subst_model->setTree(tree);
             model_fac->model = subst_model;
+            if (subst_model->isMixture())
+                tree->setLikelihoodKernel(phylo_tree->sse);
+
                         
             // initialize likelihood
             tree->initializeAllPartialLh();
@@ -497,71 +524,3 @@ double RateFree::optimizeWithEM() {
     aligned_free(new_prop);
     return phylo_tree->computeLikelihood();
 }
-
-//double RateFree::optimizeWeights() {
-//    // first compute _pattern_lh_cat
-//    double score;
-//    if (!phylo_tree->getModel()->isMixture())
-//        score = phylo_tree->computeLikelihoodBranchEigen((PhyloNeighbor*)phylo_tree->root->neighbors[0], (PhyloNode*)phylo_tree->root); 
-//    else if (phylo_tree->getModelFactory()->fused_mix_rate) {
-//        score = phylo_tree->computeMixrateLikelihoodBranchEigen((PhyloNeighbor*)phylo_tree->root->neighbors[0], (PhyloNode*)phylo_tree->root); 
-//    } else {
-//        outError("Mixture model does not work with FreeRate model!");
-//        score = phylo_tree->computeMixtureLikelihoodBranchEigen((PhyloNeighbor*)phylo_tree->root->neighbors[0], (PhyloNode*)phylo_tree->root); 
-//    }
-//    size_t ptn, c;
-//    size_t nptn = phylo_tree->aln->getNPattern();
-//    size_t nmix = ncategory;
-//    
-//    double *lk_ptn = aligned_alloc<double>(nptn);
-//    double *new_prop = aligned_alloc<double>(nmix);
-//    
-//        
-//    // EM algorithm loop described in Wang, Li, Susko, and Roger (2008)
-//    for (int step = 0; step < 100; step++) {
-//        // E-step
-//        memset(lk_ptn, 0, nptn*sizeof(double));
-//        if (step == 0) {
-//            for (c = 0; c < nmix; c++) 
-//                new_prop[c] = 1.0 / prop[c];
-//            // decoupled weights (prop) from _pattern_lh_cat to obtain L_ci and compute pattern likelihood L_i
-//            for (ptn = 0; ptn < nptn; ptn++) {
-//                double *this_lk_cat = phylo_tree->_pattern_lh_cat + ptn*nmix;
-//                for (c = 0; c < nmix; c++) {
-//                    lk_ptn[ptn] += this_lk_cat[c];
-//                    this_lk_cat[c] *= new_prop[c];
-//                }
-//            } 
-//        } else {
-//            // update L_i according to (**)
-//            for (ptn = 0; ptn < nptn; ptn++) {
-//                double *this_lk_cat = phylo_tree->_pattern_lh_cat + ptn*nmix;
-//                for (c = 0; c < nmix; c++) {
-//                    lk_ptn[ptn] += this_lk_cat[c] * prop[c];
-//                }
-//            }        
-//        }
-//        
-//        // M-step, update weights according to (*)
-//        memset(new_prop, 0, nmix*sizeof(double));
-//        for (ptn = 0; ptn < nptn; ptn++) {
-//            double inv_lk_ptn = phylo_tree->ptn_freq[ptn] / lk_ptn[ptn];
-//            double *this_lk_cat = phylo_tree->_pattern_lh_cat + ptn*nmix;
-//            for (c = 0; c < nmix; c++)
-//                new_prop[c] += this_lk_cat[c] * inv_lk_ptn;
-//        }
-//        
-//        bool converged = true;
-//        for (c = 0; c < nmix; c++) {
-//            new_prop[c] = prop[c] * (new_prop[c] / phylo_tree->getAlnNSite());
-//            // check for convergence
-//            converged = converged && (fabs(prop[c]-new_prop[c]) < 1e-4);
-//            prop[c] = new_prop[c];
-//        }
-//        if (converged) break;
-//    }
-//    
-//    aligned_free(new_prop);
-//    aligned_free(lk_ptn);
-//    return phylo_tree->computeLikelihood();
-//}
diff --git a/model/ratefree.h b/model/ratefree.h
index a445a2c..6a9b341 100644
--- a/model/ratefree.h
+++ b/model/ratefree.h
@@ -25,7 +25,7 @@ public:
 	/**
 		@return true if this is a Gamma model (default: false)
 	*/	
-    virtual bool isGammaRate() { return false; }
+    virtual int isGammaRate() { return 0; }
 
 	/**
 	 * @return model name with parameters in form of e.g. GTR{a,b,c,d,e,f}
@@ -57,11 +57,12 @@ public:
 	*/
 	virtual double optimizeParameters(double gradient_epsilon);
 
-    /** optimize weights using EM algorithm */
+    /** 
+        optimize rate parameters using EM algorithm 
+        @return log-likelihood of optimized parameters
+    */
     double optimizeWithEM();
 
-    double optimizeWeights();
-
 	/**
 		return the number of dimensions
 	*/
@@ -115,8 +116,9 @@ protected:
 		this function is served for the multi-dimension optimization. It should assign the model parameters
 		from a vector of variables that is index from 1 (NOTE: not from 0)
 		@param variables vector of variables, indexed from 1
+		@return TRUE if parameters are changed, FALSE otherwise (2015-10-20)
 	*/
-	virtual void getVariables(double *variables);
+	virtual bool getVariables(double *variables);
 
 	/**
 	 * proportion of sites for each rate categories
diff --git a/model/ratefreeinvar.cpp b/model/ratefreeinvar.cpp
index 989de24..2b0213a 100644
--- a/model/ratefreeinvar.cpp
+++ b/model/ratefreeinvar.cpp
@@ -74,9 +74,11 @@ void RateFreeInvar::setVariables(double *variables) {
 	from a vector of variables that is index from 1 (NOTE: not from 0)
 	@param variables vector of variables, indexed from 1
 */
-void RateFreeInvar::getVariables(double *variables) {
-	RateFree::getVariables(variables);
-	if (RateInvar::getNDim() == 0) return;
+bool RateFreeInvar::getVariables(double *variables) {
+	bool changed = RateFree::getVariables(variables);
+	if (RateInvar::getNDim() == 0) return changed;
+    changed |= (p_invar != variables[getNDim()]);
 	p_invar = variables[getNDim()];
+    return changed;
 }
 
diff --git a/model/ratefreeinvar.h b/model/ratefreeinvar.h
index 6f42e3e..458e9d7 100644
--- a/model/ratefreeinvar.h
+++ b/model/ratefreeinvar.h
@@ -101,8 +101,9 @@ protected:
 		this function is served for the multi-dimension optimization. It should assign the model parameters
 		from a vector of variables that is index from 1 (NOTE: not from 0)
 		@param variables vector of variables, indexed from 1
+		@return TRUE if parameters are changed, FALSE otherwise (2015-10-20)
 	*/
-	virtual void getVariables(double *variables);
+	virtual bool getVariables(double *variables);
 
 private:
 
diff --git a/model/rategamma.cpp b/model/rategamma.cpp
index 2bbd2bf..c0ddd47 100644
--- a/model/rategamma.cpp
+++ b/model/rategamma.cpp
@@ -131,9 +131,11 @@ void RateGamma::setGammaShape(double gs) {
 }
 
 double RateGamma::computeFunction(double shape) {
-	gamma_shape = shape;
-	computeRates();
-	phylo_tree->clearAllPartialLH();
+	if (gamma_shape != shape) {
+		gamma_shape = shape;
+		computeRates();
+		phylo_tree->clearAllPartialLH();
+	}
 	return -phylo_tree->computeLikelihood();
 }
 
@@ -156,9 +158,11 @@ void RateGamma::setVariables(double *variables) {
 	variables[1] = gamma_shape;
 }
 
-void RateGamma::getVariables(double *variables) {
-	if (getNDim() == 0) return;
+bool RateGamma::getVariables(double *variables) {
+	if (getNDim() == 0) return false;
+    bool changed = (gamma_shape != variables[1]);
 	gamma_shape = variables[1];
+    return changed;
 }
 
 double RateGamma::optimizeParameters(double gradient_epsilon, double min_gamma, double max_gamma) {
@@ -170,10 +174,13 @@ double RateGamma::optimizeParameters(double gradient_epsilon, double min_gamma,
 	double current_shape = gamma_shape;
 	double ferror, optx;
 	optx = minimizeOneDimen(min_gamma, current_shape, max_gamma, max(gradient_epsilon, TOL_GAMMA_SHAPE), &negative_lh, &ferror);
-	gamma_shape = optx;
-	computeRates();
-	phylo_tree->clearAllPartialLH();
-	return -negative_lh;
+//	if (gamma_shape != optx) {
+//		gamma_shape = optx;
+//		computeRates();
+//		phylo_tree->clearAllPartialLH();
+//	}
+//	return phylo_tree->computeLikelihood();
+	return -computeFunction(optx);
 }
 
 double RateGamma::optimizeParameters(double gradient_epsilon) {
@@ -185,10 +192,11 @@ double RateGamma::optimizeParameters(double gradient_epsilon) {
 	double current_shape = gamma_shape;
 	double ferror, optx;
 	optx = minimizeOneDimen(MIN_GAMMA_SHAPE, current_shape, MAX_GAMMA_SHAPE, max(gradient_epsilon, TOL_GAMMA_SHAPE), &negative_lh, &ferror);
-	gamma_shape = optx;
-	computeRates();
-	phylo_tree->clearAllPartialLH();
-	return -negative_lh;
+//	gamma_shape = optx;
+//	computeRates();
+//	phylo_tree->clearAllPartialLH();
+//	return -negative_lh;
+	return -computeFunction(optx);
 }
 
 void RateGamma::writeInfo(ostream &out) {
@@ -203,19 +211,8 @@ void RateGamma::writeParameters(ostream &out) {
 
 int RateGamma::computePatternRates(DoubleVector &pattern_rates, IntVector &pattern_cat) {
 	//cout << "Computing Gamma site rates by empirical Bayes..." << endl;
-//	double *ptn_rates = new double[npattern];
-	if (phylo_tree->sse == LK_NORMAL || phylo_tree->sse == LK_SSE)
-		phylo_tree->computeLikelihoodBranchNaive((PhyloNeighbor*)phylo_tree->root->neighbors[0], (PhyloNode*)phylo_tree->root);
-	else {
-//		switch (phylo_tree->aln->num_states) {
-//		case 4: phylo_tree->computeLikelihoodBranchEigen<4>((PhyloNeighbor*)phylo_tree->root->neighbors[0], (PhyloNode*)phylo_tree->root); break;
-//		case 20: phylo_tree->computeLikelihoodBranchEigen<20>((PhyloNeighbor*)phylo_tree->root->neighbors[0], (PhyloNode*)phylo_tree->root); break;
-//		case 2: phylo_tree->computeLikelihoodBranchEigen<2>((PhyloNeighbor*)phylo_tree->root->neighbors[0], (PhyloNode*)phylo_tree->root); break;
-//		case 64: phylo_tree->computeLikelihoodBranchEigen<64>((PhyloNeighbor*)phylo_tree->root->neighbors[0], (PhyloNode*)phylo_tree->root); break;
-//		default: outError("Option unsupported yet for this sequence type. Contact author if you really need it."); break;
-//		}
-        phylo_tree->computeLikelihoodBranchEigen((PhyloNeighbor*)phylo_tree->root->neighbors[0], (PhyloNode*)phylo_tree->root);
-	}
+
+	phylo_tree->computePatternLhCat(WSL_RATECAT);
 
 	int npattern = phylo_tree->aln->getNPattern();
 	pattern_rates.resize(npattern);
diff --git a/model/rategamma.h b/model/rategamma.h
index 0cf4bd7..df55149 100644
--- a/model/rategamma.h
+++ b/model/rategamma.h
@@ -28,6 +28,9 @@ const double MIN_GAMMA_SHAPE = 0.02;
 const double MAX_GAMMA_SHAPE = 1000.0;
 const double TOL_GAMMA_SHAPE = 0.001;
 
+const int GAMMA_CUT_MEDIAN = 1; // 2 discrete Gamma approximations (mean or median) of Yang 1994
+const int GAMMA_CUT_MEAN   = 2;
+
 class PhyloTree;
 /**
 Discrete gamma distributed site-rate model from Yang 1994
@@ -56,7 +59,10 @@ public:
 	/**
 		@return true if this is a Gamma model (default: false)
 	*/	
-    virtual bool isGammaRate() { return true; }
+    virtual int isGammaRate() { // non-zero for this Gamma model; the value tells which discretization is used
+        if (cut_median) return GAMMA_CUT_MEDIAN; // rates taken from the median cut
+        return GAMMA_CUT_MEAN; // otherwise the mean cut
+    }
 
 	virtual double getGammaShape() { return gamma_shape; }
 
@@ -69,8 +75,9 @@ public:
 
 	/**
 		@return TRUE to use median rate for discrete categories, FALSE to use mean rate instead
+        OBSOLETE, see isGammaRate()
 	*/
-	bool isCutMedian() { return cut_median; }
+//	bool isCutMedian() { return cut_median; }
 
 	/**
 		@return the number of rate categories
@@ -192,8 +199,9 @@ protected:
 		this function is served for the multi-dimension optimization. It should assign the model parameters
 		from a vector of variables that is index from 1 (NOTE: not from 0)
 		@param variables vector of variables, indexed from 1
+		@return TRUE if parameters are changed, FALSE otherwise (2015-10-20)
 	*/
-	virtual void getVariables(double *variables);
+	virtual bool getVariables(double *variables);
 
 	/**
 		number of rate categories
diff --git a/model/rategammainvar.cpp b/model/rategammainvar.cpp
index 6995366..cd36744 100644
--- a/model/rategammainvar.cpp
+++ b/model/rategammainvar.cpp
@@ -25,6 +25,7 @@ RateGammaInvar::RateGammaInvar(int ncat, double shape, bool median,
 	name = "+I" + name;
 	full_name = "Invar+" + full_name;
 	joint_optimize = simultaneous;
+    cur_optimize = 0;
 	computeRates();
 }
 
@@ -66,10 +67,11 @@ void RateGammaInvar::setVariables(double *variables) {
 	RateInvar::setVariables(variables+gid);
 }
 
-void RateGammaInvar::getVariables(double *variables) {
+bool RateGammaInvar::getVariables(double *variables) {
 	int gid = RateGamma::getNDim();
-	RateGamma::getVariables(variables);
-	RateInvar::getVariables(variables+gid);
+	bool changed = RateGamma::getVariables(variables);
+	changed |= RateInvar::getVariables(variables+gid);
+    return changed;
 }
 
 double RateGammaInvar::targetFunk(double x[]) {
@@ -97,11 +99,6 @@ double RateGammaInvar::optimizeParameters(double gradient_epsilon) {
 
 	if (!joint_optimize) {
 //		double lh = phylo_tree->computeLikelihood();
-		cur_optimize = 1;
-		double invar_lh = -DBL_MAX;
-        invar_lh = RateInvar::optimizeParameters(gradient_epsilon);
-//		assert(tree_lh >= lh-0.1);
-//		lh = tree_lh;
 		cur_optimize = 0;
 		double gamma_lh;
 		if (Params::getInstance().testAlpha) {
@@ -109,9 +106,17 @@ double RateGammaInvar::optimizeParameters(double gradient_epsilon) {
 		} else {
             gamma_lh = RateGamma::optimizeParameters(gradient_epsilon);
         }
-		assert(gamma_lh >= invar_lh - 0.1);
+		cur_optimize = 1;
+		double invar_lh = -DBL_MAX;
+        invar_lh = RateInvar::optimizeParameters(gradient_epsilon);
+//		assert(tree_lh >= lh-0.1);
+//		lh = tree_lh;
+
+//		assert(gamma_lh >= invar_lh - 0.1);
 		phylo_tree->clearAllPartialLH();
-		return gamma_lh;
+//		return gamma_lh;
+        cur_optimize = 0;
+        return invar_lh;
 	}
 
 	if (verbose_mode >= VB_MAX)
@@ -146,19 +151,8 @@ double RateGammaInvar::optimizeParameters(double gradient_epsilon) {
 
 int RateGammaInvar::computePatternRates(DoubleVector &pattern_rates, IntVector &pattern_cat) {
 	//cout << "Computing Gamma site rates by empirical Bayes..." << endl;
-//	double *ptn_rates = new double[npattern];
-	if (phylo_tree->sse == LK_NORMAL || phylo_tree->sse == LK_SSE)
-		phylo_tree->computeLikelihoodBranchNaive((PhyloNeighbor*)phylo_tree->root->neighbors[0], (PhyloNode*)phylo_tree->root);
-	else {
-//		switch (phylo_tree->aln->num_states) {
-//		case 4: phylo_tree->computeLikelihoodBranchEigen<4>((PhyloNeighbor*)phylo_tree->root->neighbors[0], (PhyloNode*)phylo_tree->root); break;
-//		case 20: phylo_tree->computeLikelihoodBranchEigen<20>((PhyloNeighbor*)phylo_tree->root->neighbors[0], (PhyloNode*)phylo_tree->root); break;
-//		case 2: phylo_tree->computeLikelihoodBranchEigen<2>((PhyloNeighbor*)phylo_tree->root->neighbors[0], (PhyloNode*)phylo_tree->root); break;
-//		case 64: phylo_tree->computeLikelihoodBranchEigen<64>((PhyloNeighbor*)phylo_tree->root->neighbors[0], (PhyloNode*)phylo_tree->root); break;
-//		default: outError("Option unsupported yet for this sequence type. Contact author if you really need it."); break;
-//		}
-        phylo_tree->computeLikelihoodBranchEigen((PhyloNeighbor*)phylo_tree->root->neighbors[0], (PhyloNode*)phylo_tree->root);
-	}
+
+	phylo_tree->computePatternLhCat(WSL_RATECAT);
 
 	int npattern = phylo_tree->aln->getNPattern();
 	pattern_rates.resize(npattern);
diff --git a/model/rategammainvar.h b/model/rategammainvar.h
index ef6e53f..16b6901 100644
--- a/model/rategammainvar.h
+++ b/model/rategammainvar.h
@@ -126,8 +126,9 @@ protected:
 		this function is served for the multi-dimension optimization. It should assign the model parameters
 		from a vector of variables that is index from 1 (NOTE: not from 0)
 		@param variables vector of variables, indexed from 1
+		@return TRUE if parameters are changed, FALSE otherwise (2015-10-20)
 	*/
-	virtual void getVariables(double *variables);
+	virtual bool getVariables(double *variables);
 
 private:
 
diff --git a/model/rateheterogeneity.h b/model/rateheterogeneity.h
index 235c956..596038e 100644
--- a/model/rateheterogeneity.h
+++ b/model/rateheterogeneity.h
@@ -130,15 +130,27 @@ public:
 	virtual double getPInvar() { return 0.0; }
 
 	/**
+		set the proportion of invariable sites. Default: do nothing
+		@param pinv the proportion of invariable sites
+	*/
+	virtual void setPInvar(double pinv) { }
+
+	/**
+		Set whether or not to optimize p_invar
+		@param opt TRUE to optimize p_invar, FALSE otherwise
+	*/
+	virtual void setOptimizePInvar(bool opt) { }
+
+	/**
 		get the Gamma shape. Default returns 0.0 since it is homogeneous model
 		@return Gamma shape
 	*/	
 	virtual double getGammaShape() { return 0.0; }
 
 	/**
-		@return true if this is a Gamma model (default: false)
+		@return >0 if this is a Gamma model (default: 0)
 	*/	
-    virtual bool isGammaRate() { return false; }
+    virtual int isGammaRate() { return 0; }
 
 	/**
 		the target function which needs to be optimized
@@ -239,8 +251,9 @@ protected:
 		this function is served for the multi-dimension optimization. It should assign the model parameters
 		from a vector of variables that is index from 1 (NOTE: not from 0)
 		@param variables vector of variables, indexed from 1
+		@return TRUE if parameters are changed, FALSE otherwise (2015-10-20)
 	*/
-	virtual void getVariables(double *variables) {}
+	virtual bool getVariables(double *variables) { return false; }
 
 	
 };
diff --git a/model/rateinvar.cpp b/model/rateinvar.cpp
index 67ab1b8..d0fea17 100644
--- a/model/rateinvar.cpp
+++ b/model/rateinvar.cpp
@@ -28,6 +28,7 @@ RateInvar::RateInvar(double p_invar_sites, PhyloTree *tree)
 	else
 		p_invar = MIN_PINVAR;
 	fix_p_invar = false;
+    optimize_p_invar = true;
 	phylo_tree = tree;
 	name = "+I";
 	full_name = "Invar";
@@ -66,7 +67,7 @@ void RateInvar::setBounds(double *lower_bound, double *upper_bound, bool *bound_
 }
 
 double RateInvar::optimizeParameters(double gradient_epsilon) {
-	if (fix_p_invar)
+	if (fix_p_invar || !optimize_p_invar)
 		return -computeFunction(p_invar);
 	if (verbose_mode >= VB_MAX)
 		cout << "Optimizing proportion of invariable sites..." << endl;
@@ -76,7 +77,8 @@ double RateInvar::optimizeParameters(double gradient_epsilon) {
 	//p_invar = minimizeOneDimen(MIN_PINVAR, p_invar, 1.0 - MIN_PINVAR, TOL_PINVAR, &negative_lh, &ferror);
     phylo_tree->clearAllPartialLH();
 	phylo_tree->computePtnInvar();
-	return -negative_lh;
+//	return -negative_lh;
+    return phylo_tree->computeLikelihood();
 }
 
 void RateInvar::writeInfo(ostream &out) {
@@ -92,7 +94,9 @@ void RateInvar::setVariables(double *variables) {
 	variables[1] = p_invar;
 }
 
-void RateInvar::getVariables(double *variables) {
-	if (RateInvar::getNDim() == 0) return;
+bool RateInvar::getVariables(double *variables) {
+	if (RateInvar::getNDim() == 0) return false;
+    bool changed = (p_invar != variables[1]);
 	p_invar = variables[1];
+    return changed;
 }
diff --git a/model/rateinvar.h b/model/rateinvar.h
index 54ff1bc..b20a515 100644
--- a/model/rateinvar.h
+++ b/model/rateinvar.h
@@ -119,11 +119,22 @@ public:
 		fix_p_invar = fixPInvar;
 	}
 
-	void setPInvar(double pInvar) {
+
+	/**
+		set the proportion of invariable sites
+		@param pInvar the new proportion of invariable sites (stored in p_invar)
+	*/
+	virtual void setPInvar(double pInvar) {
 		p_invar = pInvar;
 	}
 
 	/**
+		Set whether or not to optimize p_invar
+		@param opt TRUE to optimize p_invar, FALSE otherwise
+	*/
+	virtual void setOptimizePInvar(bool opt) { optimize_p_invar = opt; }
+
+	/**
 		proportion of invariable sites
 	*/
 	double p_invar;
@@ -133,6 +144,11 @@ public:
 	*/
 	bool fix_p_invar;
 
+    /**
+        TRUE to optimize p_invar (if not fixed), FALSE otherwise (e.g. in case of mixture model)
+    */
+    bool optimize_p_invar;
+
 protected:
 
 	/**
@@ -146,8 +162,9 @@ protected:
 		this function is served for the multi-dimension optimization. It should assign the model parameters
 		from a vector of variables that is index from 1 (NOTE: not from 0)
 		@param variables vector of variables, indexed from 1
+		@return TRUE if parameters are changed, FALSE otherwise (2015-10-20)
 	*/
-	virtual void getVariables(double *variables);
+	virtual bool getVariables(double *variables);
 
 };
 
diff --git a/model/ratekategory.cpp b/model/ratekategory.cpp
index b84c7ea..fe20122 100644
--- a/model/ratekategory.cpp
+++ b/model/ratekategory.cpp
@@ -98,17 +98,8 @@ double RateKategory::optimizeParameters(double gradient_epsilon)
 int RateKategory::computePatternRates(DoubleVector& pattern_rates, IntVector& pattern_cat)
 {
 	cout << "Computing site rates by empirical Bayes..." << endl;
-	if (phylo_tree->sse == LK_NORMAL || phylo_tree->sse == LK_SSE)
-		phylo_tree->computeLikelihoodBranchNaive((PhyloNeighbor*)phylo_tree->root->neighbors[0], (PhyloNode*)phylo_tree->root);
-	else {
-//		switch (phylo_tree->aln->num_states) {
-//		case 4: phylo_tree->computeLikelihoodBranchEigen<4>((PhyloNeighbor*)phylo_tree->root->neighbors[0], (PhyloNode*)phylo_tree->root); break;
-//		case 20: phylo_tree->computeLikelihoodBranchEigen<20>((PhyloNeighbor*)phylo_tree->root->neighbors[0], (PhyloNode*)phylo_tree->root); break;
-//		case 2: phylo_tree->computeLikelihoodBranchEigen<2>((PhyloNeighbor*)phylo_tree->root->neighbors[0], (PhyloNode*)phylo_tree->root); break;
-//		default: outError("Option unsupported yet for this sequence type. Contact author if you really need it."); break;
-//		}
-        phylo_tree->computeLikelihoodBranchEigen((PhyloNeighbor*)phylo_tree->root->neighbors[0], (PhyloNode*)phylo_tree->root);
-	}
+
+	phylo_tree->computePatternLhCat(WSL_RATECAT);
 
 	int npattern = phylo_tree->aln->getNPattern();
 	pattern_rates.resize(npattern);
@@ -145,11 +136,12 @@ int RateKategory::computePatternRates(DoubleVector& pattern_rates, IntVector& pa
 //	delete [] ptn_rates;
 }
 
-void RateKategory::getVariables(double* variables)
+bool RateKategory::getVariables(double* variables)
 {
-	if (ncategory == 1) return;
+	if (ncategory == 1) return false;
+    bool changed = (rates[0] != 1.0);
 	rates[0] = 1.0;
-	memcpy(rates, variables+1, (ncategory-1) * sizeof(double));
+	changed |= memcmpcpy(rates, variables+1, (ncategory-1) * sizeof(double));
 	double sum = 0.0;
 	int i;
 	for (i = 0; i < ncategory-1; i++) 
@@ -157,7 +149,9 @@ void RateKategory::getVariables(double* variables)
 	/*
 	for (i = 0; i < ncategory; i++) 
 		rates[i] = rates[i]*ncategory/sum;*/
+    changed |= (rates[ncategory-1] != ncategory - sum);
 	rates[ncategory-1] = ncategory - sum;
+    return changed;
 }
 
 void RateKategory::setVariables(double* variables)
diff --git a/model/ratekategory.h b/model/ratekategory.h
index f841e38..4289dd3 100644
--- a/model/ratekategory.h
+++ b/model/ratekategory.h
@@ -124,8 +124,9 @@ protected:
 		this function is served for the multi-dimension optimization. It should assign the model parameters 
 		from a vector of variables that is index from 1 (NOTE: not from 0)
 		@param variables vector of variables, indexed from 1
+		@return TRUE if parameters are changed, FALSE otherwise (2015-10-20)
 	*/
-	virtual void getVariables(double *variables);
+	virtual bool getVariables(double *variables);
 
 };
 
diff --git a/msetsblock.cpp b/msetsblock.cpp
index e8693e0..839467e 100644
--- a/msetsblock.cpp
+++ b/msetsblock.cpp
@@ -33,6 +33,11 @@ MSetsBlock::~MSetsBlock()
 		delete *it;
 	}
 	sets.clear();
+
+    for (vector<CharSet* >::reverse_iterator it2 = charsets.rbegin(); it2 != charsets.rend(); it2++)
+        delete *it2;
+        
+    charsets.clear();
 }
 
 
diff --git a/mtree.cpp b/mtree.cpp
index 78813b1..0a74fa2 100644
--- a/mtree.cpp
+++ b/mtree.cpp
@@ -33,7 +33,10 @@ MTree::MTree() {
     leafNum = 0;
     nodeNum = 0;
     rooted = false;
-    num_precision = 6;
+    if (Params::getInstance().min_branch_length <= 0)
+        num_precision = 6;
+    else
+        num_precision = max((int)ceil(-log10(Params::getInstance().min_branch_length))+1, 6);
     len_scale = 1.0;
 	fig_char = "|-+++";
 }
@@ -44,7 +47,10 @@ MTree::MTree(const char *userTreeFile, bool &is_rooted)
 }
 
 void MTree::init(const char *userTreeFile, bool &is_rooted) {
-    num_precision = 10;
+    if (Params::getInstance().min_branch_length <= 0)
+        num_precision = 6;
+    else
+        num_precision = max((int)ceil(-log10(Params::getInstance().min_branch_length))+1, 6);
     len_scale = 1.0;
     readTree(userTreeFile, is_rooted);
     //printInfo();
@@ -296,7 +302,7 @@ int MTree::printTree(ostream &out, int brtype, Node *node, Node *dad)
 
         if (brtype & WT_BR_LEN) {
         	out.setf( std::ios::fixed, std:: ios::floatfield ); // some sofware does handle number format like '1.234e-6'
-            out.precision(10); // increase precision to avoid zero branch (like in RAxML)
+//            out.precision(10); // increase precision to avoid zero branch (like in RAxML)
         	double len = node->neighbors[0]->length;
             if (brtype & WT_BR_SCALE) len *= len_scale;
             if (brtype & WT_BR_LEN_ROUNDING) len = round(len);
@@ -580,8 +586,8 @@ void MTree::initializeTree(Node *node, Node* dad)
 void MTree::parseFile(istream &infile, char &ch, Node* &root, double &branch_len)
 {
     Node *node;
-    int maxlen = 10000;
-    char seqname[10000];
+    int maxlen = 1000;
+    string seqname;
     int seqlen;
     double brlen;
     branch_len = -1.0;
@@ -618,25 +624,30 @@ void MTree::parseFile(istream &infile, char &ch, Node* &root, double &branch_len
     seqlen = 0;
     char end_ch = 0;
     if (ch == '\'' || ch == '"') end_ch = ch;
+    seqname = "";
 
     while (!infile.eof() && seqlen < maxlen)
     {
         if (end_ch == 0) {
             if (is_newick_token(ch) || controlchar(ch)) break;
         }
-        seqname[seqlen++] = ch;
+        seqname += ch;
+        seqlen++;
+//        seqname[seqlen++] = ch;
         ch = infile.get();
         in_column++;
         if (end_ch != 0 && ch == end_ch) {
-            seqname[seqlen++] = ch;
+            seqname += ch;
+            seqlen++;
+//            seqname[seqlen++] = ch;
             break;
         }
     }
     if ((controlchar(ch) || ch == '[' || ch == end_ch) && !infile.eof())
         ch = readNextChar(infile, ch);
     if (seqlen == maxlen)
-        throw "Too long name ( > 100)";
-    seqname[seqlen] = 0;
+        throw "Too long name ( > 1000)";
+//    seqname[seqlen] = 0;
     if (seqlen == 0 && root->isLeaf())
         throw "A taxon has no name.";
     if (seqlen > 0)
@@ -655,9 +666,11 @@ void MTree::parseFile(istream &infile, char &ch, Node* &root, double &branch_len
     {
         ch = readNextChar(infile);
         seqlen = 0;
+        seqname = "";
         while (!is_newick_token(ch) && !controlchar(ch) && !infile.eof() && seqlen < maxlen)
         {
-            seqname[seqlen] = ch;
+//            seqname[seqlen] = ch;
+            seqname += ch;
             seqlen++;
             ch = infile.get();
             in_column++;
@@ -666,8 +679,8 @@ void MTree::parseFile(istream &infile, char &ch, Node* &root, double &branch_len
             ch = readNextChar(infile, ch);
         if (seqlen == maxlen || infile.eof())
             throw "branch length format error.";
-        seqname[seqlen] = 0;
-        branch_len = convert_double(seqname);
+//        seqname[seqlen] = 0;
+        branch_len = convert_double(seqname.c_str());
     }
 }
 
@@ -2035,3 +2048,69 @@ void MTree::removeTaxa(StrVector &taxa_names) {
 	leafNum = taxa.size();
 	initializeTree();
 }
+
+Node *MTree::findFarthestLeaf(Node *node, Node *dad) {
+    if (!node) 
+        node = root;
+    
+    if (dad && node->isLeaf()) {
+        node->height = 0.0;
+        return node;
+    }
+    Node *res = NULL;
+    node->height = 0.0;
+    FOR_NEIGHBOR_IT(node, dad, it) {
+        Node *leaf = findFarthestLeaf((*it)->node, node);
+        if (node->height < (*it)->node->height+1) {
+            node->height = (*it)->node->height+1;
+            res = leaf;
+        }
+    }
+    return res;
+}
+
+//void MTree::sortNeighborBySubtreeSize(Node *node, Node *dad) {
+//    if (dad && node->isLeaf()) {
+//        node->height = 0.0;
+//        return;
+//    }
+//    
+//    node->height = 0.0;
+//    FOR_NEIGHBOR_DECLARE(node, dad, it) {
+//        sortNeighborBySubtreeSize((*it)->node, node);
+//        if (node->height < (*it)->node->height+1)
+//            node->height = (*it)->node->height+1;
+//    }
+//    
+//    // sort neighbors in ascending order of tree height
+//    FOR_NEIGHBOR(node, dad, it)
+//        for (NeighborVec::iterator it2 = it+1; it2 != node->neighbors.end(); it2++)
+//            if ((*it)->node != dad && (*it)->node->height > (*it2)->node->height) {
+//                Neighbor *nei;
+//                nei = *it;
+//                *it = *it2;
+//                *it2 = nei;
+//            }
+//}
+
+void MTree::getPreOrderBranches(NodeVector &nodes, NodeVector &nodes2, Node *node, Node *dad) {
+    if (dad) {
+        nodes.push_back(node);
+        nodes2.push_back(dad);
+    }
+
+    NeighborVec neivec = node->neighbors;
+    NeighborVec::iterator i1, i2;
+    for (i1 = neivec.begin(); i1 != neivec.end(); i1++)
+        for (i2 = i1+1; i2 != neivec.end(); i2++)
+            if ((*i1)->node->height > (*i2)->node->height) {
+                Neighbor *nei = *i1;
+                *i1 = *i2;
+                *i2 = nei;
+            }
+    for (i1 = neivec.begin(); i1 != neivec.end(); i1++)
+        if ((*i1)->node != dad)
+            getPreOrderBranches(nodes, nodes2, (*i1)->node, node);
+//    FOR_NEIGHBOR_IT(node, dad, it) 
+//        getPreOrderBranches(nodes, nodes2, (*it)->node, node);
+}
diff --git a/mtree.h b/mtree.h
index 2a094c0..6cad508 100644
--- a/mtree.h
+++ b/mtree.h
@@ -606,6 +606,26 @@ public:
 	Node *findFirstTaxon(Node *node = NULL, Node *dad = NULL);
 
 	/********************************************************
+            TREE TRAVERSAL
+     ********************************************************/
+
+    /** 
+        @return the leaf farthest from the node within the subtree rooted at node 
+        @param node the starting node, NULL to start from the root
+        @param dad dad of the node, used to direct the search
+    */
+    Node *findFarthestLeaf(Node *node = NULL, Node *dad = NULL);
+        
+    /** 
+        Get branches in pre-order, descending into shallow (smaller height) subtrees first
+        @param nodes one endpoint of branch
+        @param nodes2 other endpoint of branch
+        @param node the starting node (must not be NULL)
+        @param dad dad of the node, used to direct the search
+    */
+    void getPreOrderBranches(NodeVector &nodes, NodeVector &nodes2, Node *node, Node *dad = NULL);
+
+	/********************************************************
             PROPERTIES OF TREE
      ********************************************************/
     /**
diff --git a/mtreeset.cpp b/mtreeset.cpp
index 307ebff..01c1926 100644
--- a/mtreeset.cpp
+++ b/mtreeset.cpp
@@ -104,6 +104,35 @@ void MTreeSet::init(StringIntMap &treels, bool &is_rooted, IntVector &weights) {
 	//tree_weights.resize(size(), 1);
 }
 
+void MTreeSet::init(StrVector &treels, bool &is_rooted) {
+	//resize(treels.size(), NULL);
+	int count = 0;
+	//IntVector ok_trees;
+	//ok_trees.resize(treels.size(), 0);
+	//for (i = 0; i < trees_id.size(); i++) ok_trees[trees_id[i]] = 1;
+
+	for (StrVector::iterator it = treels.begin(); it != treels.end(); it++)
+	{
+		count++;
+		MTree *tree = newTree();
+		stringstream ss(*it);
+		bool myrooted = is_rooted;
+		tree->readTree(ss, myrooted);
+		NodeVector taxa;
+		tree->getTaxa(taxa);
+		for (NodeVector::iterator taxit = taxa.begin(); taxit != taxa.end(); taxit++)
+			(*taxit)->id = atoi((*taxit)->name.c_str());
+		//at(it->second) = tree;
+		push_back(tree);
+		tree_weights.push_back(1);
+		//cout << "Tree " << it->second << ": ";
+		//tree->printTree(cout, WT_NEWLINE);
+	}
+	if (verbose_mode >= VB_MED)
+		cout << count << " tree(s) converted" << endl;
+	//tree_weights.resize(size(), 1);
+}
+
 void MTreeSet::init(vector<string> &trees, vector<string> &taxonNames, bool &is_rooted) {
 	int count = 0;
 	for (vector<string>::iterator it = trees.begin(); it != trees.end(); it++) {
diff --git a/mtreeset.h b/mtreeset.h
index 0969720..37f8458 100644
--- a/mtreeset.h
+++ b/mtreeset.h
@@ -58,6 +58,8 @@ public:
 
 	void init(StringIntMap &treels, bool &is_rooted, IntVector &weights);
 
+	void init(StrVector &treels, bool &is_rooted);
+
 	/**
 	 *  Add trees from \a trees to the tree set
 	 *
diff --git a/ncl/nxstoken.h b/ncl/nxstoken.h
index f534fae..e1d4cc4 100644
--- a/ncl/nxstoken.h
+++ b/ncl/nxstoken.h
@@ -191,7 +191,7 @@ inline char NxsToken::GetNextChar()
 		fileline++;
 		filecol = 1L;
 
-		if (ch == 13 && (int)in.peek() == 10)
+		if (ch == 13 && (int)in.peek() == 10) 
 			ch = in.get();
 
 		atEOL = 1;
@@ -207,7 +207,9 @@ inline char NxsToken::GetNextChar()
 #	if defined(__DECCXX)
 		filepos = 0L;
 #	else
-		filepos = in.tellg();
+    // BQM: this causes a crash when compiling with clang under Windows!
+//		filepos = in.tellg();
+    filepos += 1;
 #	endif
 
 	if (atEOF)
diff --git a/ngs.cpp b/ngs.cpp
index 801af40..b8be2af 100644
--- a/ngs.cpp
+++ b/ngs.cpp
@@ -321,13 +321,14 @@ void NGSRateCat::setVariables(double *variables) {
     memcpy(variables+ncategory+1, proportion, (ncategory-1)*sizeof(double));
 }
 
-void NGSRateCat::getVariables(double *variables) {
-    memcpy(rates, variables+1, ncategory * sizeof(double));
-    memcpy(proportion, variables+ncategory+1, (ncategory-1)*sizeof(double));
+bool NGSRateCat::getVariables(double *variables) {
+    bool changed = memcmpcpy(rates, variables+1, ncategory * sizeof(double));
+    changed |= memcmpcpy(proportion, variables+ncategory+1, (ncategory-1)*sizeof(double));
     double sum = 0.0;
     for (int i = 0; i < ncategory-1; i++)
         sum += proportion[i];
     proportion[ncategory-1] = 1.0 - sum;
+    return changed;
 }
 
 
diff --git a/ngs.h b/ngs.h
index 458732b..89ac89d 100644
--- a/ngs.h
+++ b/ngs.h
@@ -256,8 +256,9 @@ protected:
 		this function is served for the multi-dimension optimization. It should assign the model parameters 
 		from a vector of variables that is index from 1 (NOTE: not from 0)
 		@param variables vector of variables, indexed from 1
+		@return TRUE if parameters are changed, FALSE otherwise (2015-10-20)
 	*/
-	virtual void getVariables(double *variables);
+	virtual bool getVariables(double *variables);
 };
 
 
diff --git a/optimization.cpp b/optimization.cpp
index 40fd5cb..22c21e9 100644
--- a/optimization.cpp
+++ b/optimization.cpp
@@ -300,8 +300,9 @@ double Optimization::minimizeOneDimen(double xmin, double xguess, double xmax, d
 	if (cx > xmax) cx = xmax;
 	
 	/* check if this works */
-	fa = computeFunction(ax);
+    // compute fb first to save some computation, if any
 	fb = computeFunction(bx);
+	fa = computeFunction(ax);
 	fc = computeFunction(cx);
 
 	/* if it works use these borders else be conservative */
@@ -730,6 +731,7 @@ double Optimization::derivativeFunk(double x[], double dfx[]) {
 	double *h = new double[ndim+1];
     double temp;
     int dim;
+	double fx = targetFunk(x);
 	for (dim = 1; dim <= ndim; dim++ ){
 		temp = x[dim];
 		h[dim] = ERROR_X * fabs(temp);
@@ -739,7 +741,6 @@ double Optimization::derivativeFunk(double x[], double dfx[]) {
 		dfx[dim] = (targetFunk(x));
 		x[dim] = temp;
 	}
-	double fx = targetFunk(x);
 	for (dim = 1; dim <= ndim; dim++ )
         dfx[dim] = (dfx[dim] - fx) / h[dim];
     delete [] h;
diff --git a/optimization.h b/optimization.h
index 8fbeb39..e3c8fb2 100644
--- a/optimization.h
+++ b/optimization.h
@@ -141,7 +141,7 @@ public:
      @return minimized function value
      After the function is invoked, the values of x will be updated
     */
-    double L_BFGS_B(int nvar, double* vars, double* lower, double* upper, double pgtol = 1e-5, int maxit = 1000);
+    double L_BFGS_B(int nvar, double* vars, double* lower, double* upper, double pgtol = 1e-5, int maxit = 5); // changed maxit 1000 -> 5 by Thomas on Sept 11, 15
 
     /** internal function called by L_BFGS_B
         should return function value 
diff --git a/pda.cpp b/pda.cpp
index c7e9b81..cbb0aa0 100644
--- a/pda.cpp
+++ b/pda.cpp
@@ -2086,6 +2086,24 @@ void processECOpd(Params &params) {
 	}
 }
 
+void collapseLowBranchSupport(char *user_file, char *split_threshold_str) {
+    DoubleVector minsup;
+    convert_double_vec(split_threshold_str, minsup, '/');
+    if (minsup.empty())
+        outError("wrong -minsupnew argument, please use back-slash separated string");
+    MExtTree tree;
+    bool isrooted = false;
+    tree.readTree(user_file, isrooted);
+    tree.collapseLowBranchSupport(minsup);
+    tree.collapseZeroBranches();
+    if (verbose_mode >= VB_MED)
+        tree.drawTree(cout);
+    string outfile = (string)user_file + ".collapsed";
+    tree.printTree(outfile.c_str());
+    cout << "Tree with collapsed branches written to " << outfile << endl;
+}
+
+
 /********************************************************
 	main function
 ********************************************************/
@@ -2363,6 +2381,9 @@ int main(int argc, char *argv[])
 			/**MINH ANH: for some comparison*/
 			case COMPARE: compare(Params::getInstance()); break; //MA
 		}
+    } else if (Params::getInstance().split_threshold_str) {
+        // for Ricardo: keep only those splits of the input tree whose support is above the given threshold
+        collapseLowBranchSupport(Params::getInstance().user_file, Params::getInstance().split_threshold_str);
 	} else {
 		Params::getInstance().intype = detectInputFile(Params::getInstance().user_file);
 		if (Params::getInstance().intype == IN_NEWICK && Params::getInstance().pdtaxa_file && Params::getInstance().tree_gen == NONE) {
diff --git a/pdnetwork.cpp b/pdnetwork.cpp
index 6f679cd..6b1127a 100644
--- a/pdnetwork.cpp
+++ b/pdnetwork.cpp
@@ -879,7 +879,7 @@ void PDNetwork::findPD_LP(Params &params, vector<SplitSet> &taxa_set) {
 		taxa_set[index].push_back(pd_set);
 	}
 	cout << endl;
-	delete variables;	
+	delete [] variables;	
 }
 
 void PDNetwork::transformLP_Area2(Params &params, const char *outfile, int total_size, bool make_bin) {
@@ -988,7 +988,7 @@ double PDNetwork::findMinKArea_LP(Params &params, const char* filename, double p
 	} else {
 		budget_k = area.countTaxa();
 	}
-	delete variables;
+	delete [] variables;
 	return budget_k;
 }
 
diff --git a/phyloanalysis.cpp b/phyloanalysis.cpp
index e92b5ba..4f1e5d4 100644
--- a/phyloanalysis.cpp
+++ b/phyloanalysis.cpp
@@ -48,7 +48,7 @@
 #include "whtest_wrapper.h"
 #include "model/partitionmodel.h"
 #include "model/modelmixture.h"
-#include "guidedbootstrap.h"
+//#include "guidedbootstrap.h"
 #include "model/modelset.h"
 #include "timeutil.h"
 #include "upperbounds.h"
@@ -276,8 +276,8 @@ void reportModel(ofstream &out, PhyloTree &tree) {
 	int i;
 
 	if (tree.getModel()->isMixture()) {
-		out << "Mixture model of substitution: " << tree.params->model_name << endl;
-		out << "Full name: " << tree.getModelName() << endl;
+		out << "Mixture model of substitution: " << tree.getModelName() << endl;
+//		out << "Full name: " << tree.getModelName() << endl;
 		ModelMixture *mmodel = (ModelMixture*) tree.getModel();
 		out << endl << "  No  Component      Rate    Weight   Parameters" << endl;
 		i = 0;
@@ -332,7 +332,7 @@ void reportRate(ofstream &out, PhyloTree &tree) {
 			out << endl;
 		}
 		if (rate_model->isGammaRate()) {
-			out << "Relative rates are computed as " << ((dynamic_cast<RateGamma*>(rate_model)->isCutMedian()) ? "MEDIAN" : "MEAN") <<
+			out << "Relative rates are computed as " << ((rate_model->isGammaRate() == GAMMA_CUT_MEDIAN) ? "MEDIAN" : "MEAN") <<
 				" of the portion of the Gamma distribution falling in the category." << endl;
 		}
 	}
@@ -350,8 +350,10 @@ void reportTree(ofstream &out, Params &params, PhyloTree &tree, double tree_lh,
 	double AIC_score, AICc_score, BIC_score;
 	computeInformationScores(tree_lh, df, ssize, AIC_score, AICc_score, BIC_score);
     
-	out << "Log-likelihood of the tree: " << fixed << tree_lh << " (s.e. "
-			<< sqrt(lh_variance) << ")" << endl;
+	out << "Log-likelihood of the tree: " << fixed << tree_lh;
+    if (lh_variance > 0.0) 
+        out << " (s.e. " << sqrt(lh_variance) << ")";
+    out << endl;
     out	<< "Unconstrained log-likelihood (without tree): " << tree.aln->computeUnconstrainedLogL() << endl;
 
     out << "Number of free parameters (#branches + #model parameters): " << df << endl;
@@ -434,11 +436,11 @@ void reportTree(ofstream &out, Params &params, PhyloTree &tree, double tree_lh,
 		out << "         Such branches are denoted by '**' in the figure below"
 				<< endl << endl;
 	}
-	int long_branches = tree.countLongBranches(NULL, NULL, MAX_BRANCH_LEN-0.2);
+	int long_branches = tree.countLongBranches(NULL, NULL, params.max_branch_length-0.2);
 	if (long_branches > 0) {
 		//stringstream sstr;
 		out << "WARNING: " << long_branches << " too long branches (>" 
-            << MAX_BRANCH_LEN-0.2 << ") should be treated with caution!" << endl;
+            << params.max_branch_length-0.2 << ") should be treated with caution!" << endl;
 		//out << sstr.str();
 		//cout << sstr.str();
 	}
@@ -468,21 +470,25 @@ void reportTree(ofstream &out, Params &params, PhyloTree &tree, double tree_lh,
     if (params.aLRT_replicates > 0 || params.gbo_replicates || (params.num_bootstrap_samples && params.compute_ml_tree)) {
         out << "Numbers in parentheses are ";
         if (params.aLRT_replicates > 0) {
-            out << "SH-aLRT supports";
+            out << "SH-aLRT support (%)";
             if (params.localbp_replicates)
-                out << " / local bootstrap (LBP)";
+                out << " / local bootstrap support (%)";
         }
+        if (params.aLRT_test)
+            out << " / parametric aLRT support";
+        if (params.aBayes_test)
+            out << " / aBayes support";
         if (params.num_bootstrap_samples && params.compute_ml_tree) {
-            if (params.aLRT_replicates > 0)
+            if (params.aLRT_replicates > 0 || params.aLRT_test || params.aBayes_test)
                 out << " /";
-            out << " standard bootstrap supports";
+            out << " standard bootstrap support (%)";
         }
         if (params.gbo_replicates) {
-            if (params.aLRT_replicates > 0)
+            if (params.aLRT_replicates > 0 || params.aLRT_test || params.aBayes_test)
                 out << " /";
-            out << " ultrafast bootstrap supports";
+            out << " ultrafast bootstrap support (%)";
         }
-        out << " (%)" << endl;
+        out << endl;
     }
     out << endl;
 
@@ -750,7 +756,7 @@ void reportPhyloAnalysis(Params &params, string &original_model,
 			tree.setRootNode(params.root);
             
             if (params.gbo_replicates) {
-                if (tree.boot_consense_logl > tree.candidateTrees.getBestScore()) {
+                if (tree.boot_consense_logl > tree.candidateTrees.getBestScore() + 0.1) {
                     out << endl << "**NOTE**: Consensus tree has higher likelihood than ML tree found! Please use consensus tree below." << endl;
                 }
             }
@@ -779,7 +785,8 @@ void reportPhyloAnalysis(Params &params, string &original_model,
 						root_name = (*it)->aln->getSeqName(0);
 					(*it)->root = (*it)->findNodeName(root_name);
 					assert((*it)->root);
-					reportTree(out, params, *(*it), (*it)->computeLikelihood(), (*it)->computeLogLVariance(), false);
+//					reportTree(out, params, *(*it), (*it)->computeLikelihood(), (*it)->computeLogLVariance(), false);
+					reportTree(out, params, *(*it), stree->part_info[part].cur_score, 0.0, false);
 				}
 			}
 
@@ -1072,7 +1079,7 @@ void checkZeroDist(Alignment *aln, double *dist) {
 		string str = "";
 		bool first = true;
 		for (j = i + 1; j < ntaxa; j++)
-			if (dist[i * ntaxa + j] <= 1e-6) {
+			if (dist[i * ntaxa + j] <= Params::getInstance().min_branch_length) {
 				if (first)
 					str = "ZERO distance between sequences "
 							+ aln->getSeqName(i);
@@ -1211,7 +1218,7 @@ void computeInitialDist(Params &params, IQTree &iqtree, string &dist_file) {
 
 }
 
-void initializeParams(Params &params, IQTree &iqtree, vector<ModelInfo> &model_info) {
+void initializeParams(Params &params, IQTree &iqtree, vector<ModelInfo> &model_info, ModelsBlock *models_block) {
 //    iqtree.setCurScore(-DBL_MAX);
     bool test_only = params.model_name.find("ONLY") != string::npos;
     /* initialize substitution model */
@@ -1255,7 +1262,7 @@ void initializeParams(Params &params, IQTree &iqtree, vector<ModelInfo> &model_i
         fmodel.precision(4);
         fmodel << fixed;
 
-        params.model_name = testModel(params, &iqtree, model_info, fmodel, "", true);
+        params.model_name = testModel(params, &iqtree, model_info, fmodel, models_block, "", true);
         fmodel.close();
         params.startCPUTime = start_cpu_time;
         params.start_real_time = start_real_time;
@@ -1295,7 +1302,7 @@ void pruneTaxa(Params &params, IQTree &iqtree, double *pattern_lh, NodeVector &p
 		double curScore =  iqtree.getCurScore();
 		iqtree.computePatternLikelihood(pattern_lh, &curScore);
 		num_low_support = iqtree.testAllBranches(params.aLRT_threshold, curScore,
-				pattern_lh, params.aLRT_replicates, params.localbp_replicates);
+				pattern_lh, params.aLRT_replicates, params.localbp_replicates, params.aLRT_test, params.aBayes_test);
 		iqtree.printResultTree();
 		cout << "  " << getCPUTime() - mytime << " sec." << endl;
 		cout << num_low_support << " branches show low support values (<= " << params.aLRT_threshold << "%)" << endl;
@@ -1419,10 +1426,10 @@ void printMiscInfo(Params &params, IQTree &iqtree, double *pattern_lh) {
 	if (params.print_site_lh && !params.pll) {
 		string site_lh_file = params.out_prefix;
 		site_lh_file += ".sitelh";
-		if (params.print_site_lh == 1)
+		if (params.print_site_lh == WSL_SITE)
 			printSiteLh(site_lh_file.c_str(), &iqtree, pattern_lh);
 		else
-			printSiteLhCategory(site_lh_file.c_str(), &iqtree);
+			printSiteLhCategory(site_lh_file.c_str(), &iqtree, params.print_site_lh);
 	}
 
     if (params.print_site_posterior) {
@@ -1545,6 +1552,7 @@ void printFinalSearchInfo(Params &params, IQTree &iqtree, double search_cpu_time
 	params.run_time = (getCPUTime() - params.startCPUTime);
 	cout << endl;
 	cout << "Total number of iterations: " << iqtree.stop_rule.getCurIt() << endl;
+//    cout << "Total number of partial likelihood vector computations: " << iqtree.num_partial_lh_computations << endl;
 	cout << "CPU time used for tree search: " << search_cpu_time
 			<< " sec (" << convert_time(search_cpu_time) << ")" << endl;
 	cout << "Wall-clock time used for tree search: " << search_real_time
@@ -1597,7 +1605,7 @@ void runTreeReconstruction(Params &params, string &original_model, IQTree &iqtre
     }
 
     /***************** Initialization for PLL and sNNI ******************/
-    if (params.start_tree == STT_PLL_PARSIMONY || params.pll) {
+    if (params.start_tree == STT_PLL_PARSIMONY || params.start_tree == STT_RANDOM_TREE || params.pll) {
         /* Initialized all data structure for PLL*/
     	iqtree.initializePLL(params);
     }
@@ -1625,12 +1633,17 @@ void runTreeReconstruction(Params &params, string &original_model, IQTree &iqtre
 
    	// FOR TUNG: swapping the order cause bug for -m TESTLINK
 //    iqtree.initSettings(params);
-    initializeParams(params, iqtree, model_info);
+
+	ModelsBlock *models_block = readModelsDefinition(params);
+
+    initializeParams(params, iqtree, model_info, models_block);
     iqtree.initSettings(params);
 
     /*********************** INITIAL MODEL OPTIMIZATION *****************/
 
-    iqtree.initializeModel(params);
+    iqtree.initializeModel(params, models_block);
+
+    delete models_block;
 
     // UpperBounds analysis. Here, to analyse the initial tree without any tree search or optimization
     if (params.upper_bound) {
@@ -1707,18 +1720,33 @@ void runTreeReconstruction(Params &params, string &original_model, IQTree &iqtre
 	iqtree.clearAllPartialLH();
 	initTree = iqtree.optimizeModelParameters(true, initEpsilon);
 
+    // now overwrite with random tree
+    if (params.start_tree == STT_RANDOM_TREE) {
+        cout << "Generate random initial Yule-Harding tree..." << endl;
+        iqtree.generateRandomTree(YULE_HARDING);
+        iqtree.wrapperFixNegativeBranch(true);
+        iqtree.initializeAllPartialLh();
+        initTree = iqtree.optimizeBranches(2);
+        cout << "Log-likelihood of random tree: " << iqtree.getCurScore() << endl;
+    }
 
     /****************** NOW PERFORM MAXIMUM LIKELIHOOD TREE RECONSTRUCTION ******************/
 
     // Update best tree
     iqtree.candidateTrees.update(initTree, iqtree.getCurScore());
 
+    if (params.min_iterations > 0) {
+        cout << "--------------------------------------------------------------------" << endl;
+        cout << "|             INITIALIZING CANDIDATE TREE SET                      |" << endl;
+        cout << "--------------------------------------------------------------------" << endl;
+    }
+
     // Compute maximum likelihood distance
     // ML distance is only needed for IQP
 //    if ( params.start_tree != STT_BIONJ && ((params.snni && !params.iqp) || params.min_iterations == 0)) {
 //        params.compute_ml_dist = false;
 //    }
-    if (params.min_iterations == 0 && params.start_tree != STT_BIONJ)
+    if (params.min_iterations <= 1 && params.start_tree != STT_BIONJ)
         params.compute_ml_dist = false;
     
     if ((params.user_file || params.start_tree == STT_RANDOM_TREE) && params.snni && !params.iqp) {
@@ -1756,7 +1784,8 @@ void runTreeReconstruction(Params &params, string &original_model, IQTree &iqtre
     if (params.min_iterations > 0) {
         double initTime = getCPUTime();
 
-        if (!params.user_file && (params.start_tree == STT_PARSIMONY || params.start_tree == STT_PLL_PARSIMONY)) {
+//        if (!params.user_file && (params.start_tree == STT_PARSIMONY || params.start_tree == STT_PLL_PARSIMONY)) 
+//        {
         	iqtree.initCandidateTreeSet(params.numInitTrees - iqtree.candidateTrees.size(), params.numNNITrees);
         	assert(iqtree.candidateTrees.size() != 0);
         	cout << "Finish initializing candidate tree set. ";
@@ -1764,14 +1793,7 @@ void runTreeReconstruction(Params &params, string &original_model, IQTree &iqtre
         	if (params.write_local_optimal_trees) {
         		printSuboptimalTrees(iqtree, params, ".init_suboptimal_trees");
         	}
-        } else {
-            int nni_count = 0;
-            int nni_steps = 0;
-            cout << "Doing NNI on the initial tree ... " << endl;
-            string tree = iqtree.doNNISearch(nni_count, nni_steps);
-        	iqtree.candidateTrees.update(tree, iqtree.getCurScore(), true);
-
-        }
+//        }
         cout << "Current best tree score: " << iqtree.candidateTrees.getBestScore() << " / CPU time: "
                 << getCPUTime() - initTime << endl;
 	}
@@ -1804,7 +1826,8 @@ void runTreeReconstruction(Params &params, string &original_model, IQTree &iqtre
 		iqtree.readTreeString(iqtree.candidateTrees.getTopTrees()[0]);
 		iqtree.doTreeSearch();
 		iqtree.setAlignment(iqtree.aln);
-        cout << "TREE SEARCH COMPLETED AFTER " << iqtree.stop_rule.getCurIt() << " ITERATIONS" << endl << endl;
+        cout << "TREE SEARCH COMPLETED AFTER " << iqtree.stop_rule.getCurIt() << " ITERATIONS" 
+            << " / Time: " << convert_time(getRealTime() - params.start_real_time) << endl << endl;
 	} else {
 		/* do SPR with likelihood function */
 		if (params.tree_spr) {
@@ -1879,20 +1902,34 @@ void runTreeReconstruction(Params &params, string &original_model, IQTree &iqtre
 	printMiscInfo(params, iqtree, pattern_lh);
 
 	/****** perform SH-aLRT test ******************/
-	if ((params.aLRT_replicates > 0 || params.localbp_replicates > 0) && !params.pll) {
+	if ((params.aLRT_replicates > 0 || params.localbp_replicates > 0 || params.aLRT_test || params.aBayes_test) && !params.pll) {
 		double mytime = getCPUTime();
 		params.aLRT_replicates = max(params.aLRT_replicates, params.localbp_replicates);
-		cout << endl << "Testing tree branches by SH-like aLRT with "
+        cout << endl;
+        if (params.aLRT_replicates > 0) 
+            cout << "Testing tree branches by SH-like aLRT with "
 				<< params.aLRT_replicates << " replicates..." << endl;
+        if (params.localbp_replicates)
+            cout << "Testing tree branches by local-BP test with " << params.localbp_replicates << " replicates..." << endl;
+        if (params.aLRT_test)
+            cout << "Testing tree branches by aLRT parametric test..." << endl;
+        if (params.aBayes_test)
+            cout << "Testing tree branches by aBayes parametric test..." << endl;
 		iqtree.setRootNode(params.root);
-		iqtree.testAllBranches(params.aLRT_threshold, iqtree.getCurScore(),
-				pattern_lh, params.aLRT_replicates, params.localbp_replicates);
-		cout << "CPU Time used:  " << getCPUTime() - mytime << " sec." << endl;
+        if (iqtree.isBifurcating()) {
+            iqtree.testAllBranches(params.aLRT_threshold, iqtree.getCurScore(),
+                    pattern_lh, params.aLRT_replicates, params.localbp_replicates, params.aLRT_test, params.aBayes_test);
+            cout << "CPU Time used:  " << getCPUTime() - mytime << " sec." << endl;
+        } else {
+            outWarning("Tree is multifurcating and such test is not applicable");
+            params.aLRT_replicates = params.localbp_replicates = params.aLRT_test = params.aBayes_test = 0;
+        }
 	}
 
 	if (params.gbo_replicates > 0) {
 		if (!params.online_bootstrap)
-			runGuidedBootstrap(params, iqtree.aln, iqtree);
+			outError("Obsolete feature");
+//			runGuidedBootstrap(params, iqtree.aln, iqtree);
 		else
 			iqtree.summarizeBootstrap(params);
 	}
@@ -1973,7 +2010,16 @@ void searchGAMMAInvarByRestarting(IQTree &iqtree) {
 	else
 		iqtree.setCurScore(iqtree.computeLikelihood());
 	RateGammaInvar* site_rates = dynamic_cast<RateGammaInvar*>(iqtree.getRate());
-	double initAlphas[] = { 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0 };
+	double values[] = { 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0 };
+	vector<double> initAlphas;
+	if (Params::getInstance().randomAlpha) {
+		while (initAlphas.size() < 10) {
+			double initAlpha = random_double();
+			initAlphas.push_back(initAlpha + MIN_GAMMA_SHAPE*2);
+		}
+	} else {
+		initAlphas.assign(values, values+10);
+	}
 	double bestLogl = iqtree.getCurScore();
 	double bestAlpha = 0.0;
 	double bestPInvar = 0.0;
@@ -2004,7 +2050,7 @@ void searchGAMMAInvarByRestarting(IQTree &iqtree) {
 		site_rates->setPInvar(initPInvar);
 		site_rates->computeRates();
 		iqtree.clearAllPartialLH();
-		iqtree.optimizeModelParameters(verbose_mode >= VB_MED, 0.1);
+		iqtree.optimizeModelParameters(verbose_mode >= VB_MED, Params::getInstance().testAlphaEps);
         double estAlpha = iqtree.getRate()->getGammaShape();
         double estPInv = iqtree.getRate()->getPInvar();
         double logl = iqtree.getCurScore();
@@ -2200,9 +2246,9 @@ void runStandardBootstrap(Params &params, string &original_model, Alignment *ali
 		if (params.num_bootstrap_samples == 1)
 			reportPhyloAnalysis(params, original_model, *boot_tree, *model_info);
 		// WHY was the following line missing, which caused memory leak?
+		bootstrap_alignment = boot_tree->aln;
 		delete boot_tree;
 		// fix bug: bootstrap_alignment might be changed
-		bootstrap_alignment = boot_tree->aln;
 		delete bootstrap_alignment;
 	}
 
@@ -2264,6 +2310,7 @@ void convertAlignment(Params &params, IQTree *iqtree) {
 		bootstrap_alignment->createBootstrapAlignment(alignment, NULL, params.bootstrap_spec);
 		delete alignment;
 		alignment = bootstrap_alignment;
+        iqtree->aln = alignment;
 	}
 	if (alignment->isSuperAlignment()) {
 		((SuperAlignment*)alignment)->printCombinedAlignment(params.aln_output);
@@ -2322,6 +2369,17 @@ void runPhyloAnalysis(Params &params) {
 		tree = new IQTree(alignment);
 	}
 
+    if (params.min_branch_length <= 0.0) {
+        params.min_branch_length = 1e-6;
+        if (tree->getAlnNSite() >= 100000) {
+            params.min_branch_length = 0.1 / (tree->getAlnNSite());
+            tree->num_precision = max((int)ceil(-log10(Params::getInstance().min_branch_length))+1, 6);
+            cout.precision(12);
+            cout << "NOTE: minimal branch length is reduced to " << params.min_branch_length << " for long alignment" << endl;
+            cout.precision(3);
+        }
+    }
+
 	string original_model = params.model_name;
 
 	if (params.concatenate_aln) {
@@ -2351,13 +2409,16 @@ void runPhyloAnalysis(Params &params) {
 		convertAlignment(params, tree);
 	} else if (params.gbo_replicates > 0 && params.user_file && params.second_tree) {
 		// run one of the UFBoot analysis
-		runGuidedBootstrap(params, alignment, *tree);
+//		runGuidedBootstrap(params, alignment, *tree);
+		outError("Obsolete feature");
 	} else if (params.avh_test) {
 		// run one of the wondering test for Arndt
-		runAvHTest(params, alignment, *tree);
+//		runAvHTest(params, alignment, *tree);
+		outError("Obsolete feature");
 	} else if (params.bootlh_test) {
 		// run Arndt's plot of tree likelihoods against bootstrap alignments
-		runBootLhTest(params, alignment, *tree);
+//		runBootLhTest(params, alignment, *tree);
+		outError("Obsolete feature");
 	} else if (params.num_bootstrap_samples == 0) {
 		// the main Maximum likelihood tree reconstruction
 		vector<ModelInfo> *model_info = new vector<ModelInfo>;
@@ -2453,9 +2514,12 @@ void runPhyloAnalysis(Params &params) {
 			((PhyloSuperTreePlen*) tree)->printNNIcasesNUM();
 		}
 	}
+    // 2015-09-22: bug fix, move this line to before deleting tree
+    alignment = tree->aln;
 	delete tree;
 	// BUG FIX: alignment can be changed, should delete tree->aln instead
-	alignment = tree->aln;
+    // 2015-09-22: THIS IS STUPID: after deleting tree, one cannot access tree->aln anymore
+//	alignment = tree->aln;
 	delete alignment;
 }
 
diff --git a/phylokernel.h b/phylokernel.h
index 5497634..2a5a2c2 100644
--- a/phylokernel.h
+++ b/phylokernel.h
@@ -75,6 +75,8 @@ void PhyloTree::computePartialLikelihoodEigenSIMD(PhyloNeighbor *dad_branch, Phy
         return;
     dad_branch->partial_lh_computed |= 1;
 
+    num_partial_lh_computations++;
+
     size_t nptn = aln->size() + model_factory->unobserved_ptns.size();
     PhyloNode *node = (PhyloNode*)(dad_branch->node);
 
@@ -678,6 +680,8 @@ void PhyloTree::computeLikelihoodDervEigenSIMD(PhyloNeighbor *dad_branch, PhyloN
 
 			// ptn_invar[ptn] is not aligned
 			lh_ptn = horizontal_add(vc_ptn) + VectorClass().load(&ptn_invar[ptn]);
+			df_ptn = horizontal_add(vc_df);
+			ddf_ptn = horizontal_add(vc_ddf);
 
 		}
 		switch ((nptn-orig_nptn) % VCSIZE) {
@@ -1360,7 +1364,7 @@ void PhyloTree::computePartialParsimonyFastSIMD(PhyloNeighbor *dad_branch, Phylo
     		break;
     	}
         // add dummy states
-        if (site > 0) {
+        if (site > 0 && site < NUM_BITS) {
             x += site/UINT_BITS;
         	*x |= ~((1<<(site%UINT_BITS)) - 1);
             x++;
diff --git a/phylokernelmixrate.h b/phylokernelmixrate.h
index 44e27f0..02ffa4e 100644
--- a/phylokernelmixrate.h
+++ b/phylokernelmixrate.h
@@ -647,6 +647,8 @@ void PhyloTree::computeMixrateLikelihoodDervEigenSIMD(PhyloNeighbor *dad_branch,
 
 			// ptn_invar[ptn] is not aligned
 			lh_ptn = horizontal_add(vc_ptn) + VectorClass().load(&ptn_invar[ptn]);
+			df_ptn = horizontal_add(vc_df);
+			ddf_ptn = horizontal_add(vc_ddf);
 
 		}
 		switch ((nptn-orig_nptn) % VCSIZE) {
diff --git a/phylokernelmixture.h b/phylokernelmixture.h
index 747c4a2..068db03 100644
--- a/phylokernelmixture.h
+++ b/phylokernelmixture.h
@@ -675,6 +675,8 @@ void PhyloTree::computeMixtureLikelihoodDervEigenSIMD(PhyloNeighbor *dad_branch,
 
 			// ptn_invar[ptn] is not aligned
 			lh_ptn = horizontal_add(vc_ptn) + VectorClass().load(&ptn_invar[ptn]);
+			df_ptn = horizontal_add(vc_df);
+			ddf_ptn = horizontal_add(vc_ddf);
 
 		}
 		switch ((nptn-orig_nptn) % VCSIZE) {
diff --git a/phylolib.h b/phylolib.h
deleted file mode 100755
index 3c80012..0000000
--- a/phylolib.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- *  phylolib.h
- *
- *  Created on: Nov 19, 2012
- *  Author: tung
- */
-
-#ifndef PHYLOLIB_H_
-#define PHYLOLIB_H_
-
-//#define _GNU_SOURCE
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-
-
-#endif /* PHYLOLIB_H_ */
diff --git a/phylonode.cpp b/phylonode.cpp
index d59129f..fa39d84 100644
--- a/phylonode.cpp
+++ b/phylonode.cpp
@@ -40,12 +40,14 @@ void PhyloNeighbor::reorientPartialLh(Node *dad) {
 
 
 void PhyloNode::clearReversePartialLh(PhyloNode *dad) {
-	PhyloNeighbor *node_nei = (PhyloNeighbor*)findNeighbor(dad);
-	assert(node_nei);
-	node_nei->partial_lh_computed = 0;
+//	PhyloNeighbor *node_nei = (PhyloNeighbor*)findNeighbor(dad);
+//	assert(node_nei);
+//	node_nei->partial_lh_computed = 0;
 	for (NeighborVec::iterator it = neighbors.begin(); it != neighbors.end(); it ++)
-		if ((*it)->node != dad)
+		if ((*it)->node != dad) {
+			((PhyloNeighbor*)(*it)->node->findNeighbor(this))->partial_lh_computed = 0;
 			((PhyloNode*)(*it)->node)->clearReversePartialLh(this);
+		}
 }
 
 void PhyloNode::clearAllPartialLh(bool make_null, PhyloNode *dad) {
diff --git a/phylosupertree.cpp b/phylosupertree.cpp
index 9c42346..44d207e 100644
--- a/phylosupertree.cpp
+++ b/phylosupertree.cpp
@@ -308,6 +308,7 @@ void PhyloSuperTree::readPartitionNexus(Params &params) {
 
     if (input_aln)
     	delete input_aln;
+    delete sets_block;
 }
 
 void PhyloSuperTree::printPartition(const char *filename) {
@@ -505,8 +506,8 @@ PhyloSuperTree::PhyloSuperTree(Params &params) :  IQTree() {
     
 #ifdef _OPENMP
     if (params.num_threads > size()) {
-        outWarning("More threads (" + convertIntToString(params.num_threads) + ") than number of partitions (" + convertIntToString(size()) + ") is not necessary. ");
-        outWarning("Please rerun again with -nt " + convertIntToString(size()));
+        outWarning("More threads (" + convertIntToString(params.num_threads) + ") than number of partitions (" + convertIntToString(size()) + ") might not be necessary.");
+        outWarning("You are recommended to rerun with '-nt " + convertIntToString(size()) + "' and see if this is faster");
     }
 #endif
 	cout << endl;
@@ -919,7 +920,7 @@ double PhyloSuperTree::computeLikelihood(double *pattern_lh) {
 	} else {
         if (part_order.empty()) computePartitionOrder();
 		#ifdef _OPENMP
-		#pragma omp parallel for reduction(+: tree_lh) schedule(dynamic)
+		#pragma omp parallel for reduction(+: tree_lh) schedule(dynamic) if(ntrees >= params->num_threads)
 		#endif
 		for (int j = 0; j < ntrees; j++) {
             int i = part_order[j];
@@ -930,12 +931,12 @@ double PhyloSuperTree::computeLikelihood(double *pattern_lh) {
 	return tree_lh;
 }
 
-void PhyloSuperTree::computePatternLikelihood(double *pattern_lh, double *cur_logl, double *ptn_lh_cat) {
+void PhyloSuperTree::computePatternLikelihood(double *pattern_lh, double *cur_logl, double *ptn_lh_cat, SiteLoglType wsl) {
 	int offset = 0, offset_lh_cat = 0;
 	iterator it;
 	for (it = begin(); it != end(); it++) {
 		if (ptn_lh_cat)
-			(*it)->computePatternLikelihood(pattern_lh + offset, NULL, ptn_lh_cat + offset_lh_cat);
+			(*it)->computePatternLikelihood(pattern_lh + offset, NULL, ptn_lh_cat + offset_lh_cat, wsl);
 		else
 			(*it)->computePatternLikelihood(pattern_lh + offset);
 		offset += (*it)->aln->getNPattern();
@@ -966,7 +967,7 @@ double PhyloSuperTree::optimizeAllBranches(int my_iterations, double tolerance,
 	int ntrees = size();
     if (part_order.empty()) computePartitionOrder();
 	#ifdef _OPENMP
-	#pragma omp parallel for reduction(+: tree_lh) schedule(dynamic)
+	#pragma omp parallel for reduction(+: tree_lh) schedule(dynamic) if(ntrees >= params->num_threads)
 	#endif
 	for (int j = 0; j < ntrees; j++) {
         int i = part_order[j];
@@ -1069,7 +1070,7 @@ NNIMove PhyloSuperTree::getBestNNIForBran(PhyloNode *node1, PhyloNode *node2, NN
 
     if (part_order.empty()) computePartitionOrder();
 	#ifdef _OPENMP
-	#pragma omp parallel for reduction(+: nni_score1, nni_score2, local_totalNNIs, local_evalNNIs) private(part) schedule(dynamic)
+	#pragma omp parallel for reduction(+: nni_score1, nni_score2, local_totalNNIs, local_evalNNIs) private(part) schedule(dynamic) if(ntrees >= params->num_threads)
 	#endif
 	for (int treeid = 0; treeid < ntrees; treeid++) {
         part = part_order_by_nptn[treeid];
@@ -1290,12 +1291,12 @@ void PhyloSuperTree::changeNNIBrans(NNIMove move) {
 
 }
 
-void PhyloSuperTree::restoreAllBrans(PhyloNode *node, PhyloNode *dad) {
-	int part = 0;
-	for (iterator it = begin(); it != end(); it++, part++) {
-		(*it)->setBranchLengths(part_info[part].cur_brlen);
-	}
-}
+//void PhyloSuperTree::restoreAllBrans(PhyloNode *node, PhyloNode *dad) {
+//	int part = 0;
+//	for (iterator it = begin(); it != end(); it++, part++) {
+//		(*it)->setBranchLengths(part_info[part].cur_brlen);
+//	}
+//}
 
 void PhyloSuperTree::reinsertLeaves(PhyloNodeVector &del_leaves) {
 	IQTree::reinsertLeaves(del_leaves);
diff --git a/phylosupertree.h b/phylosupertree.h
index dd3ce97..d4b7769 100644
--- a/phylosupertree.h
+++ b/phylosupertree.h
@@ -205,7 +205,7 @@ public:
             @param pattern_lh_cat (OUT) if not NULL, store all pattern-likelihood per category
      */
     virtual void computePatternLikelihood(double *pattern_lh, double *cur_logl = NULL,
-    		double *pattern_lh_cat = NULL);
+    		double *pattern_lh_cat = NULL, SiteLoglType wsl = WSL_RATECAT);
 
     /**
             optimize all branch lengths of all subtrees, then compute branch lengths
@@ -241,7 +241,7 @@ public:
 	 * @param node the current node of the post-order tree traversal
 	 * @param dad the dad of that node used to direct the traversal
      */
-    virtual void restoreAllBrans(PhyloNode *node = NULL, PhyloNode *dad = NULL);
+//    virtual void restoreAllBrans(PhyloNode *node = NULL, PhyloNode *dad = NULL);
 
     /**
             reinsert the whole list of leaves back into the supertree then call mapTrees
diff --git a/phylosupertreeplen.cpp b/phylosupertreeplen.cpp
index 1efb692..52d6ed6 100644
--- a/phylosupertreeplen.cpp
+++ b/phylosupertreeplen.cpp
@@ -95,12 +95,11 @@ double PartitionModelPlen::optimizeParameters(bool fixed_len, bool write_info, d
     	cur_lh = 0.0;
         if (tree->part_order.empty()) tree->computePartitionOrder();
         #ifdef _OPENMP
-        #pragma omp parallel for reduction(+: cur_lh) schedule(dynamic)
+        #pragma omp parallel for reduction(+: cur_lh) schedule(dynamic) if(ntrees >= tree->params->num_threads)
         #endif
     	for (int partid = 0; partid < ntrees; partid++) {
             int part = tree->part_order[partid];
     		// Subtree model parameters optimization
-//        	tree->part_info[part].cur_score = tree->at(part)->getModelFactory()->optimizeParameters(true, false, logl_epsilon, gradient_epsilon);
         	tree->part_info[part].cur_score = tree->at(part)->getModelFactory()->optimizeParametersOnly(gradient_epsilon/min(min(i,ntrees),10));
             if (tree->part_info[part].cur_score == 0.0)
                 tree->part_info[part].cur_score = tree->at(part)->computeLikelihood();
@@ -117,10 +116,26 @@ double PartitionModelPlen::optimizeParameters(bool fixed_len, bool write_info, d
         	}
 
     	}
+        if (tree->params->link_alpha) {
+            cur_lh = optimizeLinkedAlpha(write_info, gradient_epsilon);
+        }
+        if (verbose_mode >= VB_MED)
+            cout << "LnL after optimizing individual models: " << cur_lh << endl;
+        assert(cur_lh > tree_lh - 1.0 && "individual model opt reduces LnL");
+        
     	tree->clearAllPartialLH();
     	// Optimizing gene rate
     	if(!tree->fixed_rates){
     		cur_lh = optimizeGeneRate(gradient_epsilon);
+            if (verbose_mode >= VB_MED) {
+                cout << "LnL after optimizing partition-specific rates: " << cur_lh << endl;
+                cout << "Partition-specific rates: ";
+                for(int part = 0; part < ntrees; part++){
+                    cout << " " << tree->part_info[part].part_rate;
+                }
+                cout << endl;
+            }
+            assert(cur_lh > tree_lh - 1.0 && "partition rate opt reduces LnL");
     	}
 
     	// Optimizing branch lengths
@@ -137,7 +152,7 @@ double PartitionModelPlen::optimizeParameters(bool fixed_len, bool write_info, d
     		break;
         }
     	// make sure that the new logl is not so bad compared with previous logl
-    	assert(cur_lh > tree_lh - 1.0);
+    	assert(cur_lh > tree_lh - 1.0 && "branch length opt reduces LnL");
     	tree_lh = cur_lh;
     }
 //    cout <<"OPTIMIZE MODEL has finished"<< endl;
@@ -153,73 +168,20 @@ double PartitionModelPlen::optimizeParameters(bool fixed_len, bool write_info, d
     return tree_lh;
 }
 
-//double PartitionModelPlen::computeFunction(double value) {
-//	PhyloSuperTreePlen *tree = (PhyloSuperTreePlen*)site_rate->getTree();
-//    if (value != tree->part_info[optimizing_part].part_rate) {
-//        tree->part_info[optimizing_part].part_rate = value;
-//        tree->mapBranchLen(optimizing_part);
-//        tree->at(optimizing_part)->clearAllPartialLH();
-//    }
-//    return -tree->at(optimizing_part)->computeLikelihood();
-//}
-
 
 double PartitionModelPlen::optimizeGeneRate(double gradient_epsilon)
 {
 	PhyloSuperTreePlen *tree = (PhyloSuperTreePlen*)site_rate->getTree();
-/*    
-	int ndim = tree->size()-1;
-
-	double *variables   = new double[ndim+1];
-	double *upper_bound = new double[ndim+1];
-	double *lower_bound = new double[ndim+1];
-	bool   *bound_check = new bool[ndim+1];
-	int i;
-	double score;
-
-	// gene rates are optimized by BFGS algorithm
-
-	setVariables(variables);
-
-	for (i = 1; i <= ndim; i++) {
-		//cout << variables[i] << endl;
-		lower_bound[i] = 1e-4;
-		upper_bound[i] = tree->size();
-		bound_check[i] = false;
-	}
-
-	score = -minimizeMultiDimen(variables, ndim, lower_bound, upper_bound, bound_check, tol);
-
-	getVariables(variables);
-	tree->clearAllPartialLH();
-
-	delete [] bound_check;
-	delete [] lower_bound;
-	delete [] upper_bound;
-	delete [] variables;
-
-	return score;
-*/
     // BQM 22-05-2015: change to optimize individual rates
     int i;
     double score = 0.0;
 
     if (tree->part_order.empty()) tree->computePartitionOrder();
     #ifdef _OPENMP
-    #pragma omp parallel for reduction(+: score) private(i) schedule(dynamic)
+    #pragma omp parallel for reduction(+: score) private(i) schedule(dynamic) if(tree->size() >= tree->params->num_threads)
     #endif    
     for (int j = 0; j < tree->size(); j++) {
         int i = tree->part_order[j];
-//        double gene_rate = tree->part_info[i].part_rate;
-//        double negative_lh, ferror;
-//        optimizing_part = i;
-//        gene_rate = minimizeOneDimen(MIN_GENE_RATE, gene_rate, MAX_GENE_RATE, max(TOL_GENE_RATE, gradient_epsilon), &negative_lh, &ferror);
-//    	if (gene_rate != tree->part_info[optimizing_part].part_rate) {
-//            tree->part_info[i].part_rate = gene_rate;
-//            tree->mapBranchLen(i);
-//            tree->at(i)->clearAllPartialLH();
-//        }
-//        tree->part_info[i].cur_score = tree->at(i)->computeLikelihood();
         tree->part_info[i].cur_score = tree->at(i)->optimizeTreeLengthScaling(tree->part_info[i].part_rate, gradient_epsilon);
         score += tree->part_info[i].cur_score;
     }
@@ -233,7 +195,6 @@ double PartitionModelPlen::optimizeGeneRate(double gradient_epsilon)
         else
             nsite += tree->at(i)->aln->getNSite();
     }
-//    sum /= tree->getAlnNSite();
     sum /= nsite;
     tree->scaleLength(sum);
     sum = 1.0/sum;
@@ -242,65 +203,20 @@ double PartitionModelPlen::optimizeGeneRate(double gradient_epsilon)
     return score;
 }
 
-//double PartitionModelPlen::targetFunk(double x[]) {
-//	PhyloSuperTreePlen *tree = (PhyloSuperTreePlen*)site_rate->getTree();
-//
-//	double sum = 0.0;
-//	int part;
-//	for( part = 0; part < tree->size()-1; part ++){
-//		sum += x[part+1];
-//	}
-//	if (tree->size() - sum < 1e-4) return 1.0e+12;
-//
-//	for( part = 0, sum = 0.0; part < tree->size(); part ++){
-//		double rate;
-//		if (part < tree->size() - 1)
-//			rate = x[part+1];
-//		else
-//			rate = tree->size() - sum;
-//		sum += rate;
-//		if(tree->part_info[part].part_rate != rate){
-//			tree->at(part)->clearAllPartialLH();
-//			//tree->at(part)->scaleLength(rate/tree->part_info[part].part_rate);
-//			tree->part_info[part].part_rate = rate;
-//			tree->part_info[part].cur_score = 0.0;
-//		}
-//	}
-//	tree->mapBranchLen();
-//	//getVariables(x);
-//
-//	return -tree->computeLikelihood();
-//}
-
-//void PartitionModelPlen::getVariables(double *variables) {
-//	PhyloSuperTreePlen *tree = (PhyloSuperTreePlen*)site_rate->getTree();
-//	int ntrees = tree->size()-1;
-//	double sum = 0.0;
-//	for(int part = 0; part < ntrees; part++){
-//		tree->part_info[part].part_rate = variables[part+1];
-//		sum += variables[part+1];
-//	}
-//	tree->part_info[ntrees].part_rate = tree->size() - sum;
-//}
-//
-//void PartitionModelPlen::setVariables(double *variables) {
-//	PhyloSuperTreePlen *tree = (PhyloSuperTreePlen*)site_rate->getTree();
-//	int ntrees = tree->size()-1;
-//	for(int part = 0; part < ntrees; part++){
-//		variables[part+1] = tree->part_info[part].part_rate;
-//	}
-//}
 
 int PartitionModelPlen::getNParameters() {
     PhyloSuperTreePlen *tree = (PhyloSuperTreePlen*)site_rate->getTree();
 	int df = 0;
     for (PhyloSuperTreePlen::iterator it = tree->begin(); it != tree->end(); it++) {
-    	df += (*it)->getModelFactory()->model->getNDim()+(*it)->getModelFactory()->site_rate->getNDim();
-		if ( (*it)->getModelFactory()->model->freq_type == FREQ_EMPIRICAL) df +=  (*it)->getModelFactory()->model->num_states-1;
+    	df += (*it)->getModelFactory()->model->getNDim() +
+            (*it)->getModelFactory()->model->getNDimFreq() +
+            (*it)->getModelFactory()->site_rate->getNDim();
     }
     df += tree->branchNum;
     if(!tree->fixed_rates)
     	df += tree->size()-1;
+    if (linked_alpha > 0.0)
+        df ++;
     return df;
 }
 
@@ -349,7 +265,7 @@ void PhyloSuperTreePlen::deleteAllPartialLh() {
 		// reset these pointers so that they are not deleted
 		(*it)->central_partial_lh = NULL;
 		(*it)->central_scale_num = NULL;
-		(*it)->central_partial_pars = NULL;
+//		(*it)->central_partial_pars = NULL;
 		(*it)->_pattern_lh = NULL;
 		(*it)->_pattern_lh_cat = NULL;
 		(*it)->theta_all = NULL;
@@ -368,7 +284,7 @@ PhyloSuperTreePlen::~PhyloSuperTreePlen()
 		// reset these pointers so that they are not deleted
 		(*it)->central_partial_lh = NULL;
 		(*it)->central_scale_num = NULL;
-		(*it)->central_partial_pars = NULL;
+//		(*it)->central_partial_pars = NULL;
 		(*it)->_pattern_lh = NULL;
 		(*it)->_pattern_lh_cat = NULL;
 		(*it)->theta_all = NULL;
@@ -406,8 +322,6 @@ void PhyloSuperTreePlen::mapTrees() {
             noncodon_present = true;
             break;
         }
-//	if (verbose_mode >= VB_DEBUG)
-//		drawTree(cout,  WT_BR_SCALE | WT_INT_NODE | WT_TAXON_ID | WT_NEWLINE | WT_BR_ID);
 	for (it = begin(); it != end(); it++, part++) {
 		string taxa_set = ((SuperAlignment*)aln)->getPattern(part);
 		(*it)->copyTree(this, taxa_set);
@@ -425,36 +339,15 @@ void PhyloSuperTreePlen::mapTrees() {
 			int id = ((SuperAlignment*)aln)->taxa_index[i][part];
 			if (id >=0) part_taxa[i] = my_taxa[id];
 		}
-//		if (verbose_mode >= VB_DEBUG) {
-//			cout << "Subtree for partition " << part << endl;
-//			(*it)->drawTree(cout,  WT_BR_SCALE | WT_INT_NODE | WT_TAXON_ID | WT_NEWLINE | WT_BR_ID);
-//		}
 		linkTree(part, part_taxa);
 	}
-	//if (verbose_mode >= VB_DEBUG) printMapInfo();
 	if (getModel())
 		initializeAllPartialLh();
 }
 
 void PhyloSuperTreePlen::linkTrees() {
 	mapTrees();
-//	int part = 0;
-//	iterator it;
-//	for (it = begin(), part = 0; it != end(); it++, part++) {
-//		(*it)->initializeTree();
-//		(*it)->setAlignment((*it)->aln);
-//		NodeVector my_taxa, part_taxa;
-//		(*it)->getOrderedTaxa(my_taxa);
-//		part_taxa.resize(leafNum, NULL);
-//		int i;
-//		for (i = 0; i < leafNum; i++) {
-//			int id = ((SuperAlignment*)aln)->taxa_index[i][part];
-//			if (id >=0) part_taxa[i] = my_taxa[id];
-//		}
-//		linkTree(part, part_taxa);
-//	}
-//	if (getModel())
-//		initializeAllPartialLh();
+
 
 }
 
@@ -466,17 +359,6 @@ double PhyloSuperTreePlen::optimizeAllBranches(int my_iterations, double toleran
 		part_info[part].cur_score = 0.0;
 	}
 
-//	double logLH1=computeLikelihood();
-//	clearAllPartialLH();
-//	double logLH2=computeLikelihood();
-//	if(fabs(logLH1-logLH2)>1){
-//		cout<<"---------------------------------------------------------"<<endl;
-//		cout<<"BEFORE calling phylotree::optimize all branches "<<endl;
-//		cout<<"DIFFERENCE IN RECOMPUTATION of log-lh = "<<fabs(logLH1-logLH2)<<endl;
-//		cout<<"  initial    = "<<logLH1<<endl;
-//		cout<<"  recomputed = "<<logLH2<<endl;
-//	}
-
 	return PhyloTree::optimizeAllBranches(my_iterations,tolerance, maxNRStep);
 }
 
@@ -506,7 +388,7 @@ void PhyloSuperTreePlen::optimizeOneBranch(PhyloNode *node1, PhyloNode *node2, b
     if (part_order.empty()) computePartitionOrder();
 	// bug fix: assign cur_score into part_info
     #ifdef _OPENMP
-    #pragma omp parallel for private(part) schedule(dynamic)
+    #pragma omp parallel for private(part) schedule(dynamic) if(size() >= params->num_threads)
     #endif    
 	for (int partid = 0; partid < size(); partid++) {
         part = part_order_by_nptn[partid];
@@ -546,7 +428,7 @@ double PhyloSuperTreePlen::computeFunction(double value) {
 
     if (part_order.empty()) computePartitionOrder();
     #ifdef _OPENMP
-    #pragma omp parallel for reduction(+: tree_lh) schedule(dynamic)
+    #pragma omp parallel for reduction(+: tree_lh) schedule(dynamic) if(ntrees >= params->num_threads)
     #endif    
 	for (int partid = 0; partid < ntrees; partid++) {
             int part = part_order_by_nptn[partid];
@@ -568,6 +450,12 @@ double PhyloSuperTreePlen::computeFunction(double value) {
     return -tree_lh;
 }
 
+double PhyloSuperTreePlen::computeLikelihoodBranch(PhyloNeighbor *dad_branch, PhyloNode *dad) {
+    current_it = dad_branch;
+    current_it_back = (PhyloNeighbor*)dad_branch->node->findNeighbor(dad);
+    return -computeFunction(dad_branch->length);
+}
+
 double PhyloSuperTreePlen::computeLikelihoodFromBuffer() {
     //return -computeFunction(current_it->length);
 	double score = 0.0;
@@ -598,7 +486,7 @@ void PhyloSuperTreePlen::computeFuncDerv(double value, double &df_ret, double &d
 
     if (part_order.empty()) computePartitionOrder();
     #ifdef _OPENMP
-    #pragma omp parallel for reduction(+: df, ddf) schedule(dynamic)
+    #pragma omp parallel for reduction(+: df, ddf) schedule(dynamic) if(ntrees >= params->num_threads)
     #endif    
 	for (int partid = 0; partid < ntrees; partid++) {
         int part = part_order_by_nptn[partid];
@@ -616,22 +504,17 @@ void PhyloSuperTreePlen::computeFuncDerv(double value, double &df_ret, double &d
 					cout<<"NEGATIVE BRANCH len = "<<nei1_part->length<<endl<<" rate = "<<part_info[part].part_rate<<endl;
 					outError("shit!!   ",__func__);
 				}
-//				part_info[part].cur_score = at(part)->computeLikelihoodDerv(nei2_part,(PhyloNode*)nei1_part->node, df_aux, ddf_aux);
 				at(part)->computeLikelihoodDerv(nei2_part,(PhyloNode*)nei1_part->node, df_aux, ddf_aux);
-//				tree_lh += part_info[part].cur_score;
 				df += part_info[part].part_rate*df_aux;
 				ddf += part_info[part].part_rate*part_info[part].part_rate*ddf_aux;
 			}
 			else {
-//				part_info[part].cur_score = 0.0;
 				if (part_info[part].cur_score == 0.0)
 					part_info[part].cur_score = at(part)->computeLikelihood();
-//				tree_lh += part_info[part].cur_score;
 			}
 		}
     df_ret = -df;
     ddf_ret = -ddf;
-//    return -tree_lh;
 }
 
 NNIMove PhyloSuperTreePlen::getBestNNIForBran(PhyloNode *node1, PhyloNode *node2, NNIMove *nniMoves)
@@ -640,37 +523,9 @@ NNIMove PhyloSuperTreePlen::getBestNNIForBran(PhyloNode *node1, PhyloNode *node2
 
 	double backupScore = curScore;
 
-//	SuperNeighbor *nei1 = ((SuperNeighbor*)node1->findNeighbor(node2));
-//	SuperNeighbor *nei2 = ((SuperNeighbor*)node2->findNeighbor(node1));
-//	assert(nei1 && nei2);
-//
-//	SuperNeighbor *node1_nei = NULL;
-//	SuperNeighbor *node2_nei = NULL;
-//	SuperNeighbor *node2_nei_other = NULL;
-//
-//	FOR_NEIGHBOR_DECLARE(node1, node2, node1_it) {
-//		node1_nei = (SuperNeighbor*)(*node1_it);
-//		break;
-//	}
-//	FOR_NEIGHBOR_DECLARE(node2, node1, node2_it) {
-//		node2_nei = (SuperNeighbor*)(*node2_it);
-//		break;
-//	}
-//
-//	FOR_NEIGHBOR_IT(node2, node1, node2_it_other)
-//	if ((*node2_it_other) != node2_nei) {
-//		node2_nei_other = (SuperNeighbor*)(*node2_it_other);
-//		break;
-//	}
 
-/*	#ifdef _OPENMP
-	#pragma omp parallel for reduction(+: nni1_score, nni2_score) private(part)
-	#endif
-*/
 	SwapNNIParam nni_param;
 	// nni_param.node1/2_nei tell swapNNIBranch what to swap first
-//	nni_param.node1_nei = node1_nei;
-//	nni_param.node2_nei = node2_nei;
 
 	// ------------------------------------------------------------------
     int cnt;
@@ -713,32 +568,6 @@ NNIMove PhyloSuperTreePlen::getBestNNIForBran(PhyloNode *node1, PhyloNode *node2
 
 	this->swapNNIBranch(0.0, node1, node2, &nni_param, nniMoves);
 
-/*
-    NNIMove myMove;
-    myMove.newloglh = 0;
-
-	// Choose NNI move for SuperTree===========================================
-	if (nni_param.nni1_score > nni_param.nni2_score) {
-		myMove.swap_id = 1;
-		myMove.node1Nei_it = node1->findNeighborIt(node1_nei->node);
-		myMove.node2Nei_it = node2->findNeighborIt(node2_nei->node);
-		myMove.newloglh = nni_param.nni1_score;
-		myMove.node1 = node1;
-		myMove.node2 = node2;
-		myMove.newLen[0] = nni_param.nni1_brlen;
-		//myMove.oldLen[0] = oldLEN;
-	} else {
-		myMove.swap_id = 2;
-		myMove.node1Nei_it = node1->findNeighborIt(node1_nei->node);
-		myMove.node2Nei_it = node2->findNeighborIt(node2_nei_other->node);
-		myMove.newloglh = nni_param.nni2_score;
-		myMove.node1 = node1;
-		myMove.node2 = node2;
-		myMove.newLen[0] = nni_param.nni2_brlen;
-		//myMove.oldLen[0] = oldLEN;
-	}
-	// ========================================================================
-*/
 
 	 // restore curScore
 	 curScore = backupScore;
@@ -821,8 +650,6 @@ void PhyloSuperTreePlen::doNNI(NNIMove &move, bool clearLH)
 	}
 //	PhyloTree::doNNI(move,clearLH);
 	PhyloTree::doNNI(move,false);
-	//nei1->length = move.newLen[0];
-	//nei2->length = move.newLen[0];
 	PhyloNode *node1, *node2;
 
 	for (it = begin(), part = 0; it != end(); it++, part++) {
@@ -872,17 +699,14 @@ void PhyloSuperTreePlen::doNNI(NNIMove &move, bool clearLH)
 
 double PhyloSuperTreePlen::swapNNIBranch(double cur_score, PhyloNode *node1, PhyloNode *node2, SwapNNIParam *nni_param, NNIMove *nniMoves) {
 
-//	for (iterator it = begin(); it != end(); it++)
-//		if ((*it)->sse != LK_EIGEN_SSE)
-//			outError("hey!");
-
-//	double score_mine = this->computeLikelihood();
-
-	//cout<<"starting NNI evaluation"<<endl;
-	//checkBranchLen();
 
 	int i = 0, id = 0;
 	int part, ntrees = size();
+    uint64_t total_block_size = 0, total_scale_block_size = 0;
+    for (int j = 0; j < ntrees; j++) {
+        total_block_size += block_size[j];
+        total_scale_block_size += scale_block_size[j];
+    }
 
 	/*===========================================================================================
 	 * Identify NNIType for partitions
@@ -1058,8 +882,9 @@ double PhyloSuperTreePlen::swapNNIBranch(double cur_score, PhyloNode *node1, Phy
 
 	// For ONE_epsilon case: saves "id" of the neighbors that have an empty image
 	int id_eps[part];
-
-	for(part = 0; part < ntrees; part++){
+    uint64_t lh_addr = 0, scale_addr = 0;
+	for(int partid = 0; partid < ntrees; partid++){
+        part = part_order[partid];
 		if(is_nni[part]==NNI_NO_EPSILON){
 			//evalNNIs++;
 			//part_info[part].evalNNIs++;
@@ -1079,8 +904,8 @@ double PhyloSuperTreePlen::swapNNIBranch(double cur_score, PhyloNode *node1, Phy
 
 				// Create a new PhyloNeighbor, with new partial lhs, scale number and set the branch id as before
 				*sub_saved_it[part*6 + id] = new PhyloNeighbor(nei_link, saved_nei[id]->link_neighbors[part]->length);
-				((PhyloNeighbor*) (*sub_saved_it[part*6 + id]))->partial_lh = at(part)->newPartialLh();
-				((PhyloNeighbor*) (*sub_saved_it[part*6 + id]))->scale_num = at(part)->newScaleNum();
+				((PhyloNeighbor*) (*sub_saved_it[part*6 + id]))->partial_lh = nni_partial_lh + (id*total_block_size + lh_addr);
+				((PhyloNeighbor*) (*sub_saved_it[part*6 + id]))->scale_num = nni_scale_num + (id*total_scale_block_size + scale_addr);
 				(*sub_saved_it[part*6 + id])->id = saved_nei[id]->link_neighbors[part]->id;
 
 				// update link_neighbor[part]: for New SuperNeighbor we set the corresponding new PhyloNeighbor on partition part
@@ -1094,8 +919,8 @@ double PhyloSuperTreePlen::swapNNIBranch(double cur_score, PhyloNode *node1, Phy
 					node_link = ((SuperNeighbor*)(*node_nei_it[id-2]))->link_neighbors[part]->node;
 					sub_saved_it[part*6 + id] = node_link->findNeighborIt(nei_link);
 					*sub_saved_it[part*6 + id] = new PhyloNeighbor(nei_link, saved_nei[id]->link_neighbors[part]->length);
-					((PhyloNeighbor*) (*sub_saved_it[part*6 + id]))->partial_lh = at(part)->newPartialLh();
-					((PhyloNeighbor*) (*sub_saved_it[part*6 + id]))->scale_num = at(part)->newScaleNum();
+					((PhyloNeighbor*) (*sub_saved_it[part*6 + id]))->partial_lh = nni_partial_lh + (id*total_block_size + lh_addr);
+					((PhyloNeighbor*) (*sub_saved_it[part*6 + id]))->scale_num = nni_scale_num + (id*total_scale_block_size + scale_addr);
 					(*sub_saved_it[part*6 + id])->id = saved_nei[id]->link_neighbors[part]->id;
 
 					// update link_neighbor[part]
@@ -1138,8 +963,8 @@ double PhyloSuperTreePlen::swapNNIBranch(double cur_score, PhyloNode *node1, Phy
 					sub_saved_branch[6*part + id] = nei->link_neighbors[part]->length;
 
 					*sub_saved_it[part*6 + id] = new PhyloNeighbor(nei_link, nei->link_neighbors[part]->length);
-					((PhyloNeighbor*) (*sub_saved_it[part*6 + id]))->partial_lh = at(part)->newPartialLh();
-					((PhyloNeighbor*) (*sub_saved_it[part*6 + id]))->scale_num = at(part)->newScaleNum();
+					((PhyloNeighbor*) (*sub_saved_it[part*6 + id]))->partial_lh = nni_partial_lh + (id*total_block_size + lh_addr);
+					((PhyloNeighbor*) (*sub_saved_it[part*6 + id]))->scale_num = nni_scale_num + (id*total_scale_block_size + scale_addr);
 					(*sub_saved_it[part*6 + id])->id = nei->link_neighbors[part]->id;
 
 					// If nni5 we update the link neighbors already here, otherwise
@@ -1168,6 +993,8 @@ double PhyloSuperTreePlen::swapNNIBranch(double cur_score, PhyloNode *node1, Phy
 				}
 			}
 		}
+        lh_addr += block_size[part];
+        scale_addr += scale_block_size[part];
 	}
 
 	/* -------------------------------------------------------------------
@@ -1608,8 +1435,8 @@ double PhyloSuperTreePlen::swapNNIBranch(double cur_score, PhyloNode *node1, Phy
 				// restore the Neighbors*
 				for (i = IT_NUM-1; i >= 0; i--) {
 					if((*sub_saved_it[part*6+i])){
-						aligned_free(((PhyloNeighbor*) *sub_saved_it[part*6+i])->scale_num);
-						aligned_free(((PhyloNeighbor*) *sub_saved_it[part*6+i])->partial_lh);
+//						aligned_free(((PhyloNeighbor*) *sub_saved_it[part*6+i])->scale_num);
+//						aligned_free(((PhyloNeighbor*) *sub_saved_it[part*6+i])->partial_lh);
 						if (*sub_saved_it[part*6+i] == at(part)->current_it) at(part)->current_it = saved_nei[i]->link_neighbors[part];
 						if (*sub_saved_it[part*6+i] == at(part)->current_it_back) at(part)->current_it_back = saved_nei[i]->link_neighbors[part];
 
@@ -1631,8 +1458,8 @@ double PhyloSuperTreePlen::swapNNIBranch(double cur_score, PhyloNode *node1, Phy
 				for (id = 5; id >= 2; id--) {
 					//if((*sub_saved_it[part*6+id])){
 					if(((SuperNeighbor*)(*node_nei_it[id-2]))->link_neighbors[part]){
-						aligned_free(((PhyloNeighbor*) *sub_saved_it[part*6+id])->scale_num);
-						aligned_free(((PhyloNeighbor*) *sub_saved_it[part*6+id])->partial_lh);
+//						aligned_free(((PhyloNeighbor*) *sub_saved_it[part*6+id])->scale_num);
+//						aligned_free(((PhyloNeighbor*) *sub_saved_it[part*6+id])->partial_lh);
 
 						// It was commented, not sure why.. Just keep in mind------------------
 						if (*sub_saved_it[part*6+id] == at(part)->current_it)
@@ -1685,10 +1512,10 @@ void PhyloSuperTreePlen::linkCheckRe(int part,Node* node, Node* dad, PhyloNeighb
 		}
 	}
 }
-void PhyloSuperTreePlen::restoreAllBrans(PhyloNode *node, PhyloNode *dad) {
-	IQTree::restoreAllBrans(node,dad);
-	mapTrees();
-}
+//void PhyloSuperTreePlen::restoreAllBrans(PhyloNode *node, PhyloNode *dad) {
+//	IQTree::restoreAllBrans(node,dad);
+//	mapTrees();
+//}
 
 bool PhyloSuperTreePlen::checkBranchLen(){
 
@@ -1854,7 +1681,7 @@ void PhyloSuperTreePlen::changeNNIBrans(NNIMove nnimove) {
  */
 void PhyloSuperTreePlen::initializeAllPartialLh() {
 	iterator it;
-	int part;
+	int part, partid;
 	int ntrees = size();
 
 	block_size.resize(ntrees);
@@ -1865,7 +1692,12 @@ void PhyloSuperTreePlen::initializeAllPartialLh() {
 	lh_cat_size.resize(ntrees);
 	uint64_t total_mem_size = 0, total_block_size = 0, total_lh_cat_size = 0;
 
-	for (it = begin(), part = 0; it != end(); it++, part++) {
+	if (part_order.empty())
+		computePartitionOrder();
+
+	for (partid = 0; partid < ntrees; partid++) {
+		part = part_order[partid];
+        it = begin() + part;
 		size_t nptn = (*it)->getAlnNPattern() + (*it)->aln->num_states; // extra #numStates for ascertainment bias correction
 		if (instruction_set >= 7)
 			mem_size[part] = ((nptn +3)/4)*4;
@@ -1884,43 +1716,46 @@ void PhyloSuperTreePlen::initializeAllPartialLh() {
 
     if (!_pattern_lh)
         _pattern_lh = aligned_alloc<double>(total_mem_size);
-    front()->_pattern_lh = _pattern_lh;
+    at(part_order[0])->_pattern_lh = _pattern_lh;
     if (!_pattern_lh_cat)
         _pattern_lh_cat = aligned_alloc<double>(total_lh_cat_size);
-    front()->_pattern_lh_cat = _pattern_lh_cat;
+    at(part_order[0])->_pattern_lh_cat = _pattern_lh_cat;
     if (!theta_all)
         theta_all = aligned_alloc<double>(total_block_size);
-    front()->theta_all = theta_all;
+    at(part_order[0])->theta_all = theta_all;
     if (!ptn_freq) {
         ptn_freq = aligned_alloc<double>(total_mem_size);
         ptn_freq_computed = false;
     }
-    front()->ptn_freq = ptn_freq;
-    front()->ptn_freq_computed = false;
+    at(part_order[0])->ptn_freq = ptn_freq;
+    at(part_order[0])->ptn_freq_computed = false;
     if (!ptn_invar)
         ptn_invar = aligned_alloc<double>(total_mem_size);
-    front()->ptn_invar = ptn_invar;
+    at(part_order[0])->ptn_invar = ptn_invar;
 
     size_t IT_NUM = (params->nni5) ? 6 : 2;
     if (!nni_partial_lh) {
         nni_partial_lh = aligned_alloc<double>(IT_NUM*total_block_size);
     }
-    front()->nni_partial_lh = nni_partial_lh;
+    at(part_order[0])->nni_partial_lh = nni_partial_lh;
     
     if (!nni_scale_num) {
         nni_scale_num = aligned_alloc<UBYTE>(IT_NUM*total_mem_size);
     }
-    front()->nni_scale_num = nni_scale_num;
-
-	for (it = begin()+1, part = 0; it != end(); it++, part++) {
-		(*it)->_pattern_lh = (*(it-1))->_pattern_lh + mem_size[part];
-		(*it)->_pattern_lh_cat = (*(it-1))->_pattern_lh_cat + lh_cat_size[part];
-		(*it)->theta_all = (*(it-1))->theta_all + block_size[part];
-		(*it)->ptn_freq = (*(it-1))->ptn_freq + mem_size[part];
+    at(part_order[0])->nni_scale_num = nni_scale_num;
+
+	for (partid = 1; partid < ntrees; partid++) {
+        part = part_order[partid-1];
+        it = begin() + part_order[partid];
+        iterator prev_it = begin()+part_order[partid-1];
+		(*it)->_pattern_lh = (*prev_it)->_pattern_lh + mem_size[part];
+		(*it)->_pattern_lh_cat = (*prev_it)->_pattern_lh_cat + lh_cat_size[part];
+		(*it)->theta_all = (*prev_it)->theta_all + block_size[part];
+		(*it)->ptn_freq = (*prev_it)->ptn_freq + mem_size[part];
 		(*it)->ptn_freq_computed = false;
-		(*it)->ptn_invar = (*(it-1))->ptn_invar + mem_size[part];
-        (*it)->nni_partial_lh = (*(it-1))->nni_partial_lh + IT_NUM*block_size[part];
-        (*it)->nni_scale_num = (*(it-1))->nni_scale_num + IT_NUM*mem_size[part];
+		(*it)->ptn_invar = (*prev_it)->ptn_invar + mem_size[part];
+        (*it)->nni_partial_lh = (*prev_it)->nni_partial_lh + IT_NUM*block_size[part];
+        (*it)->nni_scale_num = (*prev_it)->nni_scale_num + IT_NUM*mem_size[part];
 	}
 
 	// compute total memory for all partitions
@@ -1981,7 +1816,8 @@ void PhyloSuperTreePlen::initializeAllPartialLh(double* &lh_addr, UBYTE* &scale_
         // assign a region in central_partial_lh to both Neihgbors (dad->node, and node->dad)
         SuperNeighbor *nei = (SuperNeighbor*) node->findNeighbor(dad);
 		SuperNeighbor *nei_back = (SuperNeighbor*) dad->findNeighbor(node);
-        for (int part = 0; part < size(); part++) {
+        for (int partid = 0; partid < size(); partid++) {
+            int part = part_order[partid];
         	PhyloNeighbor *nei_part = nei->link_neighbors[part];
         	if (!nei_part) continue;
         	PhyloNeighbor *nei_part_back = nei_back->link_neighbors[part];
@@ -2037,3 +1873,12 @@ void PhyloSuperTreePlen::initializeAllPartialLh(int &index, int &indexlh, PhyloN
 	assert(0);
 }
 
+
+string PhyloSuperTreePlen::getTreeString() {
+    return PhyloTree::getTreeString();
+}
+
+void PhyloSuperTreePlen::readTreeString(const string &tree_string) {
+    PhyloTree::readTreeString(tree_string);
+
+}
diff --git a/phylosupertreeplen.h b/phylosupertreeplen.h
index 0ccb320..aab6642 100644
--- a/phylosupertreeplen.h
+++ b/phylosupertreeplen.h
@@ -150,6 +150,19 @@ public:
 	~PhyloSuperTreePlen();
 
     /**
+            Read the tree saved with Taxon Names and branch lengths.
+            @param tree_string tree string to read from
+     */
+    virtual void readTreeString(const string &tree_string);
+
+    /**
+     * Return the tree string containing taxon names and branch lengths
+     * @return tree string
+     */
+    virtual string getTreeString();
+
+
+    /**
             compute the distance between 2 sequences.
             @param seq1 index of sequence 1
             @param seq2 index of sequence 2
@@ -195,8 +208,32 @@ public:
 	 */
 	void getNNIType(PhyloNode *node1, PhyloNode *node2, vector<NNIType> &nni_type);
 
-	virtual void computeFuncDerv(double value, double &df, double &ddf);
-	virtual double computeFunction(double value);
+    /**
+            Inherited from Optimization class.
+            This function calculates f(value), the first derivative f'(value) and the 2nd derivative f''(value).
+            Used by the Newton-Raphson method to minimize the function.
+            @param value current branch length
+            @param df (OUT) first derivative
+            @param ddf (OUT) second derivative
+            @return negative of likelihood (for minimization)
+     */
+    virtual void computeFuncDerv(double value, double &df, double &ddf);
+
+    /**
+            inherited from Optimization class, to return the likelihood of the tree
+            when the current branch length is set to value
+            @param value current branch length
+            @return negative of likelihood (for minimization)
+     */
+    virtual double computeFunction(double value);
+
+    /**
+            compute tree likelihood on a branch. used to optimize branch length
+            @param dad_branch the branch leading to the subtree
+            @param dad its dad, used to direct the traversal
+            @return tree likelihood
+     */
+    virtual double computeLikelihoodBranch(PhyloNeighbor *dad_branch, PhyloNode *dad);
 
     /**
             compute tree likelihood on a branch given buffer (theta_all), used after optimizing branch length
@@ -278,7 +315,7 @@ public:
 	void mapBranchLen(int part);
 	virtual void printMapInfo();
 
-	virtual void restoreAllBrans(PhyloNode *node, PhyloNode *dad);
+//	virtual void restoreAllBrans(PhyloNode *node, PhyloNode *dad);
 
 	/**
 	 * initialize partition information for super tree
diff --git a/phylotesting.cpp b/phylotesting.cpp
index 59cc791..3988bd7 100644
--- a/phylotesting.cpp
+++ b/phylotesting.cpp
@@ -29,6 +29,7 @@
 #include "model/modelbin.h"
 #include "model/modelcodon.h"
 #include "model/modelmorphology.h"
+#include "model/modelmixture.h"
 #include "timeutil.h"
 
 #include "phyloanalysis.h"
@@ -237,29 +238,61 @@ void printSiteLh(const char*filename, PhyloTree *tree, double *ptn_lh,
 		delete[] pattern_lh;
 }
 
-void printSiteLhCategory(const char*filename, PhyloTree *tree) {
-    // TODO: mixture model!
-    if (tree->getModel()->isMixture() && !tree->getModelFactory()->fused_mix_rate)
-        outError("Unsupported feature, please contact author if you really need this", __func__);
+void printSiteLhCategory(const char*filename, PhyloTree *tree, SiteLoglType wsl) {
+
+    if (wsl == WSL_NONE || wsl == WSL_SITE)
+        return;
+    // error checking
+    if (!tree->getModel()->isMixture()) {
+        if (wsl != WSL_RATECAT) {
+            outWarning("Switch now to '-wslr' as it is the only option for non-mixture model");
+            wsl = WSL_RATECAT;
+        }
+    } else {
+        // mixture model
+        if (wsl == WSL_MIXTURE_RATECAT && tree->getModelFactory()->fused_mix_rate) {
+            outWarning("-wslmr is not suitable for fused mixture model, switch now to -wslm");
+            wsl = WSL_MIXTURE;
+        }
+    }
+	int ncat = tree->getNumLhCat(wsl);
 	double *pattern_lh, *pattern_lh_cat;
 	int i;
-	int discrete_cat = tree->getRate()->getNDiscreteRate();
 	pattern_lh = new double[tree->getAlnNPattern()];
-	pattern_lh_cat = new double[tree->getAlnNPattern()*(discrete_cat)];
-	tree->computePatternLikelihood(pattern_lh, NULL, pattern_lh_cat);
-        
+	pattern_lh_cat = new double[tree->getAlnNPattern()*ncat];
+	tree->computePatternLikelihood(pattern_lh, NULL, pattern_lh_cat, wsl);
+
+    
 	try {
 		ofstream out;
 		out.exceptions(ios::failbit | ios::badbit);
 		out.open(filename);
 		out << "Note : P(D|M) is the probability of site D given the model M (i.e., the site likelihood)" << endl;
-		out << "P(D|M,rr[x]) is the probability of site D given the model M and the relative rate" << endl;
-		out << "of evolution rr[x], where x is the class of rate to be considered." << endl;
-		out << "We have P(D|M) = \\sum_x P(x) x P(D|M,rr[x])." << endl << endl;
-		out << "Site   logP(D|M)       ";
-		for (i = 0; i < discrete_cat; i++) {
-			out << "logP(D|M,rr[" << i+1 << "]=" << tree->getRate()->getRate(i)<< ") ";
-		}
+        if (wsl == WSL_RATECAT) {
+            out << "P(D|M,rr[x]) is the probability of site D given the model M and the relative rate" << endl;
+            out << "of evolution rr[x], where x is the class of rate to be considered." << endl;
+            out << "We have P(D|M) = \\sum_x P(x) x P(D|M,rr[x])." << endl << endl;
+            out << "Site   logP(D|M)       ";
+            for (i = 0; i < ncat; i++)
+                out << "logP(D|M,rr[" << i+1 << "]=" << tree->getRate()->getRate(i)<< ") ";
+        } else if (wsl == WSL_MIXTURE) {
+            out << "P(D|M[x]) is the probability of site D given the model M[x]," << endl;
+            out << "where x is the mixture class to be considered." << endl;
+            out << "We have P(D|M) = \\sum_x P(x) x P(D|M[x])." << endl << endl;
+            out << "Site   logP(D|M)       ";
+            for (i = 0; i < ncat; i++)
+                out << "logP(D|M[" << i+1 << "]) ";
+        } else {
+            // WSL_MIXTURE_RATECAT
+            out << "P(D|M[x],rr[y]) is the probability of site D given the model M[x] and the relative rate" << endl;
+            out << "of evolution rr[y], where x and y are the mixture class and rate class, respectively." << endl;
+            out << "We have P(D|M) = \\sum_x \\sum_y P(x) x P(y) x P(D|M[x],rr[y])." << endl << endl;
+            out << "Site   logP(D|M)       ";
+            for (i = 0; i < tree->getModel()->getNMixtures(); i++)
+                for (int j = 0; j < tree->getRate()->getNRate(); j++) {
+                    out << "logP(D|M[" << i+1 << "],rr[" << j+1 << "]=" << tree->getRate()->getRate(j) << ") ";
+                }
+        }
 		out << endl;
 		IntVector pattern_index;
 		tree->aln->getSitePatternIndex(pattern_index);
@@ -268,9 +301,9 @@ void printSiteLhCategory(const char*filename, PhyloTree *tree) {
 			out << left << i+1 << " ";
 			out.width(15);
 			out << pattern_lh[pattern_index[i]] << " ";
-			for (int j = 0; j < discrete_cat; j++) {
+			for (int j = 0; j < ncat; j++) {
 				out.width(15);
-				out << pattern_lh_cat[pattern_index[i]*discrete_cat+j] << " ";
+				out << pattern_lh_cat[pattern_index[i]*ncat+j] << " ";
 			}
 			out << endl;
 		}
@@ -384,9 +417,11 @@ int getModelList(Params &params, Alignment *aln, StrVector &models, bool separat
 	const char *rate_options[]    = {  "", "+I", "+ASC", "+G", "+I+G", "+ASC+G", "+R", "+ASC+R"};
 	bool test_options_default[]   = {true, true,  false, true,   true,    false,false,    false};
 	bool test_options_morph[]     = {true,false,   true, true,  false,     true,false,    false};    
+	bool test_options_noASC_I[]   = {true,false,  false, true,  false,    false,false,    false};    
 	bool test_options_asc[]       ={false,false,   true,false,  false,     true,false,    false};
 	bool test_options_new[]       = {true, true,  false, true,   true,    false, true,    false};
 	bool test_options_morph_new[] = {true,false,   true, true,  false,     true, true,     true};
+	bool test_options_noASC_I_new[] = {true,false,  false, true,  false,    false, true,    false};
 	bool test_options_asc_new[]   ={false,false,   true,false,  false,     true,false,     true};
     bool *test_options = test_options_default;
 //	bool test_options_codon[] =  {true,false,  false,false,  false,    false};
@@ -508,15 +543,19 @@ int getModelList(Params &params, Alignment *aln, StrVector &models, bool separat
 //		for (i = 0; i < noptions; i++)
 //			test_options[i] = test_options_codon[i];
 //	} else 
-    if (seq_type == SEQ_MORPH || aln->frac_const_sites == 0.0) {
+    if (seq_type == SEQ_MORPH || (aln->frac_const_sites == 0.0)) {
         // morphological or SNP data: activate +ASC
         if (with_new) {
             if (with_asc)
                 test_options = test_options_asc_new;
+            else if (seq_type == SEQ_PROTEIN)
+                test_options = test_options_noASC_I_new;
             else
                 test_options = test_options_morph_new;
         } else if (with_asc)
             test_options = test_options_asc;
+        else if (seq_type == SEQ_PROTEIN)
+            test_options = test_options_noASC_I;
         else
             test_options = test_options_morph;
 	} else {
@@ -571,7 +610,7 @@ int getModelList(Params &params, Alignment *aln, StrVector &models, bool separat
             for (int k = params.min_rate_cats+1; k <= params.max_rate_cats; k++) {
                 ratehet.insert(ratehet.begin()+j+k-params.min_rate_cats, str.substr(0, pos+2) + convertIntToString(k) + str.substr(pos+2));
             }
-            break;
+//            break;
         }
 
     if (separate_rate) {
@@ -586,6 +625,11 @@ int getModelList(Params &params, Alignment *aln, StrVector &models, bool separat
                 models.push_back(model_names[i] + ratehet[j]);
             }
     }
+    if (params.model_extra_set) {
+        StrVector extra_model_names;
+        convert_string_vec(params.model_extra_set, extra_model_names);        
+        models.insert(models.end(), extra_model_names.begin(), extra_model_names.end());
+    }
     return max_cats;
 }
 
@@ -752,7 +796,7 @@ void printModelFile(ostream &fmodel, Params &params, PhyloTree *tree, ModelInfo
  * @param model_info (IN/OUT) all model information
  * @return total number of parameters
  */
-void testPartitionModel(Params &params, PhyloSuperTree* in_tree, vector<ModelInfo> &model_info, ostream &fmodel) {
+void testPartitionModel(Params &params, PhyloSuperTree* in_tree, vector<ModelInfo> &model_info, ostream &fmodel, ModelsBlock *models_block ) {
 //    params.print_partition_info = true;
 //    params.print_conaln = true;
 	int i = 0;
@@ -811,7 +855,7 @@ void testPartitionModel(Params &params, PhyloSuperTree* in_tree, vector<ModelInf
         stringstream this_fmodel;
 		// do the computation
 //#ifdef _OPENMP
-		string model = testModel(params, this_tree, part_model_info, this_fmodel, in_tree->part_info[i].name);
+		string model = testModel(params, this_tree, part_model_info, this_fmodel, models_block, in_tree->part_info[i].name);
 //#else
 //		string model = testModel(params, this_tree, part_model_info, fmodel, in_tree->part_info[i].name);
 //#endif
@@ -952,7 +996,7 @@ void testPartitionModel(Params &params, PhyloSuperTree* in_tree, vector<ModelInf
                 tree->setAlignment(aln);
                 extractModelInfo(set_name, model_info, part_model_info);
 //#ifdef _OPENMP
-                model = testModel(params, tree, part_model_info, this_fmodel, set_name);
+                model = testModel(params, tree, part_model_info, this_fmodel, models_block, set_name);
 //#else
 //                model = testModel(params, tree, part_model_info, fmodel, set_name);
 //#endif
@@ -1060,7 +1104,9 @@ void testPartitionModel(Params &params, PhyloSuperTree* in_tree, vector<ModelInf
 	in_tree->printBestPartitionRaxml((string(params.out_prefix) + ".best_scheme").c_str());
 }
 
-string testModel(Params &params, PhyloTree* in_tree, vector<ModelInfo> &model_info, ostream &fmodel, string set_name, bool print_mem_usage) {
+string testModel(Params &params, PhyloTree* in_tree, vector<ModelInfo> &model_info, ostream &fmodel, ModelsBlock *models_block,
+    string set_name, bool print_mem_usage) 
+{
 	SeqType seq_type = in_tree->aln->seq_type;
 	if (in_tree->isSuperTree())
 		seq_type = ((PhyloSuperTree*)in_tree)->front()->aln->seq_type;
@@ -1096,7 +1142,7 @@ string testModel(Params &params, PhyloTree* in_tree, vector<ModelInfo> &model_in
 	if (in_tree->isSuperTree()) {
 		// select model for each partition
 		PhyloSuperTree *stree = (PhyloSuperTree*)in_tree;
-		testPartitionModel(params, stree, model_info, fmodel);
+		testPartitionModel(params, stree, model_info, fmodel, models_block);
 		string res_models = "";
 		for (vector<PartitionInfo>::iterator it = stree->part_info.begin(); it != stree->part_info.end(); it++) {
 			if (it != stree->part_info.begin()) res_models += ",";
@@ -1160,7 +1206,7 @@ string testModel(Params &params, PhyloTree* in_tree, vector<ModelInfo> &model_in
 		it->BIC_score = DBL_MAX;
 	}
 
-	int num_cat = 0;
+	uint64_t RAM_requirement = 0;
     int model_aic = -1, model_aicc = -1, model_bic = -1;
     string prev_tree_string = "";
     int prev_model_id = -1;
@@ -1186,82 +1232,109 @@ string testModel(Params &params, PhyloTree* in_tree, vector<ModelInfo> &model_in
             model_names[model] = best_model + model_names[model];
         }
 		PhyloTree *tree = in_tree;
+        ModelFactory *this_model_fac = NULL;
+        bool mixture_model = false;
+        int ncat = 0;
+        string orig_name = params.model_name;
         
-        if (model_names[model].find("+ASC") != string::npos) {
-            model_fac->unobserved_ptns = in_tree->aln->getUnobservedConstPatterns();
-            if (model_fac->unobserved_ptns.size() == 0) {
-                cout.width(3);
-                cout << right << model+1 << "  ";
-                cout.width(13);
-                cout << left << model_names[model] << " ";                
-                cout << "Skipped since +ASC is not applicable" << endl;
-                continue;
+        if (models_block->findMixModel(model_names[model])) {
+            // mixture model
+            try {
+                mixture_model = true;
+                params.model_name = model_names[model];
+                this_model_fac = new ModelFactory(params, tree, models_block);
+                tree->setModelFactory(this_model_fac);
+                tree->setModel(this_model_fac->model);
+                tree->setRate(this_model_fac->site_rate);
+                tree->deleteAllPartialLh();
+                tree->initializeAllPartialLh();
+                RAM_requirement = max(RAM_requirement, tree->getMemoryRequired());
+            } catch (string &str) {
+                outError("Invalid -madd model " + model_names[model] + ": " + str);
             }
-            tree->aln->buildSeqStates(true);
-            if (model_fac->unobserved_ptns.size() < tree->aln->getNumNonstopCodons())
-                outError("Invalid use of +ASC because constant patterns are observed in the alignment");
         } else {
-            model_fac->unobserved_ptns = "";
-            tree->aln->buildSeqStates(false);
-        }
-        // initialize tree
-        // initialize model
-        subst_model->setTree(tree);
-        StateFreqType freq_type = FREQ_UNKNOWN;
-        if (model_names[model].find("+F1X4") != string::npos)
-            freq_type = FREQ_CODON_1x4;
-        else if (model_names[model].find("+F3X4C") != string::npos)
-            freq_type = FREQ_CODON_3x4C;
-        else if (model_names[model].find("+F3X4") != string::npos)
-            freq_type = FREQ_CODON_3x4;
-        else if (model_names[model].find("+FQ") != string::npos)
-            freq_type = FREQ_EQUAL;
-        else if (model_names[model].find("+F") != string::npos)
-            freq_type = FREQ_EMPIRICAL;
-            
-        subst_model->init(model_names[model].substr(0, model_names[model].find('+')).c_str(), "", freq_type, "");
-        tree->params = ¶ms;
-
-        tree->setModel(subst_model);
-        // initialize rate
-        size_t pos;
-        int ncat = 0;
-        if ((pos = model_names[model].find("+R")) != string::npos) {
-            ncat = params.num_rate_cats;
-            if (model_names[model].length() > pos+2 && isdigit(model_names[model][pos+2])) {
-                ncat = convert_int(model_names[model].c_str() + pos+2);
-//                tree->getRate()->setNCategory(ncat);
-            }
-            if (ncat <= 1) outError("Number of rate categories for " + model_names[model] + " is <= 1");
-            if (ncat > params.max_rate_cats)
-                outError("Number of rate categories for " + model_names[model] + " exceeds " + convertIntToString(params.max_rate_cats));
-            tree->setRate(rate_class[2+ncat]);
-        } else if (model_names[model].find("+I") != string::npos && (pos = model_names[model].find("+G")) != string::npos) {
-            tree->setRate(rate_class[3]);
-            if (model_names[model].length() > pos+2 && isdigit(model_names[model][pos+2])) {
-                int ncat = convert_int(model_names[model].c_str() + pos+2);
-                if (ncat < 1) outError("Wrong number of category for +G in " + model_names[model]);
-                tree->getRate()->setNCategory(ncat);
-            }
-        } else if ((pos = model_names[model].find("+G")) != string::npos) {
-            tree->setRate(rate_class[2]);
-            if (model_names[model].length() > pos+2 && isdigit(model_names[model][pos+2])) {
-                ncat = convert_int(model_names[model].c_str() + pos+2);
-                if (ncat < 1) outError("Wrong number of category for +G in " + model_names[model]);
-                tree->getRate()->setNCategory(ncat);
+            // kernel might be changed if mixture model was tested
+            in_tree->setLikelihoodKernel(params.SSE);
+            // normal model
+            if (model_names[model].find("+ASC") != string::npos) {
+                model_fac->unobserved_ptns = in_tree->aln->getUnobservedConstPatterns();
+                if (model_fac->unobserved_ptns.size() == 0) {
+                    cout.width(3);
+                    cout << right << model+1 << "  ";
+                    cout.width(13);
+                    cout << left << model_names[model] << " ";                
+                    cout << "Skipped since +ASC is not applicable" << endl;
+                    continue;
+                }
+                tree->aln->buildSeqStates(true);
+                if (model_fac->unobserved_ptns.size() < tree->aln->getNumNonstopCodons())
+                    outError("Invalid use of +ASC because constant patterns are observed in the alignment");
+            } else {
+                model_fac->unobserved_ptns = "";
+                tree->aln->buildSeqStates(false);
             }
-        } else if (model_names[model].find("+I") != string::npos)
-            tree->setRate(rate_class[1]);
-        else
-            tree->setRate(rate_class[0]);
-
-        tree->getRate()->setTree(tree);
+            // initialize tree
+            // initialize model
+            subst_model->setTree(tree);
+            StateFreqType freq_type = FREQ_UNKNOWN;
+            if (model_names[model].find("+F1X4") != string::npos)
+                freq_type = FREQ_CODON_1x4;
+            else if (model_names[model].find("+F3X4C") != string::npos)
+                freq_type = FREQ_CODON_3x4C;
+            else if (model_names[model].find("+F3X4") != string::npos)
+                freq_type = FREQ_CODON_3x4;
+            else if (model_names[model].find("+FQ") != string::npos)
+                freq_type = FREQ_EQUAL;
+            else if (model_names[model].find("+FO") != string::npos)
+                freq_type = FREQ_ESTIMATE;
+            else if (model_names[model].find("+FU") != string::npos)
+                freq_type = FREQ_USER_DEFINED;
+            else if (model_names[model].find("+F") != string::npos)
+                freq_type = FREQ_EMPIRICAL;
+                
+            subst_model->init(model_names[model].substr(0, model_names[model].find('+')).c_str(), "", freq_type, "");
+            tree->params = ¶ms;
+
+            tree->setModel(subst_model);
+            // initialize rate
+            size_t pos;
+            if ((pos = model_names[model].find("+R")) != string::npos) {
+                ncat = params.num_rate_cats;
+                if (model_names[model].length() > pos+2 && isdigit(model_names[model][pos+2])) {
+                    ncat = convert_int(model_names[model].c_str() + pos+2);
+    //                tree->getRate()->setNCategory(ncat);
+                }
+                if (ncat <= 1) outError("Number of rate categories for " + model_names[model] + " is <= 1");
+                if (ncat > params.max_rate_cats)
+                    outError("Number of rate categories for " + model_names[model] + " exceeds " + convertIntToString(params.max_rate_cats));
+                tree->setRate(rate_class[2+ncat]);
+            } else if (model_names[model].find("+I") != string::npos && (pos = model_names[model].find("+G")) != string::npos) {
+                tree->setRate(rate_class[3]);
+                if (model_names[model].length() > pos+2 && isdigit(model_names[model][pos+2])) {
+                    int ncat = convert_int(model_names[model].c_str() + pos+2);
+                    if (ncat < 1) outError("Wrong number of category for +G in " + model_names[model]);
+                    tree->getRate()->setNCategory(ncat);
+                }
+            } else if ((pos = model_names[model].find("+G")) != string::npos) {
+                tree->setRate(rate_class[2]);
+                if (model_names[model].length() > pos+2 && isdigit(model_names[model][pos+2])) {
+                    ncat = convert_int(model_names[model].c_str() + pos+2);
+                    if (ncat < 1) outError("Wrong number of category for +G in " + model_names[model]);
+                    tree->getRate()->setNCategory(ncat);
+                }
+            } else if (model_names[model].find("+I") != string::npos)
+                tree->setRate(rate_class[1]);
+            else
+                tree->setRate(rate_class[0]);
 
-        // initialize model factory
-        model_fac->model = subst_model;
-        model_fac->site_rate = tree->getRate();
-        tree->setModelFactory(model_fac);
+            tree->getRate()->setTree(tree);
 
+            // initialize model factory
+            model_fac->model = subst_model;
+            model_fac->site_rate = tree->getRate();
+            tree->setModelFactory(model_fac);
+        }
+        
         tree->clearAllPartialLH();
 
 
@@ -1269,7 +1342,10 @@ string testModel(Params &params, PhyloTree* in_tree, vector<ModelInfo> &model_in
 		ModelInfo info;        
 		info.set_name = set_name;
 		info.df = tree->getModelFactory()->getNParameters();
-		info.name = tree->getModelName();
+        if (mixture_model)
+            info.name = model_names[model];
+        else
+            info.name = tree->getModelName();
 		int model_id = -1;
         if (skip_model) {
             assert(prev_model_id>=0);
@@ -1290,11 +1366,14 @@ string testModel(Params &params, PhyloTree* in_tree, vector<ModelInfo> &model_in
             info.tree = model_info[model_id].tree;
             prev_tree_string = model_info[model_id].tree;
         } else if (skip_model) {
+            assert(prev_model_id >= 0);
+            if (prev_model_id >= 0) {
             info.logl = model_info[prev_model_id].logl;
             info.tree_len = model_info[prev_model_id].tree_len;
 //            info.tree = model_info[prev_model_id].tree;
-            prev_tree_string = model_info[prev_model_id].tree;
+//            prev_tree_string = model_info[prev_model_id].tree;
 //            cout << "Skipped " << info.name << endl;
+            }
 		} else {
             if (params.model_test_and_tree) {
                 string original_model = params.model_name;
@@ -1317,15 +1396,20 @@ string testModel(Params &params, PhyloTree* in_tree, vector<ModelInfo> &model_in
                 params.user_file = orig_user_tree;
                 tree = iqtree;
             } else {
-                if (tree->getRate()->getNRate() > num_cat) {
+                if (tree->getMemoryRequired() > RAM_requirement) {
                     tree->deleteAllPartialLh();
-                    num_cat = tree->getRate()->getNRate();
-                    tree->initializeAllPartialLh();
+                    RAM_requirement = tree->getMemoryRequired();
                 }
+                tree->initializeAllPartialLh();
                 if (prev_tree_string != "") {
                     tree->readTreeString(prev_tree_string);
                 }
                 prev_tree_string = "";
+                if (model_fac->unobserved_ptns.size() > 0 && tree->aln->seq_type == SEQ_PROTEIN) {
+                    // treatment for +ASC for protein data
+                    tree->fixNegativeBranch(true);
+                    tree->clearAllPartialLH();
+                }
                 info.logl = tree->getModelFactory()->optimizeParameters(false, false, TOL_LIKELIHOOD_MODELTEST, TOL_GRADIENT_MODELTEST);
                 info.tree_len = tree->treeLength();
                 if (prev_model_id >= 0) {
@@ -1356,7 +1440,7 @@ string testModel(Params &params, PhyloTree* in_tree, vector<ModelInfo> &model_in
             size_t pos_r = info.name.find("+R");
             if ( prev_pos_r != string::npos &&  pos_r != string::npos && 
             model_info[prev_model_id].name.substr(0,prev_pos_r) == info.name.substr(0, pos_r)) {
-                switch (params.model_test_stop_rule) {
+                switch (params.model_test_criterion) {
                 case MTC_ALL:
                     if (info.AIC_score > model_info[prev_model_id].AIC_score && info.AICc_score > model_info[prev_model_id].AICc_score &&
                         info.BIC_score > model_info[prev_model_id].BIC_score) {
@@ -1401,6 +1485,14 @@ string testModel(Params &params, PhyloTree* in_tree, vector<ModelInfo> &model_in
 		if (model_bic < 0 || model_info[model_id].BIC_score < model_info[model_bic].BIC_score)
 			model_bic = model_id;
         
+        if (mixture_model) {
+            delete this_model_fac->model;
+            delete this_model_fac->site_rate;
+            delete this_model_fac;
+            this_model_fac = NULL;
+            params.model_name = orig_name;
+        }
+        
         in_tree->setModel(NULL);
         in_tree->setModelFactory(NULL);
         in_tree->setRate(NULL);
@@ -1729,6 +1821,9 @@ void evaluateTrees(Params &params, IQTree *tree, vector<TreeInfo> &info, IntVect
 		tree->freeNode();
 		tree->readTree(in, params.is_rooted);
 		tree->setAlignment(tree->aln);
+        tree->setRootNode(params.root);
+		if (tree->isSuperTree())
+			((PhyloSuperTree*) tree)->mapTrees();
 		if ((tree->sse == LK_EIGEN || tree->sse == LK_EIGEN_SSE) && !tree->isBifurcating()) {
 			cout << "NOTE: Changing to old kernel as user tree is multifurcating" << endl;
 			if (tree->sse == LK_EIGEN)
@@ -1739,8 +1834,6 @@ void evaluateTrees(Params &params, IQTree *tree, vector<TreeInfo> &info, IntVect
 
 		tree->initializeAllPartialLh();
 		tree->fixNegativeBranch(false);
-		if (tree->isSuperTree())
-			((PhyloSuperTree*) tree)->mapTrees();
 		if (!params.fixed_branch_length) {
 			tree->setCurScore(tree->optimizeAllBranches(100, 0.001));
 		} else {
diff --git a/phylotesting.h b/phylotesting.h
index d1b1585..06747c9 100644
--- a/phylotesting.h
+++ b/phylotesting.h
@@ -68,7 +68,7 @@ bool checkModelFile(string model_file, bool is_partitioned, vector<ModelInfo> &i
  @return name of best-fit-model
  */
 string testModel(Params &params, PhyloTree* in_tree, vector<ModelInfo> &model_info, ostream &fmodel,
-		string set_name = "", bool print_mem_usage = false);
+		ModelsBlock *models_block, string set_name = "", bool print_mem_usage = false);
 
 /**
  * print site log likelihoods to a fileExists
@@ -86,7 +86,7 @@ void printSiteLh(const char*filename, PhyloTree *tree, double *ptn_lh = NULL,
  * @param filename output file name
  * @param tree phylogenetic tree
  */
-void printSiteLhCategory(const char*filename, PhyloTree *tree);
+void printSiteLhCategory(const char*filename, PhyloTree *tree, SiteLoglType wsl);
 
 /**
  * Evaluate user-trees with possibility of tree topology tests
diff --git a/phylotree.cpp b/phylotree.cpp
index 65554be..3670008 100644
--- a/phylotree.cpp
+++ b/phylotree.cpp
@@ -87,6 +87,7 @@ void PhyloTree::init() {
     ptn_invar = NULL;
     subTreeDistComputed = false;
     dist_matrix = NULL;
+    var_matrix = NULL;
     setLikelihoodKernel(LK_SSE);  // FOR TUNG: you forgot to initialize this variable!
     save_all_trees = 0;
     nodeBranchDists = NULL;
@@ -107,6 +108,7 @@ void PhyloTree::init() {
     params = NULL;
     current_scaling = 1.0;
     is_opt_scaling = false;
+    num_partial_lh_computations = 0;
 }
 
 PhyloTree::PhyloTree(Alignment *aln) : MTree() {
@@ -118,6 +120,16 @@ void PhyloTree::discardSaturatedSite(bool val) {
     discard_saturated_site = val;
 }
 
+void myPartitionsDestroy(partitionList *pl) {
+	int i;
+	for (i = 0; i < pl->numberOfPartitions; i++) {
+		rax_free(pl->partitionData[i]->partitionName);
+		rax_free(pl->partitionData[i]);
+	}
+	rax_free(pl->partitionData);
+	rax_free(pl);
+}
+
 PhyloTree::~PhyloTree() {
     if (nni_scale_num)
         aligned_free(nni_scale_num);
@@ -137,10 +149,13 @@ PhyloTree::~PhyloTree() {
     central_partial_pars = NULL;
     if (model_factory)
         delete model_factory;
+    model_factory = NULL;
     if (model)
         delete model;
+    model = NULL;
     if (site_rate)
         delete site_rate;
+    site_rate = NULL;
 //    if (tmp_scale_num1)
 //        delete[] tmp_scale_num1;
 //    if (tmp_scale_num2)
@@ -157,27 +172,53 @@ PhyloTree::~PhyloTree() {
     //	delete [] tmp_ptn_rates;
     if (_pattern_lh_cat)
         aligned_free(_pattern_lh_cat);
+    _pattern_lh_cat = NULL;
     if (_pattern_lh)
         aligned_free(_pattern_lh);
+    _pattern_lh = NULL;
     //if (state_freqs)
     //	delete [] state_freqs;
     if (theta_all)
         aligned_free(theta_all);
+    theta_all = NULL;
     if (ptn_freq)
         aligned_free(ptn_freq);
+    ptn_freq = NULL;
     ptn_freq_computed = false;
     if (ptn_invar)
     	aligned_free(ptn_invar);
+    ptn_invar = NULL;
     if (dist_matrix)
     	delete[] dist_matrix;
+    dist_matrix = NULL;
+
+    if (var_matrix)
+        delete[] var_matrix;
+    var_matrix = NULL;
+
+    if (pllPartitions)
+    	myPartitionsDestroy(pllPartitions);
+    if (pllAlignment)
+    	pllAlignmentDataDestroy(pllAlignment);
+    if (pllInst)
+        pllDestroyInstance(pllInst);
+
+    pllPartitions = NULL;
+    pllAlignment = NULL;
+    pllInst = NULL;
+    
 }
 
 void PhyloTree::readTree(const char *infile, bool &is_rooted) {
 	MTree::readTree(infile, is_rooted);
+    // 2015-10-14: has to reset this pointer when read in
+    current_it = current_it_back = NULL;
 }
 
 void PhyloTree::readTree(istream &in, bool &is_rooted) {
 	MTree::readTree(in, rooted);
+    // 2015-10-14: has to reset this pointer when read in
+    current_it = current_it_back = NULL;
 	// remove taxa if necessary
 	if (removed_seqs.size() > 0)
 		removeTaxa(removed_seqs);
@@ -254,6 +295,7 @@ void PhyloTree::setAlignment(Alignment *alignment) {
             node->id = seq;
         }
     }
+    if (err) outError("Tree taxa and alignment sequence do not match (see above)");
     StrVector taxname;
     getTaxaName(taxname);
     for (StrVector::iterator it = taxname.begin(); it != taxname.end(); it++)
@@ -395,6 +437,8 @@ void PhyloTree::clearAllPartialLH(bool make_null) {
         return;
     ((PhyloNode*) root->neighbors[0]->node)->clearAllPartialLh(make_null, (PhyloNode*) root);
     tip_partial_lh_computed = false;
+    // 2015-10-14: has to reset this pointer when read in
+    current_it = current_it_back = NULL;
 }
 
 void PhyloTree::computeAllPartialLh(PhyloNode *node, PhyloNode *dad) {
@@ -410,7 +454,7 @@ void PhyloTree::computeAllPartialLh(PhyloNode *node, PhyloNode *dad) {
 }
 
 string PhyloTree::getModelName() {
-	string name = model->name;
+	string name = model->getName();
 	if (model_factory->unobserved_ptns.size() > 0)
 		name += "+ASC";
 	if (model_factory->fused_mix_rate) {
@@ -418,18 +462,6 @@ string PhyloTree::getModelName() {
 	} else {
 		name += site_rate->name;
 	}
-	if (model->getFreqType() == FREQ_EMPIRICAL)
-		name += "+F";
-	else if (model->getFreqType() == FREQ_CODON_1x4)
-		name += "+F1X4";
-	else if (model->getFreqType() == FREQ_CODON_3x4)
-		name += "+F3X4";
-	else if (model->getFreqType() == FREQ_CODON_3x4C)
-		name += "+F3X4C";
-	else if (model->getFreqType() == FREQ_ESTIMATE && aln->seq_type != SEQ_DNA)
-		name += "+FO";
-	else if (model->getFreqType() == FREQ_EQUAL && aln->seq_type != SEQ_DNA)
-		name += "+FQ";
 	return name;
 }
 
@@ -445,33 +477,6 @@ string PhyloTree::getModelNameParams() {
 		name += rate_name;
 	}
 
-	if (model->getFreqType() == FREQ_EMPIRICAL || (model->getFreqType() == FREQ_USER_DEFINED && aln->seq_type == SEQ_DNA)) {
-		name += "+F";
-        double *state_freq = new double[model->num_states];
-        model->getStateFrequency(state_freq);
-        name += "{" + convertDoubleToString(state_freq[0]);
-        for (int i = 1; i < model->num_states; i++)
-            name += "," + convertDoubleToString(state_freq[i]);
-        name += "}";
-        delete [] state_freq;
-	} else if (model->getFreqType() == FREQ_CODON_1x4)
-		name += "+F1X4";
-	else if (model->getFreqType() == FREQ_CODON_3x4)
-		name += "+F3X4";
-	else if (model->getFreqType() == FREQ_CODON_3x4C)
-		name += "+F3X4C";
-	else if (model->getFreqType() == FREQ_ESTIMATE) {
-		name += "+FO";
-        double *state_freq = new double[model->num_states];
-        model->getStateFrequency(state_freq);
-        name += "{" + convertDoubleToString(state_freq[0]);
-        for (int i = 1; i < model->num_states; i++)
-            name += "," + convertDoubleToString(state_freq[i]);
-        name += "}";
-        delete [] state_freq;
-    }
-	else if (model->getFreqType() == FREQ_EQUAL && aln->seq_type != SEQ_DNA)
-		name += "+FQ";
 	return name;
 }
 
@@ -1261,6 +1266,12 @@ uint64_t PhyloTree::getMemoryRequired(size_t ncategory) {
             mem_size -= ((uint64_t)leafNum*2 - 4) * ((uint64_t)block_size*sizeof(double) + nptn * sizeof(UBYTE));
         }
     }
+	uint64_t tip_partial_lh_size;
+    if (model)
+        tip_partial_lh_size = aln->num_states * (aln->STATE_UNKNOWN+1) * model->getNMixtures() * sizeof(double);
+    else
+        tip_partial_lh_size = aln->num_states * (aln->STATE_UNKNOWN+1) * sizeof(double);
+    mem_size += tip_partial_lh_size;
     if (params->gbo_replicates)
         mem_size += params->gbo_replicates*nptn*sizeof(BootValType);
     return mem_size;
@@ -1471,16 +1482,33 @@ UBYTE *PhyloTree::newScaleNum() {
     return aligned_alloc<UBYTE>(aln->size()+aln->num_states);
 }
 
+Node *findFirstFarLeaf(Node *node, Node *dad = NULL) {
+
+    do {
+        FOR_NEIGHBOR_IT(node, dad, it) {
+            dad = node;
+            node = (*it)->node;
+            break; 
+        }
+    } while (!node->isLeaf());
+    return node;
+    
+}
+
 double PhyloTree::computeLikelihood(double *pattern_lh) {
     assert(model);
     assert(site_rate);
     assert(root->isLeaf());
-    PhyloNeighbor *nei = ((PhyloNeighbor*) root->neighbors[0]);
-    current_it = nei;
-    assert(current_it);
-    current_it_back = (PhyloNeighbor*) nei->node->findNeighbor(root);
-    assert(current_it_back);
-
+    if (!current_it) {
+        Node *leaf = findFarthestLeaf();
+        current_it = (PhyloNeighbor*)leaf->neighbors[0];
+        current_it_back = (PhyloNeighbor*)current_it->node->findNeighbor(leaf);
+//        PhyloNeighbor *nei = ((PhyloNeighbor*) root->neighbors[0]);
+//        current_it = nei;
+//        assert(current_it);
+//        current_it_back = (PhyloNeighbor*) nei->node->findNeighbor(root);
+//        assert(current_it_back);
+    }
     double score;
     string root_name = ROOT_NAME;
     Node *vroot = findLeafName(root_name);
@@ -1489,16 +1517,16 @@ double PhyloTree::computeLikelihood(double *pattern_lh) {
             cout << __func__ << " HIT ROOT STATE " << endl;
         score = computeLikelihoodRooted((PhyloNeighbor*) vroot->neighbors[0], (PhyloNode*) vroot);
     } else {
-        score = computeLikelihoodBranch(nei, (PhyloNode*) root);
+        score = computeLikelihoodBranch(current_it, (PhyloNode*) current_it_back->node);
     }
     if (pattern_lh)
         memmove(pattern_lh, _pattern_lh, aln->size() * sizeof(double));
 
-    if (pattern_lh && nei->lh_scale_factor < 0.0) {
+    if (pattern_lh && current_it->lh_scale_factor < 0.0) {
         int nptn = aln->getNPattern();
         //double check_score = 0.0;
         for (int i = 0; i < nptn; i++) {
-            pattern_lh[i] += max(nei->scale_num[i], UBYTE(0)) * LOG_SCALING_THRESHOLD;
+            pattern_lh[i] += max(current_it->scale_num[i], UBYTE(0)) * LOG_SCALING_THRESHOLD;
             //check_score += (pattern_lh[i] * (aln->at(i).frequency));
         }
         /*       if (fabs(score - check_score) > 1e-6) {
@@ -1527,37 +1555,89 @@ double PhyloTree::computeLikelihoodRooted(PhyloNeighbor *dad_branch, PhyloNode *
     return score;
 }
 
-void PhyloTree::computePatternLikelihood(double *ptn_lh, double *cur_logl, double *ptn_lh_cat) {
+int PhyloTree::getNumLhCat(SiteLoglType wsl) {
+    int ncat = 0;
+    switch (wsl) {
+    case WSL_NONE: assert(0 && "is not WSL_NONE"); return 0;
+    case WSL_SITE: assert(0 && "is not WSL_SITE"); return 0;
+    case WSL_MIXTURE_RATECAT: 
+        ncat = getRate()->getNDiscreteRate();
+        if (getModel()->isMixture() && !getModelFactory()->fused_mix_rate)
+            ncat *= getModel()->getNMixtures();
+        return ncat;
+    case WSL_RATECAT:
+        return getRate()->getNDiscreteRate();
+    case WSL_MIXTURE:
+        return getModel()->getNMixtures();
+    }
+}
+
+double PhyloTree::computePatternLhCat(SiteLoglType wsl) {
+    if (!current_it) {
+        Node *leaf = findFirstFarLeaf(root);
+        current_it = (PhyloNeighbor*)leaf->neighbors[0];
+        current_it_back = (PhyloNeighbor*)current_it->node->findNeighbor(leaf);
+    }
+    if (sse == LK_NORMAL || sse == LK_SSE) {
+        if (getModel()->isMixture())
+            outError("Naive kernel does not support mixture models, contact author if you really need this feature");
+        return computeLikelihoodBranchNaive(current_it, (PhyloNode*)current_it_back->node);
+    } else if (!getModel()->isMixture())
+        return computeLikelihoodBranchEigen(current_it, (PhyloNode*)current_it_back->node);
+    else if (getModelFactory()->fused_mix_rate)
+        return computeMixrateLikelihoodBranchEigen(current_it, (PhyloNode*)current_it_back->node);
+    else {
+        double score = computeMixtureLikelihoodBranchEigen(current_it, (PhyloNode*)current_it_back->node);
+        if (wsl == WSL_MIXTURE_RATECAT) return score;
+        
+        double *lh_cat = _pattern_lh_cat;
+        double *lh_res = _pattern_lh_cat;
+        size_t ptn, nptn = aln->getNPattern();
+        size_t m, nmixture = getModel()->getNMixtures();
+        size_t c, ncat = getRate()->getNRate();
+        if (wsl == WSL_MIXTURE && ncat > 1) {
+            // transform to lh per mixture class
+            for (ptn = 0; ptn < nptn; ptn++) {
+                for (m = 0; m < nmixture; m++) {
+                    double lh = lh_cat[0];
+                    for (c = 1; c < ncat; c++)
+                        lh += lh_cat[c];
+                    lh_res[m] = lh;
+                    lh_cat += ncat;
+                }
+                lh_res += nmixture;
+            }
+        } else if (wsl == WSL_RATECAT && nmixture > 1) {
+            // transform to lh per rate category
+            for (ptn = 0; ptn < nptn; ptn++) {
+                if (lh_res != lh_cat)
+                    memcpy(lh_res, lh_cat, ncat*sizeof(double));
+                lh_cat += ncat;
+                for (m = 1; m < nmixture; m++) {
+                    for (c = 0; c < ncat; c++)
+                        lh_res[c] += lh_cat[c];
+                    lh_cat += ncat;
+                }
+                lh_res += ncat;
+            }
+        }
+        return score;
+    }
+}
+
+
+void PhyloTree::computePatternLikelihood(double *ptn_lh, double *cur_logl, double *ptn_lh_cat, SiteLoglType wsl) {
     /*	if (!dad_branch) {
      dad_branch = (PhyloNeighbor*) root->neighbors[0];
      dad = (PhyloNode*) root;
      }*/
     int nptn = aln->getNPattern();
     int i;
-    int ncat = site_rate->getNDiscreteRate();
-    if (getModel()->isMixture() && !getModelFactory()->fused_mix_rate)
-        ncat *= getModel()->getNMixtures();
+    int ncat = getNumLhCat(wsl);
     if (ptn_lh_cat) {
     	// Right now only Naive version store _pattern_lh_cat!
-    	if (sse == LK_NORMAL || sse == LK_SSE)
-    		computeLikelihoodBranchNaive(current_it, (PhyloNode*)current_it_back->node);
-    	else {
-//    		switch (aln->num_states) {
-//    		case 4: computeLikelihoodBranchEigen<4>(current_it, (PhyloNode*)current_it_back->node); break;
-//    		case 20: computeLikelihoodBranchEigen<20>(current_it, (PhyloNode*)current_it_back->node); break;
-//    		case 2: computeLikelihoodBranchEigen<2>(current_it, (PhyloNode*)current_it_back->node); break;
-//    		case 64: computeLikelihoodBranchEigen<64>(current_it, (PhyloNode*)current_it_back->node); break;
-//    		default: outError("Option unsupported yet for this sequence type. Contact author if you really need it."); break;
-//    		}
-            if (!getModel()->isMixture()) {
-                computeLikelihoodBranchEigen(current_it, (PhyloNode*)current_it_back->node); 
-            } else if (getModelFactory()->fused_mix_rate) {
-                computeMixrateLikelihoodBranchEigen(current_it, (PhyloNode*)current_it_back->node); 
-            } else {
-                computeMixtureLikelihoodBranchEigen(current_it, (PhyloNode*)current_it_back->node); 
-            }
-        }
-    }
+        computePatternLhCat(wsl);
+    } 
     
     double sum_scaling = current_it->lh_scale_factor + current_it_back->lh_scale_factor;
     //double sum_scaling = 0.0;
@@ -1624,14 +1704,7 @@ void PhyloTree::computePatternLikelihood(double *ptn_lh, double *cur_logl, doubl
 int PhyloTree::computePatternCategories(IntVector *pattern_ncat) {
     if (sse != LK_EIGEN) {
         // compute _pattern_lh_cat
-        if (!getModel()->isMixture())
-            computeLikelihoodBranchEigen((PhyloNeighbor*)root->neighbors[0], (PhyloNode*)root);
-        else if (getModelFactory()->fused_mix_rate) {
-            computeMixrateLikelihoodBranchEigen((PhyloNeighbor*)root->neighbors[0], (PhyloNode*)root);
-            assert(getModel()->getNMixtures() == getRate()->getNRate());
-        } else {
-            computeMixtureLikelihoodBranchEigen((PhyloNeighbor*)root->neighbors[0], (PhyloNode*)root);
-        }
+        computePatternLhCat(WSL_MIXTURE_RATECAT);
     }
     
 	size_t npattern = aln->getNPattern();
@@ -2377,8 +2450,8 @@ double PhyloTree::computeBayesianBranchLength(PhyloNeighbor *dad_branch, PhyloNo
 
     }
     obsLen /= getAlnNSite();
-    if (obsLen < MIN_BRANCH_LEN)
-        obsLen = MIN_BRANCH_LEN;
+    if (obsLen < params->min_branch_length)
+        obsLen = params->min_branch_length;
     delete[] tmp_anscentral_state_prob2;
     delete[] tmp_anscentral_state_prob1;
     delete[] tmp_state_freq;
@@ -2395,7 +2468,7 @@ double PhyloTree::correctBranchLengthF81(double observedBran, double alpha) {
     observedBran = 1.0 - observedBran / H;
     // no gamma
     if (observedBran <= 0.0)
-        return MAX_BRANCH_LEN;
+        return params->max_branch_length;
 
     if (alpha <= 0.0) {
         correctedBranLen = -H * log(observedBran);
@@ -2405,10 +2478,10 @@ double PhyloTree::correctBranchLengthF81(double observedBran, double alpha) {
         correctedBranLen = H * alpha * (pow(observedBran, -1 / alpha) - 1);
     }
 
-    if (correctedBranLen < MIN_BRANCH_LEN)
-    	correctedBranLen = MIN_BRANCH_LEN;
-    if (correctedBranLen > MAX_BRANCH_LEN)
-    	correctedBranLen = MAX_BRANCH_LEN;
+    if (correctedBranLen < params->min_branch_length)
+    	correctedBranLen = params->min_branch_length;
+    if (correctedBranLen > params->max_branch_length)
+    	correctedBranLen = params->max_branch_length;
 
     return correctedBranLen;
 }
@@ -3004,7 +3077,7 @@ double PhyloTree::optimizeTreeLengthScaling(double &scaling, double gradient_eps
     is_opt_scaling = true;
     current_scaling = scaling;
     double negative_lh, ferror;
-    scaling = minimizeOneDimen(MIN_TREE_LENGTH_SCALE, scaling, MAX_TREE_LENGTH_SCALE, max(TOL_TREE_LENGTH_SCALE, gradient_epsilon), &negative_lh, &ferror);
+    scaling = minimizeOneDimen(min(current_scaling/2.0, MIN_TREE_LENGTH_SCALE), scaling, max(current_scaling*2.0, MAX_TREE_LENGTH_SCALE), max(TOL_TREE_LENGTH_SCALE, gradient_epsilon), &negative_lh, &ferror);
     if (scaling != current_scaling) {
         scaleLength(scaling / current_scaling);
         current_scaling = scaling;
@@ -3054,11 +3127,11 @@ void PhyloTree::optimizeOneBranch(PhyloNode *node1, PhyloNode *node2, bool clear
     theta_computed = false;
     if (optimize_by_newton) {
     	// Newton-Raphson method
-    	optx = minimizeNewton(MIN_BRANCH_LEN, current_len, MAX_BRANCH_LEN, TOL_BRANCH_LEN, negative_lh, maxNRStep);
+    	optx = minimizeNewton(params->min_branch_length, current_len, params->max_branch_length, params->min_branch_length, negative_lh, maxNRStep);
         if (verbose_mode >= VB_DEBUG) {
             cout << "minimizeNewton logl: " << computeLikelihoodFromBuffer() << endl;
         }
-    	if (optx > MAX_BRANCH_LEN*0.95) {
+    	if (optx > params->max_branch_length*0.95 && !isSuperTree()) {
     		// newton raphson diverged, reset
     	    double opt_lh = computeLikelihoodFromBuffer();
     	    current_it->length = current_len;
@@ -3070,7 +3143,7 @@ void PhyloTree::optimizeOneBranch(PhyloNode *node1, PhyloNode *node2, bool clear
     	}
 	}	else {
         // Brent method
-        optx = minimizeOneDimen(MIN_BRANCH_LEN, current_len, MAX_BRANCH_LEN, TOL_BRANCH_LEN, &negative_lh, &ferror);
+        optx = minimizeOneDimen(params->min_branch_length, current_len, params->max_branch_length, params->min_branch_length, &negative_lh, &ferror);
         if (verbose_mode >= VB_DEBUG) {
             cout << "minimizeBrent logl: " << -negative_lh << endl;
         }
@@ -3123,6 +3196,8 @@ void PhyloTree::optimizeAllBranchesLS(PhyloNode *node, PhyloNode *dad) {
 void PhyloTree::optimizeAllBranches(PhyloNode *node, PhyloNode *dad, int maxNRStep) {
 //    double tree_lh = -DBL_MAX;
 
+    if (!node) node = (PhyloNode*)root;
+
     for (NeighborVec::iterator it = (node)->neighbors.begin(); it != (node)->neighbors.end(); it++)
         if ((*it)->node != (dad)) {
             optimizeAllBranches((PhyloNode*) (*it)->node, node, maxNRStep);
@@ -3133,10 +3208,26 @@ void PhyloTree::optimizeAllBranches(PhyloNode *node, PhyloNode *dad, int maxNRSt
 //    return tree_lh;
 }
 
+void PhyloTree::computeBestTraversal(NodeVector &nodes, NodeVector &nodes2) {
+    Node *farleaf = findFarthestLeaf();
+//    Node *farleaf = root;
+
+    // double call to farthest leaf to find the longest path on the tree
+    findFarthestLeaf(farleaf);
+    if (verbose_mode >= VB_MAX)
+        cout << "Tree diameter: " << farleaf->height << endl;
+    getPreOrderBranches(nodes, nodes2, farleaf);
+}
+
 double PhyloTree::optimizeAllBranches(int my_iterations, double tolerance, int maxNRStep) {
     if (verbose_mode >= VB_MAX)
         cout << "Optimizing branch lengths (max " << my_iterations << " loops)..." << endl;
-    double tree_lh = computeLikelihood();
+    
+    NodeVector nodes, nodes2;
+    computeBestTraversal(nodes, nodes2);
+    
+    double tree_lh = computeLikelihoodBranch((PhyloNeighbor*)nodes[0]->findNeighbor(nodes2[0]), (PhyloNode*)nodes[0]);
+    
     if (verbose_mode >= VB_MAX) {
         cout << "Initial tree log-likelihood: " << tree_lh << endl;
     }
@@ -3149,7 +3240,21 @@ double PhyloTree::optimizeAllBranches(int my_iterations, double tolerance, int m
 //            printTree(cout, WT_BR_LEN+WT_NEWLINE);
 //        }
 
-    	optimizeAllBranches((PhyloNode*) root, NULL, maxNRStep);
+        for (int j = 0; j < nodes.size(); j++)
+            optimizeOneBranch((PhyloNode*)nodes[j], (PhyloNode*)nodes2[j]);
+
+//        if (i == 0) 
+//            optimizeOneBranch((PhyloNode*)nodes[0], (PhyloNode*)nodes2[0]);
+//        if (i % 2 == 0) {
+//            for (int j = 1; j < nodes.size(); j++)
+//                optimizeOneBranch((PhyloNode*)nodes[j], (PhyloNode*)nodes2[j]);
+//        } else {
+//            for (int j = nodes.size()-2; j >= 0; j--)
+//                optimizeOneBranch((PhyloNode*)nodes[j], (PhyloNode*)nodes2[j]);
+//        }
+
+//            optimizeAllBranches((PhyloNode*) root, NULL, maxNRStep);
+            
         double new_tree_lh = computeLikelihoodFromBuffer();
         //cout<<"After opt  log-lh = "<<new_tree_lh<<endl;
 
@@ -3499,10 +3604,31 @@ void PhyloTree::computeBioNJ(Params &params, Alignment *alignment, string &dist_
 //    setAlignment(alignment);
 }
 
+int PhyloTree::setNegativeBranch(bool force, double newlen, Node *node, Node *dad) {
+    if (!node) node = root;
+    int fixed = 0;
+
+    FOR_NEIGHBOR_IT(node, dad, it) {
+        if ((*it)->length < 0.0 || force) { // negative branch length detected
+            (*it)->length = newlen;
+            // set the backward branch length
+            (*it)->node->findNeighbor(node)->length = (*it)->length;
+            fixed++;
+        }
+        fixed += setNegativeBranch(force, newlen, (*it)->node, node);
+    }
+    return fixed;
+}
+
+
 int PhyloTree::fixNegativeBranch(bool force, Node *node, Node *dad) {
 
-    if (!node)
+    if (!node) {
         node = root;
+        // 2015-11-30: if not bifurcating, initialize unknown branch lengths with 0.1
+        if (!isBifurcating())
+            return setNegativeBranch(force, 0.1, root, NULL);
+    }
     int fixed = 0;
 
     FOR_NEIGHBOR_IT(node, dad, it){
@@ -3515,8 +3641,8 @@ int PhyloTree::fixNegativeBranch(bool force, Node *node, Node *dad) {
         double z = (double) aln->num_states / (aln->num_states - 1);
         double x = 1.0 - (z * branch_length);
         if (x > 0) branch_length = -log(x) / z;
-        if (branch_length < MIN_BRANCH_LEN)
-            branch_length = MIN_BRANCH_LEN;
+        if (branch_length < params->min_branch_length)
+            branch_length = params->min_branch_length;
 //        if (verbose_mode >= VB_DEBUG)
 //        	cout << "Negative branch length " << (*it)->length << " was set to ";
         //(*it)->length = fixed_length;
@@ -3529,7 +3655,7 @@ int PhyloTree::fixNegativeBranch(bool force, Node *node, Node *dad) {
         fixed++;
     }
     if ((*it)->length <= 0.0) {
-        (*it)->length = MIN_BRANCH_LEN;
+        (*it)->length = params->min_branch_length;
         (*it)->node->findNeighbor(node)->length = (*it)->length;
     }
     fixed += fixNegativeBranch(force, (*it)->node, node);
@@ -3830,9 +3956,6 @@ NNIMove PhyloTree::getBestNNIForBran(PhyloNode *node1, PhyloNode *node2, NNIMove
 		// compute the score of the swapped topology
 //		double saved_len = node1_nei->length;
 
-		optimizeOneBranch(node1, node2, false, NNI_MAX_NR_STEP);
-		nniMoves[cnt].newLen[0] = node1->findNeighbor(node2)->length;
-
 		int i=1;
         if (params->nni5) {
 			FOR_NEIGHBOR(node1, node2, it)
@@ -3842,9 +3965,14 @@ NNIMove PhyloTree::getBestNNIForBran(PhyloNode *node1, PhyloNode *node2, NNIMove
 				nniMoves[cnt].newLen[i] = node1->findNeighbor((*it)->node)->length;
 				i++;
 			}
+            node21_it->clearPartialLh();
+        }
+
+		optimizeOneBranch(node1, node2, false, NNI_MAX_NR_STEP);
+		nniMoves[cnt].newLen[0] = node1->findNeighbor(node2)->length;
 
-			 node21_it->clearPartialLh();
 
+        if (params->nni5) {
 			FOR_NEIGHBOR(node2, node1, it)
 			{
 				((PhyloNeighbor*) (*it)->node->findNeighbor(node2))->clearPartialLh();
@@ -3853,7 +3981,7 @@ NNIMove PhyloTree::getBestNNIForBran(PhyloNode *node1, PhyloNode *node2, NNIMove
 				nniMoves[cnt].newLen[i] = node2->findNeighbor((*it)->node)->length;
 				i++;
 			}
-			 node12_it->clearPartialLh();
+			node12_it->clearPartialLh();
 		}
 		double score = computeLikelihoodFromBuffer();
 		nniMoves[cnt].newloglh = score;
@@ -4650,10 +4778,181 @@ void PhyloTree::resampleLh(double **pat_lh, double *lh_new) {
     }
 }
 
+/*********************************************************/
+/** THIS FUNCTION IS TAKEN FROM PHYML source code alrt.c
+* Convert an aLRT statistic to a none parametric support
+* param in: the statistic
+*/
+
+double Statistics_To_Probabilities(double in)
+{
+  double rough_value=0.0;
+  double a=0.0;
+  double b=0.0;
+  double fa=0.0;
+  double fb=0.0;
+
+  if(in>=0.000000393 && in<0.00000157)
+    {
+      a=0.000000393;
+      b=0.00000157;
+      fa=0.0005;
+      fb=0.001;
+    }
+  else if(in>=0.00000157 && in<0.0000393)
+    {
+      a=0.00000157;
+      b=0.0000393;
+      fa=0.001;
+      fb=0.005;
+    }
+  else if(in>=0.0000393 && in<0.000157)
+    {
+      a=0.0000393;
+      b=0.000157;
+      fa=0.005;
+      fb=0.01;
+    }
+  else if(in>=0.000157 && in<0.000982)
+    {
+      a=0.000157;
+      b=0.000982;
+      fa=0.01;
+      fb=0.025;
+    }
+  else if(in>0.000982 && in<0.00393)
+    {
+      a=0.000982;
+      b=0.00393;
+      fa=0.025;
+      fb=0.05;
+    }
+  else if(in>=0.00393 && in<0.0158)
+    {
+      a=0.00393;
+      b=0.0158;
+      fa=0.05;
+      fb=0.1;
+    }
+  else if(in>=0.0158 && in<0.0642)
+    {
+      a=0.0158;
+      b=0.0642;
+      fa=0.1;
+      fb=0.2;
+    }
+  else if(in>=0.0642 && in<0.148)
+    {
+      a=0.0642;
+      b=0.148;
+      fa=0.2;
+      fb=0.3;
+    }
+  else if(in>=0.148 && in<0.275)
+    {
+      a=0.148;
+      b=0.275;
+      fa=0.3;
+      fb=0.4;
+    }
+  else if(in>=0.275 && in<0.455)
+    {
+      a=0.275;
+      b=0.455;
+      fa=0.4;
+      fb=0.5;
+    }
+  else if(in>=0.455 && in<0.708)
+    {
+      a=0.455;
+      b=0.708;
+      fa=0.5;
+      fb=0.6;
+    }
+  else if(in>=0.708 && in<1.074)
+    {
+      a=0.708;
+      b=1.074;
+      fa=0.6;
+      fb=0.7;
+    }
+  else if(in>=1.074 && in<1.642)
+    {
+      a=1.074;
+      b=1.642;
+      fa=0.7;
+      fb=0.8;
+    }
+  else if(in>=1.642 && in<2.706)
+    {
+      a=1.642;
+      b=2.706;
+      fa=0.8;
+      fb=0.9;
+    }
+  else if(in>=2.706 && in<3.841)
+    {
+      a=2.706;
+      b=3.841;
+      fa=0.9;
+      fb=0.95;
+    }
+  else if(in>=3.841 && in<5.024)
+    {
+      a=3.841;
+      b=5.024;
+      fa=0.95;
+      fb=0.975;
+    }
+  else if(in>=5.024 && in<6.635)
+    {
+      a=5.024;
+      b=6.635;
+      fa=0.975;
+      fb=0.99;
+    }
+  else if(in>=6.635 && in<7.879)
+    {
+      a=6.635;
+      b=7.879;
+      fa=0.99;
+      fb=0.995;
+    }
+  else if(in>=7.879 && in<10.828)
+    {
+      a=7.879;
+      b=10.828;
+      fa=0.995;
+      fb=0.999;
+    }
+  else if(in>=10.828 && in<12.116)
+    {
+      a=10.828;
+      b=12.116;
+      fa=0.999;
+      fb=0.9995;
+    }
+  if (in>=12.116)
+    {
+      rough_value=0.9999;
+    }
+  else if(in<0.000000393)
+    {
+      rough_value=0.0001;
+    }
+  else
+    {
+      rough_value=(b-in)/(b-a)*fa + (in - a)/(b-a)*fb;
+    }
+  rough_value=rough_value+(1.0-rough_value)/2.0;
+  rough_value=rough_value*rough_value*rough_value;
+  return rough_value;
+}
+
 // Implementation of testBranch follows Guindon et al. (2010)
 
-double PhyloTree::testOneBranch(double best_score, double *pattern_lh, int reps, int lbp_reps, PhyloNode *node1,
-        PhyloNode *node2, double &lbp_support) {
+double PhyloTree::testOneBranch(double best_score, double *pattern_lh, int reps, int lbp_reps,
+        PhyloNode *node1, PhyloNode *node2, double &lbp_support, double &aLRT_support, double &aBayes_support) {
     const int NUM_NNI = 3;
     double lh[NUM_NNI];
     double *pat_lh[NUM_NNI];
@@ -4668,7 +4967,16 @@ double PhyloTree::testOneBranch(double best_score, double *pattern_lh, int reps,
     else
         aLRT = (lh[0] - lh[2]);
 
-    int support = 0;
+    // compute parametric aLRT test support
+    double aLRT_stat = 2*aLRT;
+    aLRT_support = 0.0;
+    if (aLRT_stat >= 0) {
+        aLRT_support = Statistics_To_Probabilities(aLRT_stat);
+    }
+
+    aBayes_support = 1.0 / (1.0 + exp(lh[1]-lh[0]) + exp(lh[2]-lh[0]));
+
+    int SH_aLRT_support = 0;
 
     lbp_support = 0.0;
     int times = max(reps, lbp_reps);
@@ -4703,16 +5011,21 @@ double PhyloTree::testOneBranch(double best_score, double *pattern_lh, int reps,
                 cs_2nd_best = cs[1];
         }
         if (aLRT > (cs_best - cs_2nd_best) + 0.05)
-            support++;
+            SH_aLRT_support++;
     }
     delete[] pat_lh[2];
     delete[] pat_lh[1];
-    lbp_support /= times;
+    
+    if (times > 0)
+        lbp_support /= times;
 
-    return ((double) support) / times;
+    if (times > 0)
+        return ((double) SH_aLRT_support) / times;
+    else
+        return 0.0;
 }
 
-int PhyloTree::testAllBranches(int threshold, double best_score, double *pattern_lh, int reps, int lbp_reps,
+int PhyloTree::testAllBranches(int threshold, double best_score, double *pattern_lh, int reps, int lbp_reps, bool aLRT_test, bool aBayes_test,
         PhyloNode *node, PhyloNode *dad) {
     int num_low_support = 0;
     if (!node) {
@@ -4729,19 +5042,26 @@ int PhyloTree::testAllBranches(int threshold, double best_score, double *pattern
         }
     }
     if (dad && !node->isLeaf() && !dad->isLeaf()) {
-        double lbp_support;
-        int support = round(testOneBranch(best_score, pattern_lh, reps, lbp_reps, node, dad, lbp_support) * 100);
-        node->name = convertIntToString(support);
+        double lbp_support, aLRT_support, aBayes_support;
+        double SH_aLRT_support = (testOneBranch(best_score, pattern_lh, reps, lbp_reps,
+            node, dad, lbp_support, aLRT_support, aBayes_support) * 100);
+        if (reps)
+            node->name = convertDoubleToString(SH_aLRT_support);
         if (lbp_reps)
-            node->name += "/" + convertIntToString(round(lbp_support * 100));
-        if (support < threshold)
+            node->name += "/" + convertDoubleToString(lbp_support * 100);
+        if (aLRT_test)
+            node->name += "/" + convertDoubleToString(aLRT_support);
+        if (aBayes_test)
+            node->name += "/" + convertDoubleToString(aBayes_support);
+        if (SH_aLRT_support < threshold)
             num_low_support = 1;
         if (((PhyloNeighbor*) node->findNeighbor(dad))->partial_pars) {
-			((PhyloNeighbor*) node->findNeighbor(dad))->partial_pars[0] = support;
-			((PhyloNeighbor*) dad->findNeighbor(node))->partial_pars[0] = support;
+			((PhyloNeighbor*) node->findNeighbor(dad))->partial_pars[0] = round(SH_aLRT_support);
+			((PhyloNeighbor*) dad->findNeighbor(node))->partial_pars[0] = round(SH_aLRT_support);
         }
     }
-    FOR_NEIGHBOR_IT(node, dad, it)num_low_support += testAllBranches(threshold, best_score, pattern_lh, reps, lbp_reps, (PhyloNode*) (*it)->node, node);
+    FOR_NEIGHBOR_IT(node, dad, it)
+        num_low_support += testAllBranches(threshold, best_score, pattern_lh, reps, lbp_reps, aLRT_test, aBayes_test, (PhyloNode*) (*it)->node, node);
 
     return num_low_support;
 }
@@ -4779,7 +5099,7 @@ void PhyloTree::reinsertLeaf(Node *leaf, Node *node, Node *dad) {
     Node *adjacent_node = leaf->neighbors[0]->node;
     Neighbor *nei = node->findNeighbor(dad);
     //double len = nei->length;
-    double len = max(nei->length, MIN_BRANCH_LEN * 2);
+    double len = max(nei->length, params->min_branch_length * 2);
     // to avoid too small branch length when reinserting leaf
 
     FOR_NEIGHBOR_IT(adjacent_node, leaf, it){
diff --git a/phylotree.h b/phylotree.h
index fbdf0b0..ddf7f28 100644
--- a/phylotree.h
+++ b/phylotree.h
@@ -22,7 +22,7 @@
 
 //#define EIGEN_TUNE_FOR_CPU_CACHE_SIZE (512*256)
 //#define EIGEN_TUNE_FOR_CPU_CACHE_SIZE (8*512*512)
-#include "Eigen/Core"
+#include <Eigen/Core>
 #include "mtree.h"
 #include "alignment.h"
 #include "model/modelsubst.h"
@@ -39,8 +39,6 @@
 extern int instruction_set;
 
 
-const double MIN_BRANCH_LEN = 0.000001; // NEVER TOUCH THIS CONSTANT AGAIN PLEASE!
-const double MAX_BRANCH_LEN = 100.0;
 const double TOL_BRANCH_LEN = 0.000001; // NEVER TOUCH THIS CONSTANT AGAIN PLEASE!
 const double TOL_LIKELIHOOD = 0.001; // NEVER TOUCH THIS CONSTANT AGAIN PLEASE!
 const double TOL_LIKELIHOOD_PARAMOPT = 0.001; // BQM: newly introduced for ModelFactory::optimizeParameters
@@ -94,7 +92,11 @@ inline T *aligned_alloc(size_t size) {
     void *mem;
 
 #if defined WIN32 || defined _WIN32 || defined __WIN32__
-	mem = _aligned_malloc(size*sizeof(T), MEM_ALIGNMENT);
+    #if (defined(__MINGW32__) || defined(__clang__)) && defined(BINARY32)
+        mem = __mingw_aligned_malloc(size*sizeof(T), MEM_ALIGNMENT);
+    #else
+        mem = _aligned_malloc(size*sizeof(T), MEM_ALIGNMENT);
+    #endif
 #else
 	int res = posix_memalign(&mem, MEM_ALIGNMENT, size*sizeof(T));
     if (res == ENOMEM) {
@@ -115,7 +117,11 @@ inline T *aligned_alloc(size_t size) {
 
 inline void aligned_free(void *mem) {
 #if defined WIN32 || defined _WIN32 || defined __WIN32__
-	_aligned_free(mem);
+    #if (defined(__MINGW32__) || defined(__clang__)) && defined(BINARY32)
+        __mingw_aligned_free(mem);
+    #else
+        _aligned_free(mem);
+    #endif
 #else
 	free(mem);
 #endif
@@ -256,7 +262,7 @@ public:
      */
     PhyloTree();
 
-    EIGEN_MAKE_ALIGNED_OPERATOR_NEW
+//    EIGEN_MAKE_ALIGNED_OPERATOR_NEW
 
     /**
      * Constructor with given alignment
@@ -723,6 +729,17 @@ public:
     virtual double computeLikelihood(double *pattern_lh = NULL);
 
     /**
+     * @return number of elements per site lhl entry, used in conjunction with computePatternLhCat
+     */
+    int getNumLhCat(SiteLoglType wsl);
+
+    /**
+     * compute _pattern_lh_cat for site-likelihood per category
+     * @return tree log-likelihood
+     */
+    virtual double computePatternLhCat(SiteLoglType wsl);
+
+    /**
             compute pattern likelihoods only if the accumulated scaling factor is non-zero.
             Otherwise, copy the pattern_lh attribute
             @param pattern_lh (OUT) pattern log-likelihoods,
@@ -731,7 +748,7 @@ public:
             @param pattern_lh_cat (OUT) if not NULL, store all pattern-likelihood per category
      */
     virtual void computePatternLikelihood(double *pattern_lh, double *cur_logl = NULL,
-    		double *pattern_lh_cat = NULL);
+    		double *pattern_lh_cat = NULL, SiteLoglType wsl = WSL_RATECAT);
 
     vector<uint64_t> ptn_cat_mask;
 
@@ -1006,6 +1023,8 @@ public:
      */
     void optimizeAllBranchesLS(PhyloNode *node = NULL, PhyloNode *dad = NULL);
 
+    void computeBestTraversal(NodeVector &nodes, NodeVector &nodes2);
+
     /**
             optimize all branch lengths of the tree
             @param iterations number of iterations to loop through all branches
@@ -1244,6 +1263,11 @@ public:
      */
     virtual int fixNegativeBranch(bool force = false, Node *node = NULL, Node *dad = NULL);
 
+    /**
+        set a negative branch length to a new length (newlen)
+    */
+    int setNegativeBranch(bool force, double newlen, Node *node = NULL, Node *dad = NULL);
+
     // OBSOLETE: assignRandomBranchLengths no longer needed, use fixNegativeBranch instead!
 //    int assignRandomBranchLengths(bool force = false, Node *node = NULL, Node *dad = NULL);
 
@@ -1325,15 +1349,16 @@ public:
     /**
             Test one branch of the tree with aLRT SH-like interpretation
      */
-    double testOneBranch(
-            double best_score, double *pattern_lh, int reps, int lbp_reps,
-            PhyloNode *node1, PhyloNode *node2, double &lbp_support);
+    double testOneBranch(double best_score, double *pattern_lh, 
+            int reps, int lbp_reps,
+            PhyloNode *node1, PhyloNode *node2, 
+            double &lbp_support, double &aLRT_support, double &aBayes_support);
 
     /**
             Test all branches of the tree with aLRT SH-like interpretation
      */
-    int testAllBranches(int threshold,
-            double best_score, double *pattern_lh, int reps, int lbp_reps,
+    int testAllBranches(int threshold, double best_score, double *pattern_lh, 
+            int reps, int lbp_reps, bool aLRT_test, bool aBayes_test,
             PhyloNode *node = NULL, PhyloNode *dad = NULL);
 
     /****************************************************************************
@@ -1458,6 +1483,8 @@ public:
 	/** sequence that are identical to one of the removed sequences */
 	StrVector twin_seqs;
 
+	size_t num_partial_lh_computations;
+
 	/** remove identical sequences from the tree */
     virtual void removeIdenticalSeqs(Params &params);
 
diff --git a/phylotreepars.cpp b/phylotreepars.cpp
index 2be1e90..57b3ae4 100644
--- a/phylotreepars.cpp
+++ b/phylotreepars.cpp
@@ -396,7 +396,9 @@ int PhyloTree::computeParsimonyTree(const char *out_prefix, Alignment *alignment
         ((PhyloNeighbor*) new_taxon->findNeighbor(added_node))->partial_lh_computed |= 2;
         ((PhyloNeighbor*) new_taxon->findNeighbor(added_node))->partial_pars = new_taxon_partial_pars;
 
+        ((PhyloNeighbor*)target_dad->findNeighbor(added_node))->clearPartialLh();
         ((PhyloNeighbor*)target_dad->findNeighbor(added_node))->partial_pars = central_partial_pars + ((index++) * pars_block_size);
+        ((PhyloNeighbor*)target_node->findNeighbor(added_node))->clearPartialLh();
         ((PhyloNeighbor*)target_node->findNeighbor(added_node))->partial_pars = central_partial_pars + ((index++) * pars_block_size);
 
         target_dad->clearReversePartialLh(added_node);
diff --git a/phylotreesse.cpp b/phylotreesse.cpp
index 6a1bbb3..8447044 100644
--- a/phylotreesse.cpp
+++ b/phylotreesse.cpp
@@ -819,9 +819,13 @@ void PhyloTree::computeLikelihoodDervEigen(PhyloNeighbor *dad_branch, PhyloNode
 				double *partial_lh_dad = dad_branch->partial_lh + ptn*block;
 				double *theta = theta_all + ptn*block;
 				double *lh_tip = tip_partial_lh + ((int)((ptn < orig_nptn) ? (aln->at(ptn))[dad->id] :  model_factory->unobserved_ptns[ptn-orig_nptn]))*nstates;
-				for (i = 0; i < block; i++) {
-					theta[i] = lh_tip[i%nstates] * partial_lh_dad[i];
-				}
+                for (c = 0; c < ncat; c++) {
+                    for (i = 0; i < nstates; i++) {
+                        theta[i] = lh_tip[i] * partial_lh_dad[i];
+                    }
+                    partial_lh_dad += nstates;
+                    theta += nstates;
+                }
 
 			}
 			// ascertainment bias correction
@@ -836,20 +840,9 @@ void PhyloTree::computeLikelihoodDervEigen(PhyloNeighbor *dad_branch, PhyloNode
 				double *theta = theta_all + ptn*block;
 			    double *partial_lh_node = node_branch->partial_lh + ptn*block;
 			    double *partial_lh_dad = dad_branch->partial_lh + ptn*block;
-//			    double theta_max = 0.0;
 	    		for (i = 0; i < block; i++) {
 	    			theta[i] = partial_lh_node[i] * partial_lh_dad[i];
-//	    			theta_max = max(theta_max, fabs(theta[i]));
 	    		}
-//	    		if (theta_max <= 0) {
-//	    			// numerical underflow, recompute theta
-//	    			for (i = 0; i < block; i++) {
-//	    				partial_lh_node[i] *= SCALING_THRESHOLD_INVER;
-//		    			theta[i] = partial_lh_node[i] * partial_lh_dad[i];
-//	    			}
-//	    			node_branch->lh_scale_factor += LOG_SCALING_THRESHOLD*ptn_freq[ptn];
-//	    			node_branch->scale_num[ptn] += 1;
-//	    		}
 			}
 	    }
 		theta_computed = true;
@@ -1079,7 +1072,14 @@ double PhyloTree::computeLikelihoodBranchEigen(PhyloNeighbor *dad_branch, PhyloN
 
     if (orig_nptn < nptn) {
     	// ascertainment bias correction
-        assert(prob_const < 1.0);
+        assert(prob_const < 1.0 && prob_const >= 0.0);
+
+        // BQM 2015-10-11: fix this for those functions using _pattern_lh_cat
+//        double inv_const = 1.0 / (1.0-prob_const);
+//        size_t nptn_cat = orig_nptn*ncat;
+//    	for (ptn = 0; ptn < nptn_cat; ptn++)
+//            _pattern_lh_cat[ptn] *= inv_const;
+        
     	prob_const = log(1.0 - prob_const);
     	for (ptn = 0; ptn < orig_nptn; ptn++)
     		_pattern_lh[ptn] -= prob_const;
@@ -2605,6 +2605,7 @@ double PhyloTree::computeMixtureLikelihoodBranchEigen(PhyloNeighbor *dad_branch,
     size_t block = ncat * nstates * nmixture;
     size_t statemix = nstates * nmixture;
     size_t cat_states = ncat * nstates;
+    size_t catmix = ncat * nmixture;
     size_t ptn; // for big data size > 4GB memory required
     size_t c, i, m;
     size_t orig_nptn = aln->size();
@@ -2622,8 +2623,8 @@ double PhyloTree::computeMixtureLikelihoodBranchEigen(PhyloNeighbor *dad_branch,
 	}
 
 	double prob_const = 0.0;
-    // 2018-08-14: _pattern_lh_cat now only stores mixture likelihoods
-	memset(_pattern_lh_cat, 0, nptn*nmixture*sizeof(double));
+    // 2015-11-30: _pattern_lh_cat now stores mixture and cat likelihoods
+	memset(_pattern_lh_cat, 0, nptn*catmix*sizeof(double));
 
     if (dad->isLeaf()) {
     	// special treatment for TIP-INTERNAL NODE case
@@ -2652,26 +2653,26 @@ double PhyloTree::computeMixtureLikelihoodBranchEigen(PhyloNeighbor *dad_branch,
 #endif
     	for (ptn = 0; ptn < nptn; ptn++) {
 			double lh_ptn = ptn_invar[ptn];
-			double *lh_cat = _pattern_lh_cat + ptn*nmixture;
+			double *lh_cat = _pattern_lh_cat + ptn*catmix;
 			double *partial_lh_dad = dad_branch->partial_lh + ptn*block;
 			int state_dad = (ptn < orig_nptn) ? (aln->at(ptn))[dad->id] : model_factory->unobserved_ptns[ptn-orig_nptn];
 			double *lh_node = partial_lh_node + state_dad*block;
 			for (m = 0; m < nmixture; m++) {
-                for (i = 0; i < cat_states; i++)
-                    *lh_cat += lh_node[i] * partial_lh_dad[i];
-                lh_ptn += *lh_cat;
-                lh_node += cat_states;
-                partial_lh_dad += cat_states;
-                lh_cat++;
-//				for (c = 0; c < ncat; c++) {
-//					for (i = 0; i < nstates; i++) {
-//						*lh_cat += lh_node[i] * partial_lh_dad[i];
-//					}
-//					lh_node += nstates;
-//					partial_lh_dad += nstates;
-//                    lh_ptn += *lh_cat;
-//					lh_cat++;
-//				}
+//                for (i = 0; i < cat_states; i++)
+//                    *lh_cat += lh_node[i] * partial_lh_dad[i];
+//                lh_ptn += *lh_cat;
+//                lh_node += cat_states;
+//                partial_lh_dad += cat_states;
+//                lh_cat++;
+				for (c = 0; c < ncat; c++) {
+					for (i = 0; i < nstates; i++) {
+						*lh_cat += lh_node[i] * partial_lh_dad[i];
+					}
+					lh_node += nstates;
+					partial_lh_dad += nstates;
+                    lh_ptn += *lh_cat;
+					lh_cat++;
+				}
                 
 			}
 //			assert(lh_ptn > 0.0);
@@ -2691,28 +2692,28 @@ double PhyloTree::computeMixtureLikelihoodBranchEigen(PhyloNeighbor *dad_branch,
 #endif
     	for (ptn = 0; ptn < nptn; ptn++) {
 			double lh_ptn = ptn_invar[ptn];
-			double *lh_cat = _pattern_lh_cat + ptn*nmixture;
+			double *lh_cat = _pattern_lh_cat + ptn*catmix;
 			double *partial_lh_dad = dad_branch->partial_lh + ptn*block;
 			double *partial_lh_node = node_branch->partial_lh + ptn*block;
 			double *val_tmp = val;
 			for (m = 0; m < nmixture; m++) {
-                for (i = 0; i < cat_states; i++)
-                    *lh_cat += val_tmp[i] * partial_lh_node[i] * partial_lh_dad[i];
-                lh_ptn += *lh_cat;
-                partial_lh_dad += cat_states;
-                partial_lh_node += cat_states;
-                val_tmp += cat_states;
-                lh_cat++;
-//				for (c = 0; c < ncat; c++) {
-//					for (i = 0; i < nstates; i++) {
-//						*lh_cat +=  val_tmp[i] * partial_lh_node[i] * partial_lh_dad[i];
-//					}
-//					lh_ptn += *lh_cat;
-//					partial_lh_node += nstates;
-//					partial_lh_dad += nstates;
-//					val_tmp += nstates;
-//					lh_cat++;
-//				}
+//                for (i = 0; i < cat_states; i++)
+//                    *lh_cat += val_tmp[i] * partial_lh_node[i] * partial_lh_dad[i];
+//                lh_ptn += *lh_cat;
+//                partial_lh_dad += cat_states;
+//                partial_lh_node += cat_states;
+//                val_tmp += cat_states;
+//                lh_cat++;
+				for (c = 0; c < ncat; c++) {
+					for (i = 0; i < nstates; i++) {
+						*lh_cat +=  val_tmp[i] * partial_lh_node[i] * partial_lh_dad[i];
+					}
+					lh_ptn += *lh_cat;
+					partial_lh_node += nstates;
+					partial_lh_dad += nstates;
+					val_tmp += nstates;
+					lh_cat++;
+				}
 			}
 
 			assert(lh_ptn > 0.0);
diff --git a/pllnni.cpp b/pllnni.cpp
index 16ac6bc..9c71a91 100755
--- a/pllnni.cpp
+++ b/pllnni.cpp
@@ -828,9 +828,9 @@ void pllSaveCurrentTree(pllInstance* tr, partitionList *pr, nodeptr p){
 
     pllBoolean is_stored = PLL_FALSE;
 
-    if(globalParam->store_candidate_trees){
-        is_stored = pllHashSearch(pllUFBootDataPtr->treels, tree_str, &(item_ptr->data));
-    }
+//    if(globalParam->store_candidate_trees){
+//        is_stored = pllHashSearch(pllUFBootDataPtr->treels, tree_str, &(item_ptr->data));
+//    }
 
     if(is_stored){ // if found the tree_str
         pllUFBootDataPtr->duplication_counter++;
@@ -849,13 +849,13 @@ void pllSaveCurrentTree(pllInstance* tr, partitionList *pr, nodeptr p){
             printf("Updated logl %f to %f\n", pllUFBootDataPtr->treels_logl[tree_index], cur_logl);
         pllUFBootDataPtr->treels_logl[tree_index] = cur_logl;
 
-        if (pllUFBootDataPtr->save_all_br_lens) {
-            pllTree2StringREC(tr->tree_string, tr, pr, tr->start->back, PLL_TRUE,
-                    PLL_FALSE, PLL_FALSE, PLL_FALSE, PLL_TRUE, PLL_SUMMARIZE_LENGTH, PLL_FALSE, PLL_FALSE);
-            char * tree_str_br_lens = (char *) malloc (strlen(tr->tree_string) + 1);
-            strcpy(tree_str_br_lens, tr->tree_string);
-            pllUFBootDataPtr->treels_newick[tree_index] = tree_str_br_lens;
-        }
+//        if (pllUFBootDataPtr->save_all_br_lens) {
+//            pllTree2StringREC(tr->tree_string, tr, pr, tr->start->back, PLL_TRUE,
+//                    PLL_FALSE, PLL_FALSE, PLL_FALSE, PLL_TRUE, PLL_SUMMARIZE_LENGTH, PLL_FALSE, PLL_FALSE);
+//            char * tree_str_br_lens = (char *) malloc (strlen(tr->tree_string) + 1);
+//            strcpy(tree_str_br_lens, tr->tree_string);
+//            pllUFBootDataPtr->treels_newick[tree_index] = tree_str_br_lens;
+//        }
         if (pllUFBootDataPtr->boot_samples == NULL) {
             (pllUFBootDataPtr->treels_ptnlh)[tree_index] =
                     (double *) malloc(pllUFBootDataPtr->n_patterns * sizeof(double));
@@ -881,11 +881,11 @@ void pllSaveCurrentTree(pllInstance* tr, partitionList *pr, nodeptr p){
 
 		tree_index = pllUFBootDataPtr->candidate_trees_count;
 		pllUFBootDataPtr->candidate_trees_count++;
-		if (globalParam->store_candidate_trees){
-			*((int *)item_ptr->data) = tree_index;
-			item_ptr->str = tree_str;
-			pllHashAdd(pllUFBootDataPtr->treels, pllHashString(tree_str, pllUFBootDataPtr->treels->size), tree_str, item_ptr->data);
-		}
+//		if (globalParam->store_candidate_trees){
+//			*((int *)item_ptr->data) = tree_index;
+//			item_ptr->str = tree_str;
+//			pllHashAdd(pllUFBootDataPtr->treels, pllHashString(tree_str, pllUFBootDataPtr->treels->size), tree_str, item_ptr->data);
+//		}
 		pllUFBootDataPtr->treels_logl[tree_index] = cur_logl;
 
 		if (verbose_mode >= VB_MAX)
@@ -915,7 +915,8 @@ void pllSaveCurrentTree(pllInstance* tr, partitionList *pr, nodeptr p){
             if (rell > pllUFBootDataPtr->boot_logl[sample] + globalParam->ufboot_epsilon ||
                 (rell > pllUFBootDataPtr->boot_logl[sample] - globalParam->ufboot_epsilon &&
                     random_double() <= 1.0/(pllUFBootDataPtr->boot_counts[sample]+1))) {
-                if (!globalParam->store_candidate_trees){
+//                if (!globalParam->store_candidate_trees)
+                {
                     is_stored = pllHashSearch(pllUFBootDataPtr->treels, tree_str, &(item_ptr->data));
                     if(is_stored)
                         tree_index = *((int *)item_ptr->data);
@@ -941,13 +942,13 @@ void pllSaveCurrentTree(pllInstance* tr, partitionList *pr, nodeptr p){
 /*        if (updated && verbose_mode >= VB_MAX)
          printf("%d boot trees updated\n", updated);*/
     }
-    if (pllUFBootDataPtr->save_all_br_lens) {
-        pllTree2StringREC(tr->tree_string, tr, pr, tr->start->back, PLL_TRUE,
-                PLL_FALSE, PLL_FALSE, PLL_FALSE, PLL_TRUE, PLL_SUMMARIZE_LH, PLL_FALSE, PLL_FALSE);
-        char * s = (char *) malloc (strlen(tr->tree_string) + 1);
-        strcpy(s, tr->tree_string);
-        pllUFBootDataPtr->treels_newick[tree_index] = s;
-    }
+//    if (pllUFBootDataPtr->save_all_br_lens) {
+//        pllTree2StringREC(tr->tree_string, tr, pr, tr->start->back, PLL_TRUE,
+//                PLL_FALSE, PLL_FALSE, PLL_FALSE, PLL_TRUE, PLL_SUMMARIZE_LH, PLL_FALSE, PLL_FALSE);
+//        char * s = (char *) malloc (strlen(tr->tree_string) + 1);
+//        strcpy(s, tr->tree_string);
+//        pllUFBootDataPtr->treels_newick[tree_index] = s;
+//    }
 
 //    if(!globalParam->store_candidate_trees){
 //        free(tree_str);
@@ -1000,13 +1001,13 @@ void pllResizeUFBootData(){
     free(pllUFBootDataPtr->treels_logl);
     pllUFBootDataPtr->treels_logl = rtreels_logl;
 
-    char ** rtreels_newick =
-            (char **) malloc(2 * count * (sizeof(char *)));
-    if(!rtreels_newick) outError("Not enough dynamic memory!");
-    memset(rtreels_newick, 0, 2 * count * sizeof(char *));
-    memcpy(rtreels_newick, pllUFBootDataPtr->treels_newick, count * sizeof(char *));
-    free(pllUFBootDataPtr->treels_newick);
-    pllUFBootDataPtr->treels_newick = rtreels_newick;
+//    char ** rtreels_newick =
+//            (char **) malloc(2 * count * (sizeof(char *)));
+//    if(!rtreels_newick) outError("Not enough dynamic memory!");
+//    memset(rtreels_newick, 0, 2 * count * sizeof(char *));
+//    memcpy(rtreels_newick, pllUFBootDataPtr->treels_newick, count * sizeof(char *));
+//    free(pllUFBootDataPtr->treels_newick);
+//    pllUFBootDataPtr->treels_newick = rtreels_newick;
 
     double ** rtreels_ptnlh =
         (double **) malloc(2 * count * (sizeof(double *)));
diff --git a/pllnni.h b/pllnni.h
index a0597fe..35edd6c 100644
--- a/pllnni.h
+++ b/pllnni.h
@@ -200,19 +200,19 @@ typedef struct{
     int max_candidate_trees;
     int treels_size;
     int save_all_trees;
-    pllBoolean save_all_br_lens;
+//    pllBoolean save_all_br_lens;
     double logl_cutoff;
     int duplication_counter;
     int n_patterns;
     struct pllHashTable * treels;
     unsigned int candidate_trees_count; /* counter of trees in pllHashTable */
     double * treels_logl; // maintain size == treels_size
-    char ** treels_newick; // maintain size == treels_size
+//    char ** treels_newick; // maintain size == treels_size
     double ** treels_ptnlh; // maintain size == treels_size
     int ** boot_samples;
     double * boot_logl;
     int * boot_counts;
-    int * boot_trees;
+    StrVector boot_trees;
 } pllUFBootData;
 
 /**
diff --git a/pllrepo/AUTHORS b/pllrepo/AUTHORS
deleted file mode 100644
index e69de29..0000000
diff --git a/pllrepo/COPYING b/pllrepo/COPYING
deleted file mode 100644
index 94a9ed0..0000000
--- a/pllrepo/COPYING
+++ /dev/null
@@ -1,674 +0,0 @@
-                    GNU GENERAL PUBLIC LICENSE
-                       Version 3, 29 June 2007
-
- Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
- Everyone is permitted to copy and distribute verbatim copies
- of this license document, but changing it is not allowed.
-
-                            Preamble
-
-  The GNU General Public License is a free, copyleft license for
-software and other kinds of works.
-
-  The licenses for most software and other practical works are designed
-to take away your freedom to share and change the works.  By contrast,
-the GNU General Public License is intended to guarantee your freedom to
-share and change all versions of a program--to make sure it remains free
-software for all its users.  We, the Free Software Foundation, use the
-GNU General Public License for most of our software; it applies also to
-any other work released this way by its authors.  You can apply it to
-your programs, too.
-
-  When we speak of free software, we are referring to freedom, not
-price.  Our General Public Licenses are designed to make sure that you
-have the freedom to distribute copies of free software (and charge for
-them if you wish), that you receive source code or can get it if you
-want it, that you can change the software or use pieces of it in new
-free programs, and that you know you can do these things.
-
-  To protect your rights, we need to prevent others from denying you
-these rights or asking you to surrender the rights.  Therefore, you have
-certain responsibilities if you distribute copies of the software, or if
-you modify it: responsibilities to respect the freedom of others.
-
-  For example, if you distribute copies of such a program, whether
-gratis or for a fee, you must pass on to the recipients the same
-freedoms that you received.  You must make sure that they, too, receive
-or can get the source code.  And you must show them these terms so they
-know their rights.
-
-  Developers that use the GNU GPL protect your rights with two steps:
-(1) assert copyright on the software, and (2) offer you this License
-giving you legal permission to copy, distribute and/or modify it.
-
-  For the developers' and authors' protection, the GPL clearly explains
-that there is no warranty for this free software.  For both users' and
-authors' sake, the GPL requires that modified versions be marked as
-changed, so that their problems will not be attributed erroneously to
-authors of previous versions.
-
-  Some devices are designed to deny users access to install or run
-modified versions of the software inside them, although the manufacturer
-can do so.  This is fundamentally incompatible with the aim of
-protecting users' freedom to change the software.  The systematic
-pattern of such abuse occurs in the area of products for individuals to
-use, which is precisely where it is most unacceptable.  Therefore, we
-have designed this version of the GPL to prohibit the practice for those
-products.  If such problems arise substantially in other domains, we
-stand ready to extend this provision to those domains in future versions
-of the GPL, as needed to protect the freedom of users.
-
-  Finally, every program is threatened constantly by software patents.
-States should not allow patents to restrict development and use of
-software on general-purpose computers, but in those that do, we wish to
-avoid the special danger that patents applied to a free program could
-make it effectively proprietary.  To prevent this, the GPL assures that
-patents cannot be used to render the program non-free.
-
-  The precise terms and conditions for copying, distribution and
-modification follow.
-
-                       TERMS AND CONDITIONS
-
-  0. Definitions.
-
-  "This License" refers to version 3 of the GNU General Public License.
-
-  "Copyright" also means copyright-like laws that apply to other kinds of
-works, such as semiconductor masks.
-
-  "The Program" refers to any copyrightable work licensed under this
-License.  Each licensee is addressed as "you".  "Licensees" and
-"recipients" may be individuals or organizations.
-
-  To "modify" a work means to copy from or adapt all or part of the work
-in a fashion requiring copyright permission, other than the making of an
-exact copy.  The resulting work is called a "modified version" of the
-earlier work or a work "based on" the earlier work.
-
-  A "covered work" means either the unmodified Program or a work based
-on the Program.
-
-  To "propagate" a work means to do anything with it that, without
-permission, would make you directly or secondarily liable for
-infringement under applicable copyright law, except executing it on a
-computer or modifying a private copy.  Propagation includes copying,
-distribution (with or without modification), making available to the
-public, and in some countries other activities as well.
-
-  To "convey" a work means any kind of propagation that enables other
-parties to make or receive copies.  Mere interaction with a user through
-a computer network, with no transfer of a copy, is not conveying.
-
-  An interactive user interface displays "Appropriate Legal Notices"
-to the extent that it includes a convenient and prominently visible
-feature that (1) displays an appropriate copyright notice, and (2)
-tells the user that there is no warranty for the work (except to the
-extent that warranties are provided), that licensees may convey the
-work under this License, and how to view a copy of this License.  If
-the interface presents a list of user commands or options, such as a
-menu, a prominent item in the list meets this criterion.
-
-  1. Source Code.
-
-  The "source code" for a work means the preferred form of the work
-for making modifications to it.  "Object code" means any non-source
-form of a work.
-
-  A "Standard Interface" means an interface that either is an official
-standard defined by a recognized standards body, or, in the case of
-interfaces specified for a particular programming language, one that
-is widely used among developers working in that language.
-
-  The "System Libraries" of an executable work include anything, other
-than the work as a whole, that (a) is included in the normal form of
-packaging a Major Component, but which is not part of that Major
-Component, and (b) serves only to enable use of the work with that
-Major Component, or to implement a Standard Interface for which an
-implementation is available to the public in source code form.  A
-"Major Component", in this context, means a major essential component
-(kernel, window system, and so on) of the specific operating system
-(if any) on which the executable work runs, or a compiler used to
-produce the work, or an object code interpreter used to run it.
-
-  The "Corresponding Source" for a work in object code form means all
-the source code needed to generate, install, and (for an executable
-work) run the object code and to modify the work, including scripts to
-control those activities.  However, it does not include the work's
-System Libraries, or general-purpose tools or generally available free
-programs which are used unmodified in performing those activities but
-which are not part of the work.  For example, Corresponding Source
-includes interface definition files associated with source files for
-the work, and the source code for shared libraries and dynamically
-linked subprograms that the work is specifically designed to require,
-such as by intimate data communication or control flow between those
-subprograms and other parts of the work.
-
-  The Corresponding Source need not include anything that users
-can regenerate automatically from other parts of the Corresponding
-Source.
-
-  The Corresponding Source for a work in source code form is that
-same work.
-
-  2. Basic Permissions.
-
-  All rights granted under this License are granted for the term of
-copyright on the Program, and are irrevocable provided the stated
-conditions are met.  This License explicitly affirms your unlimited
-permission to run the unmodified Program.  The output from running a
-covered work is covered by this License only if the output, given its
-content, constitutes a covered work.  This License acknowledges your
-rights of fair use or other equivalent, as provided by copyright law.
-
-  You may make, run and propagate covered works that you do not
-convey, without conditions so long as your license otherwise remains
-in force.  You may convey covered works to others for the sole purpose
-of having them make modifications exclusively for you, or provide you
-with facilities for running those works, provided that you comply with
-the terms of this License in conveying all material for which you do
-not control copyright.  Those thus making or running the covered works
-for you must do so exclusively on your behalf, under your direction
-and control, on terms that prohibit them from making any copies of
-your copyrighted material outside their relationship with you.
-
-  Conveying under any other circumstances is permitted solely under
-the conditions stated below.  Sublicensing is not allowed; section 10
-makes it unnecessary.
-
-  3. Protecting Users' Legal Rights From Anti-Circumvention Law.
-
-  No covered work shall be deemed part of an effective technological
-measure under any applicable law fulfilling obligations under article
-11 of the WIPO copyright treaty adopted on 20 December 1996, or
-similar laws prohibiting or restricting circumvention of such
-measures.
-
-  When you convey a covered work, you waive any legal power to forbid
-circumvention of technological measures to the extent such circumvention
-is effected by exercising rights under this License with respect to
-the covered work, and you disclaim any intention to limit operation or
-modification of the work as a means of enforcing, against the work's
-users, your or third parties' legal rights to forbid circumvention of
-technological measures.
-
-  4. Conveying Verbatim Copies.
-
-  You may convey verbatim copies of the Program's source code as you
-receive it, in any medium, provided that you conspicuously and
-appropriately publish on each copy an appropriate copyright notice;
-keep intact all notices stating that this License and any
-non-permissive terms added in accord with section 7 apply to the code;
-keep intact all notices of the absence of any warranty; and give all
-recipients a copy of this License along with the Program.
-
-  You may charge any price or no price for each copy that you convey,
-and you may offer support or warranty protection for a fee.
-
-  5. Conveying Modified Source Versions.
-
-  You may convey a work based on the Program, or the modifications to
-produce it from the Program, in the form of source code under the
-terms of section 4, provided that you also meet all of these conditions:
-
-    a) The work must carry prominent notices stating that you modified
-    it, and giving a relevant date.
-
-    b) The work must carry prominent notices stating that it is
-    released under this License and any conditions added under section
-    7.  This requirement modifies the requirement in section 4 to
-    "keep intact all notices".
-
-    c) You must license the entire work, as a whole, under this
-    License to anyone who comes into possession of a copy.  This
-    License will therefore apply, along with any applicable section 7
-    additional terms, to the whole of the work, and all its parts,
-    regardless of how they are packaged.  This License gives no
-    permission to license the work in any other way, but it does not
-    invalidate such permission if you have separately received it.
-
-    d) If the work has interactive user interfaces, each must display
-    Appropriate Legal Notices; however, if the Program has interactive
-    interfaces that do not display Appropriate Legal Notices, your
-    work need not make them do so.
-
-  A compilation of a covered work with other separate and independent
-works, which are not by their nature extensions of the covered work,
-and which are not combined with it such as to form a larger program,
-in or on a volume of a storage or distribution medium, is called an
-"aggregate" if the compilation and its resulting copyright are not
-used to limit the access or legal rights of the compilation's users
-beyond what the individual works permit.  Inclusion of a covered work
-in an aggregate does not cause this License to apply to the other
-parts of the aggregate.
-
-  6. Conveying Non-Source Forms.
-
-  You may convey a covered work in object code form under the terms
-of sections 4 and 5, provided that you also convey the
-machine-readable Corresponding Source under the terms of this License,
-in one of these ways:
-
-    a) Convey the object code in, or embodied in, a physical product
-    (including a physical distribution medium), accompanied by the
-    Corresponding Source fixed on a durable physical medium
-    customarily used for software interchange.
-
-    b) Convey the object code in, or embodied in, a physical product
-    (including a physical distribution medium), accompanied by a
-    written offer, valid for at least three years and valid for as
-    long as you offer spare parts or customer support for that product
-    model, to give anyone who possesses the object code either (1) a
-    copy of the Corresponding Source for all the software in the
-    product that is covered by this License, on a durable physical
-    medium customarily used for software interchange, for a price no
-    more than your reasonable cost of physically performing this
-    conveying of source, or (2) access to copy the
-    Corresponding Source from a network server at no charge.
-
-    c) Convey individual copies of the object code with a copy of the
-    written offer to provide the Corresponding Source.  This
-    alternative is allowed only occasionally and noncommercially, and
-    only if you received the object code with such an offer, in accord
-    with subsection 6b.
-
-    d) Convey the object code by offering access from a designated
-    place (gratis or for a charge), and offer equivalent access to the
-    Corresponding Source in the same way through the same place at no
-    further charge.  You need not require recipients to copy the
-    Corresponding Source along with the object code.  If the place to
-    copy the object code is a network server, the Corresponding Source
-    may be on a different server (operated by you or a third party)
-    that supports equivalent copying facilities, provided you maintain
-    clear directions next to the object code saying where to find the
-    Corresponding Source.  Regardless of what server hosts the
-    Corresponding Source, you remain obligated to ensure that it is
-    available for as long as needed to satisfy these requirements.
-
-    e) Convey the object code using peer-to-peer transmission, provided
-    you inform other peers where the object code and Corresponding
-    Source of the work are being offered to the general public at no
-    charge under subsection 6d.
-
-  A separable portion of the object code, whose source code is excluded
-from the Corresponding Source as a System Library, need not be
-included in conveying the object code work.
-
-  A "User Product" is either (1) a "consumer product", which means any
-tangible personal property which is normally used for personal, family,
-or household purposes, or (2) anything designed or sold for incorporation
-into a dwelling.  In determining whether a product is a consumer product,
-doubtful cases shall be resolved in favor of coverage.  For a particular
-product received by a particular user, "normally used" refers to a
-typical or common use of that class of product, regardless of the status
-of the particular user or of the way in which the particular user
-actually uses, or expects or is expected to use, the product.  A product
-is a consumer product regardless of whether the product has substantial
-commercial, industrial or non-consumer uses, unless such uses represent
-the only significant mode of use of the product.
-
-  "Installation Information" for a User Product means any methods,
-procedures, authorization keys, or other information required to install
-and execute modified versions of a covered work in that User Product from
-a modified version of its Corresponding Source.  The information must
-suffice to ensure that the continued functioning of the modified object
-code is in no case prevented or interfered with solely because
-modification has been made.
-
-  If you convey an object code work under this section in, or with, or
-specifically for use in, a User Product, and the conveying occurs as
-part of a transaction in which the right of possession and use of the
-User Product is transferred to the recipient in perpetuity or for a
-fixed term (regardless of how the transaction is characterized), the
-Corresponding Source conveyed under this section must be accompanied
-by the Installation Information.  But this requirement does not apply
-if neither you nor any third party retains the ability to install
-modified object code on the User Product (for example, the work has
-been installed in ROM).
-
-  The requirement to provide Installation Information does not include a
-requirement to continue to provide support service, warranty, or updates
-for a work that has been modified or installed by the recipient, or for
-the User Product in which it has been modified or installed.  Access to a
-network may be denied when the modification itself materially and
-adversely affects the operation of the network or violates the rules and
-protocols for communication across the network.
-
-  Corresponding Source conveyed, and Installation Information provided,
-in accord with this section must be in a format that is publicly
-documented (and with an implementation available to the public in
-source code form), and must require no special password or key for
-unpacking, reading or copying.
-
-  7. Additional Terms.
-
-  "Additional permissions" are terms that supplement the terms of this
-License by making exceptions from one or more of its conditions.
-Additional permissions that are applicable to the entire Program shall
-be treated as though they were included in this License, to the extent
-that they are valid under applicable law.  If additional permissions
-apply only to part of the Program, that part may be used separately
-under those permissions, but the entire Program remains governed by
-this License without regard to the additional permissions.
-
-  When you convey a copy of a covered work, you may at your option
-remove any additional permissions from that copy, or from any part of
-it.  (Additional permissions may be written to require their own
-removal in certain cases when you modify the work.)  You may place
-additional permissions on material, added by you to a covered work,
-for which you have or can give appropriate copyright permission.
-
-  Notwithstanding any other provision of this License, for material you
-add to a covered work, you may (if authorized by the copyright holders of
-that material) supplement the terms of this License with terms:
-
-    a) Disclaiming warranty or limiting liability differently from the
-    terms of sections 15 and 16 of this License; or
-
-    b) Requiring preservation of specified reasonable legal notices or
-    author attributions in that material or in the Appropriate Legal
-    Notices displayed by works containing it; or
-
-    c) Prohibiting misrepresentation of the origin of that material, or
-    requiring that modified versions of such material be marked in
-    reasonable ways as different from the original version; or
-
-    d) Limiting the use for publicity purposes of names of licensors or
-    authors of the material; or
-
-    e) Declining to grant rights under trademark law for use of some
-    trade names, trademarks, or service marks; or
-
-    f) Requiring indemnification of licensors and authors of that
-    material by anyone who conveys the material (or modified versions of
-    it) with contractual assumptions of liability to the recipient, for
-    any liability that these contractual assumptions directly impose on
-    those licensors and authors.
-
-  All other non-permissive additional terms are considered "further
-restrictions" within the meaning of section 10.  If the Program as you
-received it, or any part of it, contains a notice stating that it is
-governed by this License along with a term that is a further
-restriction, you may remove that term.  If a license document contains
-a further restriction but permits relicensing or conveying under this
-License, you may add to a covered work material governed by the terms
-of that license document, provided that the further restriction does
-not survive such relicensing or conveying.
-
-  If you add terms to a covered work in accord with this section, you
-must place, in the relevant source files, a statement of the
-additional terms that apply to those files, or a notice indicating
-where to find the applicable terms.
-
-  Additional terms, permissive or non-permissive, may be stated in the
-form of a separately written license, or stated as exceptions;
-the above requirements apply either way.
-
-  8. Termination.
-
-  You may not propagate or modify a covered work except as expressly
-provided under this License.  Any attempt otherwise to propagate or
-modify it is void, and will automatically terminate your rights under
-this License (including any patent licenses granted under the third
-paragraph of section 11).
-
-  However, if you cease all violation of this License, then your
-license from a particular copyright holder is reinstated (a)
-provisionally, unless and until the copyright holder explicitly and
-finally terminates your license, and (b) permanently, if the copyright
-holder fails to notify you of the violation by some reasonable means
-prior to 60 days after the cessation.
-
-  Moreover, your license from a particular copyright holder is
-reinstated permanently if the copyright holder notifies you of the
-violation by some reasonable means, this is the first time you have
-received notice of violation of this License (for any work) from that
-copyright holder, and you cure the violation prior to 30 days after
-your receipt of the notice.
-
-  Termination of your rights under this section does not terminate the
-licenses of parties who have received copies or rights from you under
-this License.  If your rights have been terminated and not permanently
-reinstated, you do not qualify to receive new licenses for the same
-material under section 10.
-
-  9. Acceptance Not Required for Having Copies.
-
-  You are not required to accept this License in order to receive or
-run a copy of the Program.  Ancillary propagation of a covered work
-occurring solely as a consequence of using peer-to-peer transmission
-to receive a copy likewise does not require acceptance.  However,
-nothing other than this License grants you permission to propagate or
-modify any covered work.  These actions infringe copyright if you do
-not accept this License.  Therefore, by modifying or propagating a
-covered work, you indicate your acceptance of this License to do so.
-
-  10. Automatic Licensing of Downstream Recipients.
-
-  Each time you convey a covered work, the recipient automatically
-receives a license from the original licensors, to run, modify and
-propagate that work, subject to this License.  You are not responsible
-for enforcing compliance by third parties with this License.
-
-  An "entity transaction" is a transaction transferring control of an
-organization, or substantially all assets of one, or subdividing an
-organization, or merging organizations.  If propagation of a covered
-work results from an entity transaction, each party to that
-transaction who receives a copy of the work also receives whatever
-licenses to the work the party's predecessor in interest had or could
-give under the previous paragraph, plus a right to possession of the
-Corresponding Source of the work from the predecessor in interest, if
-the predecessor has it or can get it with reasonable efforts.
-
-  You may not impose any further restrictions on the exercise of the
-rights granted or affirmed under this License.  For example, you may
-not impose a license fee, royalty, or other charge for exercise of
-rights granted under this License, and you may not initiate litigation
-(including a cross-claim or counterclaim in a lawsuit) alleging that
-any patent claim is infringed by making, using, selling, offering for
-sale, or importing the Program or any portion of it.
-
-  11. Patents.
-
-  A "contributor" is a copyright holder who authorizes use under this
-License of the Program or a work on which the Program is based.  The
-work thus licensed is called the contributor's "contributor version".
-
-  A contributor's "essential patent claims" are all patent claims
-owned or controlled by the contributor, whether already acquired or
-hereafter acquired, that would be infringed by some manner, permitted
-by this License, of making, using, or selling its contributor version,
-but do not include claims that would be infringed only as a
-consequence of further modification of the contributor version.  For
-purposes of this definition, "control" includes the right to grant
-patent sublicenses in a manner consistent with the requirements of
-this License.
-
-  Each contributor grants you a non-exclusive, worldwide, royalty-free
-patent license under the contributor's essential patent claims, to
-make, use, sell, offer for sale, import and otherwise run, modify and
-propagate the contents of its contributor version.
-
-  In the following three paragraphs, a "patent license" is any express
-agreement or commitment, however denominated, not to enforce a patent
-(such as an express permission to practice a patent or covenant not to
-sue for patent infringement).  To "grant" such a patent license to a
-party means to make such an agreement or commitment not to enforce a
-patent against the party.
-
-  If you convey a covered work, knowingly relying on a patent license,
-and the Corresponding Source of the work is not available for anyone
-to copy, free of charge and under the terms of this License, through a
-publicly available network server or other readily accessible means,
-then you must either (1) cause the Corresponding Source to be so
-available, or (2) arrange to deprive yourself of the benefit of the
-patent license for this particular work, or (3) arrange, in a manner
-consistent with the requirements of this License, to extend the patent
-license to downstream recipients.  "Knowingly relying" means you have
-actual knowledge that, but for the patent license, your conveying the
-covered work in a country, or your recipient's use of the covered work
-in a country, would infringe one or more identifiable patents in that
-country that you have reason to believe are valid.
-
-  If, pursuant to or in connection with a single transaction or
-arrangement, you convey, or propagate by procuring conveyance of, a
-covered work, and grant a patent license to some of the parties
-receiving the covered work authorizing them to use, propagate, modify
-or convey a specific copy of the covered work, then the patent license
-you grant is automatically extended to all recipients of the covered
-work and works based on it.
-
-  A patent license is "discriminatory" if it does not include within
-the scope of its coverage, prohibits the exercise of, or is
-conditioned on the non-exercise of one or more of the rights that are
-specifically granted under this License.  You may not convey a covered
-work if you are a party to an arrangement with a third party that is
-in the business of distributing software, under which you make payment
-to the third party based on the extent of your activity of conveying
-the work, and under which the third party grants, to any of the
-parties who would receive the covered work from you, a discriminatory
-patent license (a) in connection with copies of the covered work
-conveyed by you (or copies made from those copies), or (b) primarily
-for and in connection with specific products or compilations that
-contain the covered work, unless you entered into that arrangement,
-or that patent license was granted, prior to 28 March 2007.
-
-  Nothing in this License shall be construed as excluding or limiting
-any implied license or other defenses to infringement that may
-otherwise be available to you under applicable patent law.
-
-  12. No Surrender of Others' Freedom.
-
-  If conditions are imposed on you (whether by court order, agreement or
-otherwise) that contradict the conditions of this License, they do not
-excuse you from the conditions of this License.  If you cannot convey a
-covered work so as to satisfy simultaneously your obligations under this
-License and any other pertinent obligations, then as a consequence you may
-not convey it at all.  For example, if you agree to terms that obligate you
-to collect a royalty for further conveying from those to whom you convey
-the Program, the only way you could satisfy both those terms and this
-License would be to refrain entirely from conveying the Program.
-
-  13. Use with the GNU Affero General Public License.
-
-  Notwithstanding any other provision of this License, you have
-permission to link or combine any covered work with a work licensed
-under version 3 of the GNU Affero General Public License into a single
-combined work, and to convey the resulting work.  The terms of this
-License will continue to apply to the part which is the covered work,
-but the special requirements of the GNU Affero General Public License,
-section 13, concerning interaction through a network will apply to the
-combination as such.
-
-  14. Revised Versions of this License.
-
-  The Free Software Foundation may publish revised and/or new versions of
-the GNU General Public License from time to time.  Such new versions will
-be similar in spirit to the present version, but may differ in detail to
-address new problems or concerns.
-
-  Each version is given a distinguishing version number.  If the
-Program specifies that a certain numbered version of the GNU General
-Public License "or any later version" applies to it, you have the
-option of following the terms and conditions either of that numbered
-version or of any later version published by the Free Software
-Foundation.  If the Program does not specify a version number of the
-GNU General Public License, you may choose any version ever published
-by the Free Software Foundation.
-
-  If the Program specifies that a proxy can decide which future
-versions of the GNU General Public License can be used, that proxy's
-public statement of acceptance of a version permanently authorizes you
-to choose that version for the Program.
-
-  Later license versions may give you additional or different
-permissions.  However, no additional obligations are imposed on any
-author or copyright holder as a result of your choosing to follow a
-later version.
-
-  15. Disclaimer of Warranty.
-
-  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
-APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
-HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
-OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
-THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
-IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
-ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
-
-  16. Limitation of Liability.
-
-  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
-WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
-THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
-GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
-USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
-DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
-PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
-EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
-SUCH DAMAGES.
-
-  17. Interpretation of Sections 15 and 16.
-
-  If the disclaimer of warranty and limitation of liability provided
-above cannot be given local legal effect according to their terms,
-reviewing courts shall apply local law that most closely approximates
-an absolute waiver of all civil liability in connection with the
-Program, unless a warranty or assumption of liability accompanies a
-copy of the Program in return for a fee.
-
-                     END OF TERMS AND CONDITIONS
-
-            How to Apply These Terms to Your New Programs
-
-  If you develop a new program, and you want it to be of the greatest
-possible use to the public, the best way to achieve this is to make it
-free software which everyone can redistribute and change under these terms.
-
-  To do so, attach the following notices to the program.  It is safest
-to attach them to the start of each source file to most effectively
-state the exclusion of warranty; and each file should have at least
-the "copyright" line and a pointer to where the full notice is found.
-
-    <one line to give the program's name and a brief idea of what it does.>
-    Copyright (C) <year>  <name of author>
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-
-Also add information on how to contact you by electronic and paper mail.
-
-  If the program does terminal interaction, make it output a short
-notice like this when it starts in an interactive mode:
-
-    <program>  Copyright (C) <year>  <name of author>
-    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
-    This is free software, and you are welcome to redistribute it
-    under certain conditions; type `show c' for details.
-
-The hypothetical commands `show w' and `show c' should show the appropriate
-parts of the General Public License.  Of course, your program's commands
-might be different; for a GUI interface, you would use an "about box".
-
-  You should also get your employer (if you work as a programmer) or school,
-if any, to sign a "copyright disclaimer" for the program, if necessary.
-For more information on this, and how to apply and follow the GNU GPL, see
-<http://www.gnu.org/licenses/>.
-
-  The GNU General Public License does not permit incorporating your program
-into proprietary programs.  If your program is a subroutine library, you
-may consider it more useful to permit linking proprietary applications with
-the library.  If this is what you want to do, use the GNU Lesser General
-Public License instead of this License.  But first, please read
-<http://www.gnu.org/philosophy/why-not-lgpl.html>.
diff --git a/pllrepo/ChangeLog b/pllrepo/ChangeLog
deleted file mode 100644
index e69de29..0000000
diff --git a/pllrepo/Doxyfile b/pllrepo/Doxyfile
deleted file mode 100644
index af84260..0000000
--- a/pllrepo/Doxyfile
+++ /dev/null
@@ -1,2299 +0,0 @@
-# Doxyfile 1.8.5
-
-# This file describes the settings to be used by the documentation system
-# doxygen (www.doxygen.org) for a project.
-#
-# All text after a double hash (##) is considered a comment and is placed in
-# front of the TAG it is preceding.
-#
-# All text after a single hash (#) is considered a comment and will be ignored.
-# The format is:
-# TAG = value [value, ...]
-# For lists, items can also be appended using:
-# TAG += value [value, ...]
-# Values that contain spaces should be placed between quotes (\" \").
-
-#---------------------------------------------------------------------------
-# Project related configuration options
-#---------------------------------------------------------------------------
-
-# This tag specifies the encoding used for all characters in the config file
-# that follow. The default is UTF-8 which is also the encoding used for all text
-# before the first occurrence of this tag. Doxygen uses libiconv (or the iconv
-# built into libc) for the transcoding. See http://www.gnu.org/software/libiconv
-# for the list of possible encodings.
-# The default value is: UTF-8.
-
-DOXYFILE_ENCODING      = UTF-8
-
-# The PROJECT_NAME tag is a single word (or a sequence of words surrounded by
-# double-quotes, unless you are using Doxywizard) that should identify the
-# project for which the documentation is generated. This name is used in the
-# title of most generated pages and in a few other places.
-# The default value is: My Project.
-
-PROJECT_NAME           = "Phylogenetic Likelihood Library"
-
-# The PROJECT_NUMBER tag can be used to enter a project or revision number. This
-# could be handy for archiving the generated documentation or if some version
-# control system is used.
-
-PROJECT_NUMBER         = 1.0.0
-
-# Using the PROJECT_BRIEF tag one can provide an optional one line description
-# for a project that appears at the top of each page and should give the viewer a
-# quick idea about the purpose of the project. Keep the description short.
-
-PROJECT_BRIEF          = "A software library for phylogenetic inference"
-
-# With the PROJECT_LOGO tag one can specify a logo or icon that is included in
-# the documentation. The maximum height of the logo should not exceed 55 pixels
-# and the maximum width should not exceed 200 pixels. Doxygen will copy the logo
-# to the output directory.
-
-PROJECT_LOGO           =
-
-# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path
-# into which the generated documentation will be written. If a relative path is
-# entered, it will be relative to the location where doxygen was started. If
-# left blank the current directory will be used.
-
-OUTPUT_DIRECTORY       = /var/www/test
-
-# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create 4096 sub-
-# directories (in 2 levels) under the output directory of each output format and
-# will distribute the generated files over these directories. Enabling this
-# option can be useful when feeding doxygen a huge amount of source files, where
-# putting all generated files in the same directory would otherwise cause
-# performance problems for the file system.
-# The default value is: NO.
-
-CREATE_SUBDIRS         = NO
-
-# The OUTPUT_LANGUAGE tag is used to specify the language in which all
-# documentation generated by doxygen is written. Doxygen will use this
-# information to generate all constant output in the proper language.
-# Possible values are: Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-
-# Traditional, Croatian, Czech, Danish, Dutch, English, Esperanto, Farsi,
-# Finnish, French, German, Greek, Hungarian, Italian, Japanese, Japanese-en,
-# Korean, Korean-en, Latvian, Norwegian, Macedonian, Persian, Polish,
-# Portuguese, Romanian, Russian, Serbian, Slovak, Slovene, Spanish, Swedish,
-# Turkish, Ukrainian and Vietnamese.
-# The default value is: English.
-
-OUTPUT_LANGUAGE        = English
-
-# If the BRIEF_MEMBER_DESC tag is set to YES doxygen will include brief member
-# descriptions after the members that are listed in the file and class
-# documentation (similar to Javadoc). Set to NO to disable this.
-# The default value is: YES.
-
-BRIEF_MEMBER_DESC      = YES
-
-# If the REPEAT_BRIEF tag is set to YES doxygen will prepend the brief
-# description of a member or function before the detailed description.
-#
-# Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
-# brief descriptions will be completely suppressed.
-# The default value is: YES.
-
-REPEAT_BRIEF           = YES
-
-# This tag implements a quasi-intelligent brief description abbreviator that is
-# used to form the text in various listings. Each string in this list, if found
-# as the leading text of the brief description, will be stripped from the text
-# and the result, after processing the whole list, is used as the annotated
-# text. Otherwise, the brief description is used as-is. If left blank, the
-# following values are used ($name is automatically replaced with the name of
-# the entity):The $name class, The $name widget, The $name file, is, provides,
-# specifies, contains, represents, a, an and the.
-
-ABBREVIATE_BRIEF       =
-
-# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
-# doxygen will generate a detailed section even if there is only a brief
-# description.
-# The default value is: NO.
-
-ALWAYS_DETAILED_SEC    = NO
-
-# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all
-# inherited members of a class in the documentation of that class as if those
-# members were ordinary class members. Constructors, destructors and assignment
-# operators of the base classes will not be shown.
-# The default value is: NO.
-
-INLINE_INHERITED_MEMB  = NO
-
-# If the FULL_PATH_NAMES tag is set to YES doxygen will prepend the full path
-# before files name in the file list and in the header files. If set to NO the
-# shortest path that makes the file name unique will be used.
-# The default value is: YES.
-
-FULL_PATH_NAMES        = YES
-
-# The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path.
-# Stripping is only done if one of the specified strings matches the left-hand
-# part of the path. The tag can be used to show relative paths in the file list.
-# If left blank the directory from which doxygen is run is used as the path to
-# strip.
-#
-# Note that you can specify absolute paths here, but also relative paths, which
-# will be relative from the directory where doxygen is started.
-# This tag requires that the tag FULL_PATH_NAMES is set to YES.
-
-STRIP_FROM_PATH        =
-
-# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the
-# path mentioned in the documentation of a class, which tells the reader which
-# header file to include in order to use a class. If left blank only the name of
-# the header file containing the class definition is used. Otherwise one should
-# specify the list of include paths that are normally passed to the compiler
-# using the -I flag.
-
-STRIP_FROM_INC_PATH    =
-
-# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but
-# less readable) file names. This can be useful if your file system doesn't
-# support long names like on DOS, Mac, or CD-ROM.
-# The default value is: NO.
-
-SHORT_NAMES            = NO
-
-# If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the
-# first line (until the first dot) of a Javadoc-style comment as the brief
-# description. If set to NO, the Javadoc-style will behave just like regular Qt-
-# style comments (thus requiring an explicit @brief command for a brief
-# description.)
-# The default value is: NO.
-
-JAVADOC_AUTOBRIEF      = NO
-
-# If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first
-# line (until the first dot) of a Qt-style comment as the brief description. If
-# set to NO, the Qt-style will behave just like regular Qt-style comments (thus
-# requiring an explicit \brief command for a brief description.)
-# The default value is: NO.
-
-QT_AUTOBRIEF           = NO
-
-# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a
-# multi-line C++ special comment block (i.e. a block of //! or /// comments) as
-# a brief description. This used to be the default behavior. The new default is
-# to treat a multi-line C++ comment block as a detailed description. Set this
-# tag to YES if you prefer the old behavior instead.
-#
-# Note that setting this tag to YES also means that rational rose comments are
-# not recognized any more.
-# The default value is: NO.
-
-MULTILINE_CPP_IS_BRIEF = NO
-
-# If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the
-# documentation from any documented member that it re-implements.
-# The default value is: YES.
-
-INHERIT_DOCS           = YES
-
-# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce a
-# new page for each member. If set to NO, the documentation of a member will be
-# part of the file/class/namespace that contains it.
-# The default value is: NO.
-
-SEPARATE_MEMBER_PAGES  = NO
-
-# The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen
-# uses this value to replace tabs by spaces in code fragments.
-# Minimum value: 1, maximum value: 16, default value: 4.
-
-TAB_SIZE               = 4
-
-# This tag can be used to specify a number of aliases that act as commands in
-# the documentation. An alias has the form:
-# name=value
-# For example adding
-# "sideeffect=@par Side Effects:\n"
-# will allow you to put the command \sideeffect (or @sideeffect) in the
-# documentation, which will result in a user-defined paragraph with heading
-# "Side Effects:". You can put \n's in the value part of an alias to insert
-# newlines.
-
-ALIASES                =
-
-# This tag can be used to specify a number of word-keyword mappings (TCL only).
-# A mapping has the form "name=value". For example adding "class=itcl::class"
-# will allow you to use the command class in the itcl::class meaning.
-
-TCL_SUBST              =
-
-# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources
-# only. Doxygen will then generate output that is more tailored for C. For
-# instance, some of the names that are used will be different. The list of all
-# members will be omitted, etc.
-# The default value is: NO.
-
-OPTIMIZE_OUTPUT_FOR_C  = YES
-
-# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or
-# Python sources only. Doxygen will then generate output that is more tailored
-# for that language. For instance, namespaces will be presented as packages,
-# qualified scopes will look different, etc.
-# The default value is: NO.
-
-OPTIMIZE_OUTPUT_JAVA   = NO
-
-# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran
-# sources. Doxygen will then generate output that is tailored for Fortran.
-# The default value is: NO.
-
-OPTIMIZE_FOR_FORTRAN   = NO
-
-# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL
-# sources. Doxygen will then generate output that is tailored for VHDL.
-# The default value is: NO.
-
-OPTIMIZE_OUTPUT_VHDL   = NO
-
-# Doxygen selects the parser to use depending on the extension of the files it
-# parses. With this tag you can assign which parser to use for a given
-# extension. Doxygen has a built-in mapping, but you can override or extend it
-# using this tag. The format is ext=language, where ext is a file extension, and
-# language is one of the parsers supported by doxygen: IDL, Java, Javascript,
-# C#, C, C++, D, PHP, Objective-C, Python, Fortran, VHDL. For instance to make
-# doxygen treat .inc files as Fortran files (default is PHP), and .f files as C
-# (default is Fortran), use: inc=Fortran f=C.
-#
-# Note: For files without extension you can use no_extension as a placeholder.
-#
-# Note that for custom extensions you also need to set FILE_PATTERNS otherwise
-# the files are not read by doxygen.
-
-EXTENSION_MAPPING      =
-
-# If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments
-# according to the Markdown format, which allows for more readable
-# documentation. See http://daringfireball.net/projects/markdown/ for details.
-# The output of markdown processing is further processed by doxygen, so you can
-# mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in
-# case of backward compatibility issues.
-# The default value is: YES.
-
-MARKDOWN_SUPPORT       = YES
-
-# When enabled doxygen tries to link words that correspond to documented
-# classes, or namespaces to their corresponding documentation. Such a link can
-# be prevented in individual cases by putting a % sign in front of the word
-# or globally by setting AUTOLINK_SUPPORT to NO.
-# The default value is: YES.
-
-AUTOLINK_SUPPORT       = YES
-
-# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want
-# to include (a tag file for) the STL sources as input, then you should set this
-# tag to YES in order to let doxygen match functions declarations and
-# definitions whose arguments contain STL classes (e.g. func(std::string);
-# versus func(std::string) {}). This also makes the inheritance and collaboration
-# diagrams that involve STL classes more complete and accurate.
-# The default value is: NO.
-
-BUILTIN_STL_SUPPORT    = NO
-
-# If you use Microsoft's C++/CLI language, you should set this option to YES to
-# enable parsing support.
-# The default value is: NO.
-
-CPP_CLI_SUPPORT        = NO
-
-# Set the SIP_SUPPORT tag to YES if your project consists of sip (see:
-# http://www.riverbankcomputing.co.uk/software/sip/intro) sources only. Doxygen
-# will parse them like normal C++ but will assume all classes use public instead
-# of private inheritance when no explicit protection keyword is present.
-# The default value is: NO.
-
-SIP_SUPPORT            = NO
-
-# For Microsoft's IDL there are propget and propput attributes to indicate
-# getter and setter methods for a property. Setting this option to YES will make
-# doxygen replace the get and set methods by a property in the documentation.
-# This will only work if the methods are indeed getting or setting a simple
-# type. If this is not the case, or you want to show the methods anyway, you
-# should set this option to NO.
-# The default value is: YES.
-
-IDL_PROPERTY_SUPPORT   = YES
-
-# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
-# tag is set to YES, then doxygen will reuse the documentation of the first
-# member in the group (if any) for the other members of the group. By default
-# all members of a group must be documented explicitly.
-# The default value is: NO.
-
-DISTRIBUTE_GROUP_DOC   = NO
-
-# Set the SUBGROUPING tag to YES to allow class member groups of the same type
-# (for instance a group of public functions) to be put as a subgroup of that
-# type (e.g. under the Public Functions section). Set it to NO to prevent
-# subgrouping. Alternatively, this can be done per class using the
-# \nosubgrouping command.
-# The default value is: YES.
-
-SUBGROUPING            = YES
-
-# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions
-# are shown inside the group in which they are included (e.g. using \ingroup)
-# instead of on a separate page (for HTML and Man pages) or section (for LaTeX
-# and RTF).
-#
-# Note that this feature does not work in combination with
-# SEPARATE_MEMBER_PAGES.
-# The default value is: NO.
-
-INLINE_GROUPED_CLASSES = NO
-
-# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions
-# with only public data fields or simple typedef fields will be shown inline in
-# the documentation of the scope in which they are defined (i.e. file,
-# namespace, or group documentation), provided this scope is documented. If set
-# to NO, structs, classes, and unions are shown on a separate page (for HTML and
-# Man pages) or section (for LaTeX and RTF).
-# The default value is: NO.
-
-INLINE_SIMPLE_STRUCTS  = NO
-
-# When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or
-# enum is documented as struct, union, or enum with the name of the typedef. So
-# typedef struct TypeS {} TypeT, will appear in the documentation as a struct
-# with name TypeT. When disabled the typedef will appear as a member of a file,
-# namespace, or class. And the struct will be named TypeS. This can typically be
-# useful for C code in case the coding convention dictates that all compound
-# types are typedef'ed and only the typedef is referenced, never the tag name.
-# The default value is: NO.
-
-TYPEDEF_HIDES_STRUCT   = NO
-
-# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This
-# cache is used to resolve symbols given their name and scope. Since this can be
-# an expensive process and often the same symbol appears multiple times in the
-# code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small
-# doxygen will become slower. If the cache is too large, memory is wasted. The
-# cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range
-# is 0..9, the default is 0, corresponding to a cache size of 2^16=65536
-# symbols. At the end of a run doxygen will report the cache usage and suggest
-# the optimal cache size from a speed point of view.
-# Minimum value: 0, maximum value: 9, default value: 0.
-
-LOOKUP_CACHE_SIZE      = 0
-
-#---------------------------------------------------------------------------
-# Build related configuration options
-#---------------------------------------------------------------------------
-
-# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in
-# documentation are documented, even if no documentation was available. Private
-# class members and static file members will be hidden unless the
-# EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES.
-# Note: This will also disable the warnings about undocumented members that are
-# normally produced when WARNINGS is set to YES.
-# The default value is: NO.
-
-EXTRACT_ALL            = NO
-
-# If the EXTRACT_PRIVATE tag is set to YES all private members of a class will
-# be included in the documentation.
-# The default value is: NO.
-
-EXTRACT_PRIVATE        = NO
-
-# If the EXTRACT_PACKAGE tag is set to YES all members with package or internal
-# scope will be included in the documentation.
-# The default value is: NO.
-
-EXTRACT_PACKAGE        = NO
-
-# If the EXTRACT_STATIC tag is set to YES all static members of a file will be
-# included in the documentation.
-# The default value is: NO.
-
-EXTRACT_STATIC         = YES
-
-# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) defined
-# locally in source files will be included in the documentation. If set to NO
-# only classes defined in header files are included. Does not have any effect
-# for Java sources.
-# The default value is: YES.
-
-EXTRACT_LOCAL_CLASSES  = YES
-
-# This flag is only useful for Objective-C code. When set to YES local methods,
-# which are defined in the implementation section but not in the interface are
-# included in the documentation. If set to NO only methods in the interface are
-# included.
-# The default value is: NO.
-
-EXTRACT_LOCAL_METHODS  = NO
-
-# If this flag is set to YES, the members of anonymous namespaces will be
-# extracted and appear in the documentation as a namespace called
-# 'anonymous_namespace{file}', where file will be replaced with the base name of
-# the file that contains the anonymous namespace. By default anonymous namespaces
-# are hidden.
-# The default value is: NO.
-
-EXTRACT_ANON_NSPACES   = NO
-
-# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all
-# undocumented members inside documented classes or files. If set to NO these
-# members will be included in the various overviews, but no documentation
-# section is generated. This option has no effect if EXTRACT_ALL is enabled.
-# The default value is: NO.
-
-HIDE_UNDOC_MEMBERS     = NO
-
-# If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all
-# undocumented classes that are normally visible in the class hierarchy. If set
-# to NO these classes will be included in the various overviews. This option has
-# no effect if EXTRACT_ALL is enabled.
-# The default value is: NO.
-
-HIDE_UNDOC_CLASSES     = NO
-
-# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend
-# (class|struct|union) declarations. If set to NO these declarations will be
-# included in the documentation.
-# The default value is: NO.
-
-HIDE_FRIEND_COMPOUNDS  = NO
-
-# If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any
-# documentation blocks found inside the body of a function. If set to NO these
-# blocks will be appended to the function's detailed documentation block.
-# The default value is: NO.
-
-HIDE_IN_BODY_DOCS      = NO
-
-# The INTERNAL_DOCS tag determines if documentation that is typed after a
-# \internal command is included. If the tag is set to NO then the documentation
-# will be excluded. Set it to YES to include the internal documentation.
-# The default value is: NO.
-
-INTERNAL_DOCS          = NO
-
-# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file
-# names in lower-case letters. If set to YES upper-case letters are also
-# allowed. This is useful if you have classes or files whose names only differ
-# in case and if your file system supports case sensitive file names. Windows
-# and Mac users are advised to set this option to NO.
-# The default value is: system dependent.
-
-CASE_SENSE_NAMES       = YES
-
-# If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with
-# their full class and namespace scopes in the documentation. If set to YES the
-# scope will be hidden.
-# The default value is: NO.
-
-HIDE_SCOPE_NAMES       = NO
-
-# If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of
-# the files that are included by a file in the documentation of that file.
-# The default value is: YES.
-
-SHOW_INCLUDE_FILES     = YES
-
-# If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include
-# files with double quotes in the documentation rather than with sharp brackets.
-# The default value is: NO.
-
-FORCE_LOCAL_INCLUDES   = NO
-
-# If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the
-# documentation for inline members.
-# The default value is: YES.
-
-INLINE_INFO            = YES
-
-# If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the
-# (detailed) documentation of file and class members alphabetically by member
-# name. If set to NO the members will appear in declaration order.
-# The default value is: YES.
-
-SORT_MEMBER_DOCS       = YES
-
-# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief
-# descriptions of file, namespace and class members alphabetically by member
-# name. If set to NO the members will appear in declaration order.
-# The default value is: NO.
-
-SORT_BRIEF_DOCS        = NO
-
-# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the
-# (brief and detailed) documentation of class members so that constructors and
-# destructors are listed first. If set to NO the constructors will appear in the
-# respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS.
-# Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief
-# member documentation.
-# Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting
-# detailed member documentation.
-# The default value is: NO.
-
-SORT_MEMBERS_CTORS_1ST = NO
-
-# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy
-# of group names into alphabetical order. If set to NO the group names will
-# appear in their defined order.
-# The default value is: NO.
-
-SORT_GROUP_NAMES       = NO
-
-# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by
-# fully-qualified names, including namespaces. If set to NO, the class list will
-# be sorted only by class name, not including the namespace part.
-# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
-# Note: This option applies only to the class list, not to the alphabetical
-# list.
-# The default value is: NO.
-
-SORT_BY_SCOPE_NAME     = NO
-
-# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper
-# type resolution of all parameters of a function it will reject a match between
-# the prototype and the implementation of a member function even if there is
-# only one candidate or it is obvious which candidate to choose by doing a
-# simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still
-# accept a match between prototype and implementation in such cases.
-# The default value is: NO.
-
-STRICT_PROTO_MATCHING  = NO
-
-# The GENERATE_TODOLIST tag can be used to enable ( YES) or disable ( NO) the
-# todo list. This list is created by putting \todo commands in the
-# documentation.
-# The default value is: YES.
-
-GENERATE_TODOLIST      = YES
-
-# The GENERATE_TESTLIST tag can be used to enable ( YES) or disable ( NO) the
-# test list. This list is created by putting \test commands in the
-# documentation.
-# The default value is: YES.
-
-GENERATE_TESTLIST      = YES
-
-# The GENERATE_BUGLIST tag can be used to enable ( YES) or disable ( NO) the bug
-# list. This list is created by putting \bug commands in the documentation.
-# The default value is: YES.
-
-GENERATE_BUGLIST       = YES
-
-# The GENERATE_DEPRECATEDLIST tag can be used to enable ( YES) or disable ( NO)
-# the deprecated list. This list is created by putting \deprecated commands in
-# the documentation.
-# The default value is: YES.
-
-GENERATE_DEPRECATEDLIST= YES
-
-# The ENABLED_SECTIONS tag can be used to enable conditional documentation
-# sections, marked by \if <section_label> ... \endif and \cond <section_label>
-# ... \endcond blocks.
-
-ENABLED_SECTIONS       =
-
-# The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the
-# initial value of a variable or macro / define can have for it to appear in the
-# documentation. If the initializer consists of more lines than specified here
-# it will be hidden. Use a value of 0 to hide initializers completely. The
-# appearance of the value of individual variables and macros / defines can be
-# controlled using \showinitializer or \hideinitializer command in the
-# documentation regardless of this setting.
-# Minimum value: 0, maximum value: 10000, default value: 30.
-
-MAX_INITIALIZER_LINES  = 30
-
-# Set the SHOW_USED_FILES tag to NO to disable the list of files generated at
-# the bottom of the documentation of classes and structs. If set to YES the list
-# will mention the files that were used to generate the documentation.
-# The default value is: YES.
-
-SHOW_USED_FILES        = YES
-
-# Set the SHOW_FILES tag to NO to disable the generation of the Files page. This
-# will remove the Files entry from the Quick Index and from the Folder Tree View
-# (if specified).
-# The default value is: YES.
-
-SHOW_FILES             = YES
-
-# Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces
-# page. This will remove the Namespaces entry from the Quick Index and from the
-# Folder Tree View (if specified).
-# The default value is: YES.
-
-SHOW_NAMESPACES        = YES
-
-# The FILE_VERSION_FILTER tag can be used to specify a program or script that
-# doxygen should invoke to get the current version for each file (typically from
-# the version control system). Doxygen will invoke the program by executing (via
-# popen()) the command command input-file, where command is the value of the
-# FILE_VERSION_FILTER tag, and input-file is the name of an input file provided
-# by doxygen. Whatever the program writes to standard output is used as the file
-# version. For an example see the documentation.
-
-FILE_VERSION_FILTER    =
-
-# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed
-# by doxygen. The layout file controls the global structure of the generated
-# output files in an output format independent way. To create the layout file
-# that represents doxygen's defaults, run doxygen with the -l option. You can
-# optionally specify a file name after the option, if omitted DoxygenLayout.xml
-# will be used as the name of the layout file.
-#
-# Note that if you run doxygen from a directory containing a file called
-# DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE
-# tag is left empty.
-
-LAYOUT_FILE            =
-
-# The CITE_BIB_FILES tag can be used to specify one or more bib files containing
-# the reference definitions. This must be a list of .bib files. The .bib
-# extension is automatically appended if omitted. This requires the bibtex tool
-# to be installed. See also http://en.wikipedia.org/wiki/BibTeX for more info.
-# For LaTeX the style of the bibliography can be controlled using
-# LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the
-# search path. Do not use file names with spaces, bibtex cannot handle them. See
-# also \cite for info how to create references.
-
-CITE_BIB_FILES         =
-
-#---------------------------------------------------------------------------
-# Configuration options related to warning and progress messages
-#---------------------------------------------------------------------------
-
-# The QUIET tag can be used to turn on/off the messages that are generated to
-# standard output by doxygen. If QUIET is set to YES this implies that the
-# messages are off.
-# The default value is: NO.
-
-QUIET                  = NO
-
-# The WARNINGS tag can be used to turn on/off the warning messages that are
-# generated to standard error ( stderr) by doxygen. If WARNINGS is set to YES
-# this implies that the warnings are on.
-#
-# Tip: Turn warnings on while writing the documentation.
-# The default value is: YES.
-
-WARNINGS               = YES
-
-# If the WARN_IF_UNDOCUMENTED tag is set to YES, then doxygen will generate
-# warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag
-# will automatically be disabled.
-# The default value is: YES.
-
-WARN_IF_UNDOCUMENTED   = YES
-
-# If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for
-# potential errors in the documentation, such as not documenting some parameters
-# in a documented function, or documenting parameters that don't exist or using
-# markup commands wrongly.
-# The default value is: YES.
-
-WARN_IF_DOC_ERROR      = YES
-
-# This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that
-# are documented, but have no documentation for their parameters or return
-# value. If set to NO doxygen will only warn about wrong or incomplete parameter
-# documentation, but not about the absence of documentation.
-# The default value is: NO.
-
-WARN_NO_PARAMDOC       = NO
-
-# The WARN_FORMAT tag determines the format of the warning messages that doxygen
-# can produce. The string should contain the $file, $line, and $text tags, which
-# will be replaced by the file and line number from which the warning originated
-# and the warning text. Optionally the format may contain $version, which will
-# be replaced by the version of the file (if it could be obtained via
-# FILE_VERSION_FILTER).
-# The default value is: $file:$line: $text.
-
-WARN_FORMAT            = "$file:$line: $text"
-
-# The WARN_LOGFILE tag can be used to specify a file to which warning and error
-# messages should be written. If left blank the output is written to standard
-# error (stderr).
-
-WARN_LOGFILE           =
-
-#---------------------------------------------------------------------------
-# Configuration options related to the input files
-#---------------------------------------------------------------------------
-
-# The INPUT tag is used to specify the files and/or directories that contain
-# documented source files. You may enter file names like myfile.cpp or
-# directories like /usr/src/myproject. Separate the files or directories with
-# spaces.
-# Note: If this tag is empty the current directory is searched.
-
-INPUT                  = src
-
-# This tag can be used to specify the character encoding of the source files
-# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
-# libiconv (or the iconv built into libc) for the transcoding. See the libiconv
-# documentation (see: http://www.gnu.org/software/libiconv) for the list of
-# possible encodings.
-# The default value is: UTF-8.
-
-INPUT_ENCODING         = UTF-8
-
-# If the value of the INPUT tag contains directories, you can use the
-# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and
-# *.h) to filter out the source-files in the directories. If left blank the
-# following patterns are tested:*.c, *.cc, *.cxx, *.cpp, *.c++, *.java, *.ii,
-# *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, *.hh, *.hxx, *.hpp,
-# *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, *.m, *.markdown,
-# *.md, *.mm, *.dox, *.py, *.f90, *.f, *.for, *.tcl, *.vhd, *.vhdl, *.ucf,
-# *.qsf, *.as and *.js.
-
-FILE_PATTERNS          =
-
-# The RECURSIVE tag can be used to specify whether or not subdirectories should
-# be searched for input files as well.
-# The default value is: NO.
-
-RECURSIVE              = NO
-
-# The EXCLUDE tag can be used to specify files and/or directories that should be
-# excluded from the INPUT source files. This way you can easily exclude a
-# subdirectory from a directory tree whose root is specified with the INPUT tag.
-#
-# Note that relative paths are relative to the directory from which doxygen is
-# run.
-
-EXCLUDE                =
-
-# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
-# directories that are symbolic links (a Unix file system feature) are excluded
-# from the input.
-# The default value is: NO.
-
-EXCLUDE_SYMLINKS       = NO
-
-# If the value of the INPUT tag contains directories, you can use the
-# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
-# certain files from those directories.
-#
-# Note that the wildcards are matched against the file with absolute path, so to
-# exclude all test directories for example use the pattern */test/*
-
-EXCLUDE_PATTERNS       =
-
-# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
-# (namespaces, classes, functions, etc.) that should be excluded from the
-# output. The symbol name can be a fully qualified name, a word, or if the
-# wildcard * is used, a substring. Examples: ANamespace, AClass,
-# AClass::ANamespace, ANamespace::*Test
-#
-# Note that the wildcards are matched against the file with absolute path, so to
-# exclude all test directories use the pattern */test/*
-
-EXCLUDE_SYMBOLS        =
-
-# The EXAMPLE_PATH tag can be used to specify one or more files or directories
-# that contain example code fragments that are included (see the \include
-# command).
-
-EXAMPLE_PATH           =
-
-# If the value of the EXAMPLE_PATH tag contains directories, you can use the
-# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and
-# *.h) to filter out the source-files in the directories. If left blank all
-# files are included.
-
-EXAMPLE_PATTERNS       =
-
-# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
-# searched for input files to be used with the \include or \dontinclude commands
-# irrespective of the value of the RECURSIVE tag.
-# The default value is: NO.
-
-EXAMPLE_RECURSIVE      = NO
-
-# The IMAGE_PATH tag can be used to specify one or more files or directories
-# that contain images that are to be included in the documentation (see the
-# \image command).
-
-IMAGE_PATH             = dox
-
-# The INPUT_FILTER tag can be used to specify a program that doxygen should
-# invoke to filter for each input file. Doxygen will invoke the filter program
-# by executing (via popen()) the command:
-#
-# <filter> <input-file>
-#
-# where <filter> is the value of the INPUT_FILTER tag, and <input-file> is the
-# name of an input file. Doxygen will then use the output that the filter
-# program writes to standard output. If FILTER_PATTERNS is specified, this tag
-# will be ignored.
-#
-# Note that the filter must not add or remove lines; it is applied before the
-# code is scanned, but not when the output code is generated. If lines are added
-# or removed, the anchors will not be placed correctly.
-
-INPUT_FILTER           =
-
-# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
-# basis. Doxygen will compare the file name with each pattern and apply the
-# filter if there is a match. The filters are a list of the form: pattern=filter
-# (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how
-# filters are used. If the FILTER_PATTERNS tag is empty or if none of the
-# patterns match the file name, INPUT_FILTER is applied.
-
-FILTER_PATTERNS        =
-
-# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
-# INPUT_FILTER) will also be used to filter the input files that are used for
-# producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES).
-# The default value is: NO.
-
-FILTER_SOURCE_FILES    = NO
-
-# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file
-# pattern. A pattern will override the setting for FILTER_PATTERNS (if any) and
-# it is also possible to disable source filtering for a specific pattern using
-# *.ext= (so without naming a filter).
-# This tag requires that the tag FILTER_SOURCE_FILES is set to YES.
-
-FILTER_SOURCE_PATTERNS =
-
-# If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that
-# is part of the input, its contents will be placed on the main page
-# (index.html). This can be useful if you have a project on for instance GitHub
-# and want to reuse the introduction page also for the doxygen output.
-
-USE_MDFILE_AS_MAINPAGE =
-
-#---------------------------------------------------------------------------
-# Configuration options related to source browsing
-#---------------------------------------------------------------------------
-
-# If the SOURCE_BROWSER tag is set to YES then a list of source files will be
-# generated. Documented entities will be cross-referenced with these sources.
-#
-# Note: To get rid of all source code in the generated output, make sure that
-# also VERBATIM_HEADERS is set to NO.
-# The default value is: NO.
-
-SOURCE_BROWSER         = NO
-
-# Setting the INLINE_SOURCES tag to YES will include the body of functions,
-# classes and enums directly into the documentation.
-# The default value is: NO.
-
-INLINE_SOURCES         = NO
-
-# Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any
-# special comment blocks from generated source code fragments. Normal C, C++ and
-# Fortran comments will always remain visible.
-# The default value is: YES.
-
-STRIP_CODE_COMMENTS    = YES
-
-# If the REFERENCED_BY_RELATION tag is set to YES then for each documented
-# function all documented functions referencing it will be listed.
-# The default value is: NO.
-
-REFERENCED_BY_RELATION = NO
-
-# If the REFERENCES_RELATION tag is set to YES then for each documented function
-# all documented entities called/used by that function will be listed.
-# The default value is: NO.
-
-REFERENCES_RELATION    = NO
-
-# If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set
-# to YES, then the hyperlinks from functions in REFERENCES_RELATION and
-# REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will
-# link to the documentation.
-# The default value is: YES.
-
-REFERENCES_LINK_SOURCE = YES
-
-# If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the
-# source code will show a tooltip with additional information such as prototype,
-# brief description and links to the definition and documentation. Since this
-# will make the HTML file larger and loading of large files a bit slower, you
-# can opt to disable this feature.
-# The default value is: YES.
-# This tag requires that the tag SOURCE_BROWSER is set to YES.
-
-SOURCE_TOOLTIPS        = YES
-
-# If the USE_HTAGS tag is set to YES then the references to source code will
-# point to the HTML generated by the htags(1) tool instead of doxygen built-in
-# source browser. The htags tool is part of GNU's global source tagging system
-# (see http://www.gnu.org/software/global/global.html). You will need version
-# 4.8.6 or higher.
-#
-# To use it do the following:
-# - Install the latest version of global
-# - Enable SOURCE_BROWSER and USE_HTAGS in the config file
-# - Make sure the INPUT points to the root of the source tree
-# - Run doxygen as normal
-#
-# Doxygen will invoke htags (and that will in turn invoke gtags), so these
-# tools must be available from the command line (i.e. in the search path).
-#
-# The result: instead of the source browser generated by doxygen, the links to
-# source code will now point to the output of htags.
-# The default value is: NO.
-# This tag requires that the tag SOURCE_BROWSER is set to YES.
-
-USE_HTAGS              = NO
-
-# If the VERBATIM_HEADERS tag is set to YES then doxygen will generate a
-# verbatim copy of the header file for each class for which an include is
-# specified. Set to NO to disable this.
-# See also: Section \class.
-# The default value is: YES.
-
-VERBATIM_HEADERS       = YES
-
-# If the CLANG_ASSISTED_PARSING tag is set to YES, then doxygen will use the
-# clang parser (see: http://clang.llvm.org/) for more accurate parsing at the
-# cost of reduced performance. This can be particularly helpful with template
-# rich C++ code for which doxygen's built-in parser lacks the necessary type
-# information.
-# Note: The availability of this option depends on whether or not doxygen was
-# compiled with the --with-libclang option.
-# The default value is: NO.
-
-CLANG_ASSISTED_PARSING = NO
-
-# If clang assisted parsing is enabled you can provide the compiler with command
-# line options that you would normally use when invoking the compiler. Note that
-# the include paths will already be set by doxygen for the files and directories
-# specified with INPUT and INCLUDE_PATH.
-# This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES.
-
-CLANG_OPTIONS          =
-
-#---------------------------------------------------------------------------
-# Configuration options related to the alphabetical class index
-#---------------------------------------------------------------------------
-
-# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all
-# compounds will be generated. Enable this if the project contains a lot of
-# classes, structs, unions or interfaces.
-# The default value is: YES.
-
-ALPHABETICAL_INDEX     = YES
-
-# The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in
-# which the alphabetical index list will be split.
-# Minimum value: 1, maximum value: 20, default value: 5.
-# This tag requires that the tag ALPHABETICAL_INDEX is set to YES.
-
-COLS_IN_ALPHA_INDEX    = 5
-
-# In case all classes in a project start with a common prefix, all classes will
-# be put under the same header in the alphabetical index. The IGNORE_PREFIX tag
-# can be used to specify a prefix (or a list of prefixes) that should be ignored
-# while generating the index headers.
-# This tag requires that the tag ALPHABETICAL_INDEX is set to YES.
-
-IGNORE_PREFIX          =
-
-#---------------------------------------------------------------------------
-# Configuration options related to the HTML output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_HTML tag is set to YES doxygen will generate HTML output
-# The default value is: YES.
-
-GENERATE_HTML          = YES
-
-# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a
-# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
-# it.
-# The default directory is: html.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_OUTPUT            = html
-
-# The HTML_FILE_EXTENSION tag can be used to specify the file extension for each
-# generated HTML page (for example: .htm, .php, .asp).
-# The default value is: .html.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_FILE_EXTENSION    = .html
-
-# The HTML_HEADER tag can be used to specify a user-defined HTML header file for
-# each generated HTML page. If the tag is left blank doxygen will generate a
-# standard header.
-#
-# To get valid HTML the header file must include any scripts and style sheets
-# that doxygen needs, which depend on the configuration options used (e.g.
-# the setting GENERATE_TREEVIEW). It is highly recommended to start with a
-# default header using
-# doxygen -w html new_header.html new_footer.html new_stylesheet.css
-# YourConfigFile
-# and then modify the file new_header.html. See also section "Doxygen usage"
-# for information on how to generate the default header that doxygen normally
-# uses.
-# Note: The header is subject to change so you typically have to regenerate the
-# default header when upgrading to a newer version of doxygen. For a description
-# of the possible markers and block names see the documentation.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_HEADER            = dox/header2.html
-
-# The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each
-# generated HTML page. If the tag is left blank doxygen will generate a standard
-# footer. See HTML_HEADER for more information on how to generate a default
-# footer and what special commands can be used inside the footer. See also
-# section "Doxygen usage" for information on how to generate the default footer
-# that doxygen normally uses.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_FOOTER            =
-
-# The HTML_STYLESHEET tag can be used to specify a user-defined cascading style
-# sheet that is used by each HTML page. It can be used to fine-tune the look of
-# the HTML output. If left blank doxygen will generate a default style sheet.
-# See also section "Doxygen usage" for information on how to generate the style
-# sheet that doxygen normally uses.
-# Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as
-# it is more robust and this tag (HTML_STYLESHEET) will in the future become
-# obsolete.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_STYLESHEET        =
-
-# The HTML_EXTRA_STYLESHEET tag can be used to specify an additional user-
-# defined cascading style sheet that is included after the standard style sheets
-# created by doxygen. Using this option one can overrule certain style aspects.
-# This is preferred over using HTML_STYLESHEET since it does not replace the
-# standard style sheet and is therefore more robust against future updates.
-# Doxygen will copy the style sheet file to the output directory. For an example
-# see the documentation.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_EXTRA_STYLESHEET  = dox/pll.css
-
-# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or
-# other source files which should be copied to the HTML output directory. Note
-# that these files will be copied to the base HTML output directory. Use the
-# $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these
-# files. In the HTML_STYLESHEET file, use the file name only. Also note that the
-# files will be copied as-is; there are no commands or markers available.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_EXTRA_FILES       =
-
-# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen
-# will adjust the colors in the stylesheet and background images according to
-# this color. Hue is specified as an angle on a colorwheel, see
-# http://en.wikipedia.org/wiki/Hue for more information. For instance the value
-# 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300
-# purple, and 360 is red again.
-# Minimum value: 0, maximum value: 359, default value: 220.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_COLORSTYLE_HUE    = 220
-
-# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors
-# in the HTML output. For a value of 0 the output will use grayscales only. A
-# value of 255 will produce the most vivid colors.
-# Minimum value: 0, maximum value: 255, default value: 100.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_COLORSTYLE_SAT    = 100
-
-# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the
-# luminance component of the colors in the HTML output. Values below 100
-# gradually make the output lighter, whereas values above 100 make the output
-# darker. The value divided by 100 is the actual gamma applied, so 80 represents
-# a gamma of 0.8, the value 220 represents a gamma of 2.2, and 100 does not
-# change the gamma.
-# Minimum value: 40, maximum value: 240, default value: 80.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_COLORSTYLE_GAMMA  = 80
-
-# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML
-# page will contain the date and time when the page was generated. Setting this
-# to NO can help when comparing the output of multiple runs.
-# The default value is: YES.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_TIMESTAMP         = YES
-
-# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
-# documentation will contain sections that can be hidden and shown after the
-# page has loaded.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_DYNAMIC_SECTIONS  = YES
-
-# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries
-# shown in the various tree structured indices initially; the user can expand
-# and collapse entries dynamically later on. Doxygen will expand the tree to
-# such a level that at most the specified number of entries are visible (unless
-# a fully collapsed tree already exceeds this amount). So setting the number of
-# entries to 1 will produce a fully collapsed tree by default. 0 is a special
-# value representing an infinite number of entries and will result in a fully
-# expanded tree by default.
-# Minimum value: 0, maximum value: 9999, default value: 100.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_INDEX_NUM_ENTRIES = 100
-
-# If the GENERATE_DOCSET tag is set to YES, additional index files will be
-# generated that can be used as input for Apple's Xcode 3 integrated development
-# environment (see: http://developer.apple.com/tools/xcode/), introduced with
-# OSX 10.5 (Leopard). To create a documentation set, doxygen will generate a
-# Makefile in the HTML output directory. Running make will produce the docset in
-# that directory and running make install will install the docset in
-# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at
-# startup. See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html
-# for more information.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-GENERATE_DOCSET        = NO
-
-# This tag determines the name of the docset feed. A documentation feed provides
-# an umbrella under which multiple documentation sets from a single provider
-# (such as a company or product suite) can be grouped.
-# The default value is: Doxygen generated docs.
-# This tag requires that the tag GENERATE_DOCSET is set to YES.
-
-DOCSET_FEEDNAME        = "Doxygen generated docs"
-
-# This tag specifies a string that should uniquely identify the documentation
-# set bundle. This should be a reverse domain-name style string, e.g.
-# com.mycompany.MyDocSet. Doxygen will append .docset to the name.
-# The default value is: org.doxygen.Project.
-# This tag requires that the tag GENERATE_DOCSET is set to YES.
-
-DOCSET_BUNDLE_ID       = org.doxygen.Project
-
-# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify
-# the documentation publisher. This should be a reverse domain-name style
-# string, e.g. com.mycompany.MyDocSet.documentation.
-# The default value is: org.doxygen.Publisher.
-# This tag requires that the tag GENERATE_DOCSET is set to YES.
-
-DOCSET_PUBLISHER_ID    = org.doxygen.Publisher
-
-# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher.
-# The default value is: Publisher.
-# This tag requires that the tag GENERATE_DOCSET is set to YES.
-
-DOCSET_PUBLISHER_NAME  = Publisher
-
-# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three
-# additional HTML index files: index.hhp, index.hhc, and index.hhk. The
-# index.hhp is a project file that can be read by Microsoft's HTML Help Workshop
-# (see: http://www.microsoft.com/en-us/download/details.aspx?id=21138) on
-# Windows.
-#
-# The HTML Help Workshop contains a compiler that can convert all HTML output
-# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML
-# files are now used as the Windows 98 help format, and will replace the old
-# Windows help format (.hlp) on all Windows platforms in the future. Compressed
-# HTML files also contain an index, a table of contents, and you can search for
-# words in the documentation. The HTML workshop also contains a viewer for
-# compressed HTML files.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-GENERATE_HTMLHELP      = NO
-
-# The CHM_FILE tag can be used to specify the file name of the resulting .chm
-# file. You can add a path in front of the file if the result should not be
-# written to the html output directory.
-# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
-
-CHM_FILE               =
-
-# The HHC_LOCATION tag can be used to specify the location (absolute path
-# including file name) of the HTML help compiler (hhc.exe). If non-empty
-# doxygen will try to run the HTML help compiler on the generated index.hhp.
-# The file has to be specified with full path.
-# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
-
-HHC_LOCATION           =
-
-# The GENERATE_CHI flag controls whether a separate .chi index file is
-# generated (YES) or whether it is included in the master .chm file (NO).
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
-
-GENERATE_CHI           = NO
-
-# The CHM_INDEX_ENCODING is used to encode HtmlHelp index (hhk), content (hhc)
-# and project file content.
-# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
-
-CHM_INDEX_ENCODING     =
-
-# The BINARY_TOC flag controls whether a binary table of contents is generated
-# (YES) or a normal table of contents (NO) in the .chm file.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
-
-BINARY_TOC             = NO
-
-# The TOC_EXPAND flag can be set to YES to add extra items for group members to
-# the table of contents of the HTML help documentation and to the tree view.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
-
-TOC_EXPAND             = NO
-
-# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and
-# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that
-# can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help
-# (.qch) of the generated HTML documentation.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-GENERATE_QHP           = NO
-
-# If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify
-# the file name of the resulting .qch file. The path specified is relative to
-# the HTML output folder.
-# This tag requires that the tag GENERATE_QHP is set to YES.
-
-QCH_FILE               =
-
-# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help
-# Project output. For more information please see Qt Help Project / Namespace
-# (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#namespace).
-# The default value is: org.doxygen.Project.
-# This tag requires that the tag GENERATE_QHP is set to YES.
-
-QHP_NAMESPACE          = org.doxygen.Project
-
-# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt
-# Help Project output. For more information please see Qt Help Project / Virtual
-# Folders (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#virtual-
-# folders).
-# The default value is: doc.
-# This tag requires that the tag GENERATE_QHP is set to YES.
-
-QHP_VIRTUAL_FOLDER     = doc
-
-# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom
-# filter to add. For more information please see Qt Help Project / Custom
-# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom-
-# filters).
-# This tag requires that the tag GENERATE_QHP is set to YES.
-
-QHP_CUST_FILTER_NAME   =
-
-# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the
-# custom filter to add. For more information please see Qt Help Project / Custom
-# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom-
-# filters).
-# This tag requires that the tag GENERATE_QHP is set to YES.
-
-QHP_CUST_FILTER_ATTRS  =
-
-# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this
-# project's filter section matches. Qt Help Project / Filter Attributes (see:
-# http://qt-project.org/doc/qt-4.8/qthelpproject.html#filter-attributes).
-# This tag requires that the tag GENERATE_QHP is set to YES.
-
-QHP_SECT_FILTER_ATTRS  =
-
-# The QHG_LOCATION tag can be used to specify the location of Qt's
-# qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the
-# generated .qhp file.
-# This tag requires that the tag GENERATE_QHP is set to YES.
-
-QHG_LOCATION           =
-
-# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be
-# generated, together with the HTML files, they form an Eclipse help plugin. To
-# install this plugin and make it available under the help contents menu in
-# Eclipse, the contents of the directory containing the HTML and XML files needs
-# to be copied into the plugins directory of eclipse. The name of the directory
-# within the plugins directory should be the same as the ECLIPSE_DOC_ID value.
-# After copying Eclipse needs to be restarted before the help appears.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-GENERATE_ECLIPSEHELP   = NO
-
-# A unique identifier for the Eclipse help plugin. When installing the plugin
-# the directory name containing the HTML and XML files should also have this
-# name. Each documentation set should have its own identifier.
-# The default value is: org.doxygen.Project.
-# This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES.
-
-ECLIPSE_DOC_ID         = org.doxygen.Project
-
-# If you want full control over the layout of the generated HTML pages it might
-# be necessary to disable the index and replace it with your own. The
-# DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top
-# of each HTML page. A value of NO enables the index and the value YES disables
-# it. Since the tabs in the index contain the same information as the navigation
-# tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-DISABLE_INDEX          = NO
-
-# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
-# structure should be generated to display hierarchical information. If the tag
-# value is set to YES, a side panel will be generated containing a tree-like
-# index structure (just like the one that is generated for HTML Help). For this
-# to work a browser that supports JavaScript, DHTML, CSS and frames is required
-# (i.e. any modern browser). Windows users are probably better off using the
-# HTML help feature. Via custom stylesheets (see HTML_EXTRA_STYLESHEET) one can
-# further fine-tune the look of the index. As an example, the default style
-# sheet generated by doxygen has an example that shows how to put an image at
-# the root of the tree instead of the PROJECT_NAME. Since the tree basically has
-# the same information as the tab index, you could consider setting
-# DISABLE_INDEX to YES when enabling this option.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-GENERATE_TREEVIEW      = NO
-
-# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that
-# doxygen will group on one line in the generated HTML documentation.
-#
-# Note that a value of 0 will completely suppress the enum values from appearing
-# in the overview section.
-# Minimum value: 0, maximum value: 20, default value: 4.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-ENUM_VALUES_PER_LINE   = 4
-
-# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used
-# to set the initial width (in pixels) of the frame in which the tree is shown.
-# Minimum value: 0, maximum value: 1500, default value: 250.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-TREEVIEW_WIDTH         = 250
-
-# When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open links to
-# external symbols imported via tag files in a separate window.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-EXT_LINKS_IN_WINDOW    = NO
-
-# Use this tag to change the font size of LaTeX formulas included as images in
-# the HTML documentation. When you change the font size after a successful
-# doxygen run you need to manually remove any form_*.png images from the HTML
-# output directory to force them to be regenerated.
-# Minimum value: 8, maximum value: 50, default value: 10.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-FORMULA_FONTSIZE       = 10
-
-# Use the FORMULA_TRANSPARENT tag to determine whether or not the images
-# generated for formulas are transparent PNGs. Transparent PNGs are not
-# supported properly for IE 6.0, but are supported on all modern browsers.
-#
-# Note that when changing this option you need to delete any form_*.png files in
-# the HTML output directory before the changes have effect.
-# The default value is: YES.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-FORMULA_TRANSPARENT    = YES
-
-# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see
-# http://www.mathjax.org) which uses client side Javascript for the rendering
-# instead of using prerendered bitmaps. Use this if you do not have LaTeX
-# installed or if you want formulas to look prettier in the HTML output. When
-# enabled you may also need to install MathJax separately and configure the path
-# to it using the MATHJAX_RELPATH option.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-USE_MATHJAX            = NO
-
-# When MathJax is enabled you can set the default output format to be used for
-# the MathJax output. See the MathJax site (see:
-# http://docs.mathjax.org/en/latest/output.html) for more details.
-# Possible values are: HTML-CSS (which is slower, but has the best
-# compatibility), NativeMML (i.e. MathML) and SVG.
-# The default value is: HTML-CSS.
-# This tag requires that the tag USE_MATHJAX is set to YES.
-
-MATHJAX_FORMAT         = HTML-CSS
-
-# When MathJax is enabled you need to specify the location relative to the HTML
-# output directory using the MATHJAX_RELPATH option. The destination directory
-# should contain the MathJax.js script. For instance, if the mathjax directory
-# is located at the same level as the HTML output directory, then
-# MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax
-# Content Delivery Network so you can quickly see the result without installing
-# MathJax. However, it is strongly recommended to install a local copy of
-# MathJax from http://www.mathjax.org before deployment.
-# The default value is: http://cdn.mathjax.org/mathjax/latest.
-# This tag requires that the tag USE_MATHJAX is set to YES.
-
-MATHJAX_RELPATH        = http://cdn.mathjax.org/mathjax/latest
-
-# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax
-# extension names that should be enabled during MathJax rendering. For example
-# MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols
-# This tag requires that the tag USE_MATHJAX is set to YES.
-
-MATHJAX_EXTENSIONS     =
-
-# The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces
-# of code that will be used on startup of the MathJax code. See the MathJax site
-# (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an
-# example see the documentation.
-# This tag requires that the tag USE_MATHJAX is set to YES.
-
-MATHJAX_CODEFILE       =
-
-# When the SEARCHENGINE tag is enabled doxygen will generate a search box for
-# the HTML output. The underlying search engine uses javascript and DHTML and
-# should work on any modern browser. Note that when using HTML help
-# (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET)
-# there is already a search function so this one should typically be disabled.
-# For large projects the javascript based search engine can be slow, then
-# enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to
-# search using the keyboard; to jump to the search box use <access key> + S
-# (what the <access key> is depends on the OS and browser, but it is typically
-# <CTRL>, <ALT>/<option>, or both). Inside the search box use the <cursor down
-# key> to jump into the search results window, the results can be navigated
-# using the <cursor keys>. Press <Enter> to select an item or <escape> to cancel
-# the search. The filter options can be selected when the cursor is inside the
-# search box by pressing <Shift>+<cursor down>. Also here use the <cursor keys>
-# to select a filter and <Enter> or <escape> to activate or cancel the filter
-# option.
-# The default value is: YES.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-SEARCHENGINE           = NO
-
-# When the SERVER_BASED_SEARCH tag is enabled the search engine will be
-# implemented using a web server instead of a web client using Javascript. There
-# are two flavours of web server based searching depending on the
-# EXTERNAL_SEARCH setting. When disabled, doxygen will generate a PHP script for
-# searching and an index file used by the script. When EXTERNAL_SEARCH is
-# enabled the indexing and searching needs to be provided by external tools. See
-# the section "External Indexing and Searching" for details.
-# The default value is: NO.
-# This tag requires that the tag SEARCHENGINE is set to YES.
-
-SERVER_BASED_SEARCH    = NO
-
-# When EXTERNAL_SEARCH tag is enabled doxygen will no longer generate the PHP
-# script for searching. Instead the search results are written to an XML file
-# which needs to be processed by an external indexer. Doxygen will invoke an
-# external search engine pointed to by the SEARCHENGINE_URL option to obtain the
-# search results.
-#
-# Doxygen ships with an example indexer (doxyindexer) and search engine
-# (doxysearch.cgi) which are based on the open source search engine library
-# Xapian (see: http://xapian.org/).
-#
-# See the section "External Indexing and Searching" for details.
-# The default value is: NO.
-# This tag requires that the tag SEARCHENGINE is set to YES.
-
-EXTERNAL_SEARCH        = NO
-
-# The SEARCHENGINE_URL should point to a search engine hosted by a web server
-# which will return the search results when EXTERNAL_SEARCH is enabled.
-#
-# Doxygen ships with an example indexer (doxyindexer) and search engine
-# (doxysearch.cgi) which are based on the open source search engine library
-# Xapian (see: http://xapian.org/). See the section "External Indexing and
-# Searching" for details.
-# This tag requires that the tag SEARCHENGINE is set to YES.
-
-SEARCHENGINE_URL       =
-
-# When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the unindexed
-# search data is written to a file for indexing by an external tool. With the
-# SEARCHDATA_FILE tag the name of this file can be specified.
-# The default file is: searchdata.xml.
-# This tag requires that the tag SEARCHENGINE is set to YES.
-
-SEARCHDATA_FILE        = searchdata.xml
-
-# When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the
-# EXTERNAL_SEARCH_ID tag can be used as an identifier for the project. This is
-# useful in combination with EXTRA_SEARCH_MAPPINGS to search through multiple
-# projects and redirect the results back to the right project.
-# This tag requires that the tag SEARCHENGINE is set to YES.
-
-EXTERNAL_SEARCH_ID     =
-
-# The EXTRA_SEARCH_MAPPINGS tag can be used to enable searching through doxygen
-# projects other than the one defined by this configuration file, but that are
-# all added to the same external search index. Each project needs to have a
-# unique id set via EXTERNAL_SEARCH_ID. The search mapping then maps the id
-# to a relative location where the documentation can be found. The format is:
-# EXTRA_SEARCH_MAPPINGS = tagname1=loc1 tagname2=loc2 ...
-# This tag requires that the tag SEARCHENGINE is set to YES.
-
-EXTRA_SEARCH_MAPPINGS  =
-
-#---------------------------------------------------------------------------
-# Configuration options related to the LaTeX output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_LATEX tag is set to YES doxygen will generate LaTeX output.
-# The default value is: YES.
-
-GENERATE_LATEX         = YES
-
-# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. If a
-# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
-# it.
-# The default directory is: latex.
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-LATEX_OUTPUT           = latex
-
-# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
-# invoked.
-#
-# Note that when enabling USE_PDFLATEX this option is only used for generating
-# bitmaps for formulas in the HTML output, but not in the Makefile that is
-# written to the output directory.
-# The default file is: latex.
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-LATEX_CMD_NAME         = latex
-
-# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to generate
-# index for LaTeX.
-# The default file is: makeindex.
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-MAKEINDEX_CMD_NAME     = makeindex
-
-# If the COMPACT_LATEX tag is set to YES doxygen generates more compact LaTeX
-# documents. This may be useful for small projects and may help to save some
-# trees in general.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-COMPACT_LATEX          = NO
-
-# The PAPER_TYPE tag can be used to set the paper type that is used by the
-# printer.
-# Possible values are: a4 (210 x 297 mm), letter (8.5 x 11 inches), legal (8.5 x
-# 14 inches) and executive (7.25 x 10.5 inches).
-# The default value is: a4.
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-PAPER_TYPE             = a4
-
-# The EXTRA_PACKAGES tag can be used to specify one or more LaTeX package names
-# that should be included in the LaTeX output. To get the times font for
-# instance you can specify
-# EXTRA_PACKAGES=times
-# If left blank no extra packages will be included.
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-EXTRA_PACKAGES         =
-
-# The LATEX_HEADER tag can be used to specify a personal LaTeX header for the
-# generated LaTeX document. The header should contain everything until the first
-# chapter. If it is left blank doxygen will generate a standard header. See
-# section "Doxygen usage" for information on how to let doxygen write the
-# default header to a separate file.
-#
-# Note: Only use a user-defined header if you know what you are doing! The
-# following commands have a special meaning inside the header: $title,
-# $datetime, $date, $doxygenversion, $projectname, $projectnumber. Doxygen will
-# replace them by respectively the title of the page, the current date and time,
-# only the current date, the version number of doxygen, the project name (see
-# PROJECT_NAME), or the project number (see PROJECT_NUMBER).
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-LATEX_HEADER           =
-
-# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for the
-# generated LaTeX document. The footer should contain everything after the last
-# chapter. If it is left blank doxygen will generate a standard footer.
-#
-# Note: Only use a user-defined footer if you know what you are doing!
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-LATEX_FOOTER           =
-
-# The LATEX_EXTRA_FILES tag can be used to specify one or more extra images or
-# other source files which should be copied to the LATEX_OUTPUT output
-# directory. Note that the files will be copied as-is; there are no commands or
-# markers available.
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-LATEX_EXTRA_FILES      =
-
-# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated is
-# prepared for conversion to PDF (using ps2pdf or pdflatex). The PDF file will
-# contain links (just like the HTML output) instead of page references. This
-# makes the output suitable for online browsing using a PDF viewer.
-# The default value is: YES.
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-PDF_HYPERLINKS         = YES
-
-# If the USE_PDFLATEX tag is set to YES, doxygen will use pdflatex to generate
-# the PDF file directly from the LaTeX files. Set this option to YES to get a
-# higher quality PDF documentation.
-# The default value is: YES.
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-USE_PDFLATEX           = YES
-
-# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \batchmode
-# command to the generated LaTeX files. This will instruct LaTeX to keep running
-# if errors occur, instead of asking the user for help. This option is also used
-# when generating formulas in HTML.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-LATEX_BATCHMODE        = NO
-
-# If the LATEX_HIDE_INDICES tag is set to YES then doxygen will not include the
-# index chapters (such as File Index, Compound Index, etc.) in the output.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-LATEX_HIDE_INDICES     = NO
-
-# If the LATEX_SOURCE_CODE tag is set to YES then doxygen will include source
-# code with syntax highlighting in the LaTeX output.
-#
-# Note that which sources are shown also depends on other settings such as
-# SOURCE_BROWSER.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-LATEX_SOURCE_CODE      = NO
-
-# The LATEX_BIB_STYLE tag can be used to specify the style to use for the
-# bibliography, e.g. plainnat, or ieeetr. See
-# http://en.wikipedia.org/wiki/BibTeX and \cite for more info.
-# The default value is: plain.
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-LATEX_BIB_STYLE        = plain
-
-#---------------------------------------------------------------------------
-# Configuration options related to the RTF output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_RTF tag is set to YES doxygen will generate RTF output. The
-# RTF output is optimized for Word 97 and may not look too pretty with other RTF
-# readers/editors.
-# The default value is: NO.
-
-GENERATE_RTF           = NO
-
-# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. If a
-# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
-# it.
-# The default directory is: rtf.
-# This tag requires that the tag GENERATE_RTF is set to YES.
-
-RTF_OUTPUT             = rtf
-
-# If the COMPACT_RTF tag is set to YES doxygen generates more compact RTF
-# documents. This may be useful for small projects and may help to save some
-# trees in general.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_RTF is set to YES.
-
-COMPACT_RTF            = NO
-
-# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated will
-# contain hyperlink fields. The RTF file will contain links (just like the HTML
-# output) instead of page references. This makes the output suitable for online
-# browsing using Word or some other Word compatible readers that support those
-# fields.
-#
-# Note: WordPad (write) and others do not support links.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_RTF is set to YES.
-
-RTF_HYPERLINKS         = NO
-
-# Load stylesheet definitions from file. Syntax is similar to doxygen's config
-# file, i.e. a series of assignments. You only have to provide replacements,
-# missing definitions are set to their default value.
-#
-# See also section "Doxygen usage" for information on how to generate the
-# default style sheet that doxygen normally uses.
-# This tag requires that the tag GENERATE_RTF is set to YES.
-
-RTF_STYLESHEET_FILE    =
-
-# Set optional variables used in the generation of an RTF document. Syntax is
-# similar to doxygen's config file. A template extensions file can be generated
-# using doxygen -e rtf extensionFile.
-# This tag requires that the tag GENERATE_RTF is set to YES.
-
-RTF_EXTENSIONS_FILE    =
-
-#---------------------------------------------------------------------------
-# Configuration options related to the man page output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_MAN tag is set to YES doxygen will generate man pages for
-# classes and files.
-# The default value is: NO.
-
-GENERATE_MAN           = NO
-
-# The MAN_OUTPUT tag is used to specify where the man pages will be put. If a
-# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
-# it. A directory man3 will be created inside the directory specified by
-# MAN_OUTPUT.
-# The default directory is: man.
-# This tag requires that the tag GENERATE_MAN is set to YES.
-
-MAN_OUTPUT             = man
-
-# The MAN_EXTENSION tag determines the extension that is added to the generated
-# man pages. In case the manual section does not start with a number, the number
-# 3 is prepended. The dot (.) at the beginning of the MAN_EXTENSION tag is
-# optional.
-# The default value is: .3.
-# This tag requires that the tag GENERATE_MAN is set to YES.
-
-MAN_EXTENSION          = .3
-
-# If the MAN_LINKS tag is set to YES and doxygen generates man output, then it
-# will generate one additional man file for each entity documented in the real
-# man page(s). These additional files only source the real man page, but without
-# them the man command would be unable to find the correct page.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_MAN is set to YES.
-
-MAN_LINKS              = NO
-
-#---------------------------------------------------------------------------
-# Configuration options related to the XML output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_XML tag is set to YES doxygen will generate an XML file that
-# captures the structure of the code including all documentation.
-# The default value is: NO.
-
-GENERATE_XML           = YES
-
-# The XML_OUTPUT tag is used to specify where the XML pages will be put. If a
-# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
-# it.
-# The default directory is: xml.
-# This tag requires that the tag GENERATE_XML is set to YES.
-
-XML_OUTPUT             = xml
-
-# The XML_SCHEMA tag can be used to specify a XML schema, which can be used by a
-# validating XML parser to check the syntax of the XML files.
-# This tag requires that the tag GENERATE_XML is set to YES.
-
-XML_SCHEMA             =
-
-# The XML_DTD tag can be used to specify a XML DTD, which can be used by a
-# validating XML parser to check the syntax of the XML files.
-# This tag requires that the tag GENERATE_XML is set to YES.
-
-XML_DTD                =
-
-# If the XML_PROGRAMLISTING tag is set to YES doxygen will dump the program
-# listings (including syntax highlighting and cross-referencing information) to
-# the XML output. Note that enabling this will significantly increase the size
-# of the XML output.
-# The default value is: YES.
-# This tag requires that the tag GENERATE_XML is set to YES.
-
-XML_PROGRAMLISTING     = YES
-
-#---------------------------------------------------------------------------
-# Configuration options related to the DOCBOOK output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_DOCBOOK tag is set to YES doxygen will generate Docbook files
-# that can be used to generate PDF.
-# The default value is: NO.
-
-GENERATE_DOCBOOK       = NO
-
-# The DOCBOOK_OUTPUT tag is used to specify where the Docbook pages will be put.
-# If a relative path is entered the value of OUTPUT_DIRECTORY will be put in
-# front of it.
-# The default directory is: docbook.
-# This tag requires that the tag GENERATE_DOCBOOK is set to YES.
-
-DOCBOOK_OUTPUT         = docbook
-
-#---------------------------------------------------------------------------
-# Configuration options for the AutoGen Definitions output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_AUTOGEN_DEF tag is set to YES doxygen will generate an AutoGen
-# Definitions (see http://autogen.sf.net) file that captures the structure of
-# the code including all documentation. Note that this feature is still
-# experimental and incomplete at the moment.
-# The default value is: NO.
-
-GENERATE_AUTOGEN_DEF   = NO
-
-#---------------------------------------------------------------------------
-# Configuration options related to the Perl module output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_PERLMOD tag is set to YES doxygen will generate a Perl module
-# file that captures the structure of the code including all documentation.
-#
-# Note that this feature is still experimental and incomplete at the moment.
-# The default value is: NO.
-
-GENERATE_PERLMOD       = NO
-
-# If the PERLMOD_LATEX tag is set to YES doxygen will generate the necessary
-# Makefile rules, Perl scripts and LaTeX code to be able to generate PDF and DVI
-# output from the Perl module output.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_PERLMOD is set to YES.
-
-PERLMOD_LATEX          = NO
-
-# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be nicely
-# formatted so it can be parsed by a human reader. This is useful if you want to
-# understand what is going on. On the other hand, if this tag is set to NO the
-# size of the Perl module output will be much smaller and Perl will parse it
-# just the same.
-# The default value is: YES.
-# This tag requires that the tag GENERATE_PERLMOD is set to YES.
-
-PERLMOD_PRETTY         = YES
-
-# The names of the make variables in the generated doxyrules.make file are
-# prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. This is useful
-# so different doxyrules.make files included by the same Makefile don't
-# overwrite each other's variables.
-# This tag requires that the tag GENERATE_PERLMOD is set to YES.
-
-PERLMOD_MAKEVAR_PREFIX =
-
-#---------------------------------------------------------------------------
-# Configuration options related to the preprocessor
-#---------------------------------------------------------------------------
-
-# If the ENABLE_PREPROCESSING tag is set to YES doxygen will evaluate all
-# C-preprocessor directives found in the sources and include files.
-# The default value is: YES.
-
-ENABLE_PREPROCESSING   = NO
-
-# If the MACRO_EXPANSION tag is set to YES doxygen will expand all macro names
-# in the source code. If set to NO only conditional compilation will be
-# performed. Macro expansion can be done in a controlled way by setting
-# EXPAND_ONLY_PREDEF to YES.
-# The default value is: NO.
-# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
-
-MACRO_EXPANSION        = NO
-
-# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES then
-# the macro expansion is limited to the macros specified with the PREDEFINED and
-# EXPAND_AS_DEFINED tags.
-# The default value is: NO.
-# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
-
-EXPAND_ONLY_PREDEF     = NO
-
-# If the SEARCH_INCLUDES tag is set to YES the include files in the
-# INCLUDE_PATH will be searched if a #include is found.
-# The default value is: YES.
-# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
-
-SEARCH_INCLUDES        = YES
-
-# The INCLUDE_PATH tag can be used to specify one or more directories that
-# contain include files that are not input files but should be processed by the
-# preprocessor.
-# This tag requires that the tag SEARCH_INCLUDES is set to YES.
-
-INCLUDE_PATH           =
-
-# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
-# patterns (like *.h and *.hpp) to filter out the header-files in the
-# directories. If left blank, the patterns specified with FILE_PATTERNS will be
-# used.
-# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
-
-INCLUDE_FILE_PATTERNS  =
-
-# The PREDEFINED tag can be used to specify one or more macro names that are
-# defined before the preprocessor is started (similar to the -D option of e.g.
-# gcc). The argument of the tag is a list of macros of the form: name or
-# name=definition (no spaces). If the definition and the "=" are omitted, "=1"
-# is assumed. To prevent a macro definition from being undefined via #undef or
-# recursively expanded use the := operator instead of the = operator.
-# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
-
-PREDEFINED             =
-
-# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this
-# tag can be used to specify a list of macro names that should be expanded. The
-# macro definition that is found in the sources will be used. Use the PREDEFINED
-# tag if you want to use a different macro definition that overrules the
-# definition found in the source code.
-# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
-
-EXPAND_AS_DEFINED      =
-
-# If the SKIP_FUNCTION_MACROS tag is set to YES then doxygen's preprocessor will
-# remove all references to function-like macros that are alone on a line, have an
-# all uppercase name, and do not end with a semicolon. Such function macros are
-# typically used for boiler-plate code, and will confuse the parser if not
-# removed.
-# The default value is: YES.
-# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
-
-SKIP_FUNCTION_MACROS   = YES
-
-#---------------------------------------------------------------------------
-# Configuration options related to external references
-#---------------------------------------------------------------------------
-
-# The TAGFILES tag can be used to specify one or more tag files. For each tag
-# file the location of the external documentation should be added. The format of
-# a tag file without this location is as follows:
-# TAGFILES = file1 file2 ...
-# Adding location for the tag files is done as follows:
-# TAGFILES = file1=loc1 "file2 = loc2" ...
-# where loc1 and loc2 can be relative or absolute paths or URLs. See the
-# section "Linking to external documentation" for more information about the use
-# of tag files.
-# Note: Each tag file must have a unique name (where the name does NOT include
-# the path). If a tag file is not located in the directory in which doxygen is
-# run, you must also specify the path to the tagfile here.
-
-TAGFILES               =
-
-# When a file name is specified after GENERATE_TAGFILE, doxygen will create a
-# tag file that is based on the input files it reads. See section "Linking to
-# external documentation" for more information about the usage of tag files.
-
-GENERATE_TAGFILE       =
-
-# If the ALLEXTERNALS tag is set to YES all external classes will be listed in the
-# class index. If set to NO only the inherited external classes will be listed.
-# The default value is: NO.
-
-ALLEXTERNALS           = NO
-
-# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed in
-# the modules index. If set to NO, only the current project's groups will be
-# listed.
-# The default value is: YES.
-
-EXTERNAL_GROUPS        = YES
-
-# If the EXTERNAL_PAGES tag is set to YES all external pages will be listed in
-# the related pages index. If set to NO, only the current project's pages will
-# be listed.
-# The default value is: YES.
-
-EXTERNAL_PAGES         = YES
-
-# The PERL_PATH should be the absolute path and name of the perl script
-# interpreter (i.e. the result of 'which perl').
-# The default file (with absolute path) is: /usr/bin/perl.
-
-PERL_PATH              = /usr/bin/perl
-
-#---------------------------------------------------------------------------
-# Configuration options related to the dot tool
-#---------------------------------------------------------------------------
-
-# If the CLASS_DIAGRAMS tag is set to YES doxygen will generate a class diagram
-# (in HTML and LaTeX) for classes with base or super classes. Setting the tag to
-# NO turns the diagrams off. Note that this option also works with HAVE_DOT
-# disabled, but it is recommended to install and use dot, since it yields more
-# powerful graphs.
-# The default value is: YES.
-
-CLASS_DIAGRAMS         = YES
-
-# You can define message sequence charts within doxygen comments using the \msc
-# command. Doxygen will then run the mscgen tool (see:
-# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the
-# documentation. The MSCGEN_PATH tag allows you to specify the directory where
-# the mscgen tool resides. If left empty the tool is assumed to be found in the
-# default search path.
-
-MSCGEN_PATH            =
-
-# If set to YES, the inheritance and collaboration graphs will hide inheritance
-# and usage relations if the target is undocumented or is not a class.
-# The default value is: YES.
-
-HIDE_UNDOC_RELATIONS   = YES
-
-# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
-# available from the path. This tool is part of Graphviz (see:
-# http://www.graphviz.org/), a graph visualization toolkit from AT&T and Lucent
-# Bell Labs. The other options in this section have no effect if this option is
-# set to NO.
-# The default value is: NO.
-
-HAVE_DOT               = YES
-
-# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is allowed
-# to run in parallel. When set to 0 doxygen will base this on the number of
-# processors available in the system. You can set it explicitly to a value
-# larger than 0 to get control over the balance between CPU load and processing
-# speed.
-# Minimum value: 0, maximum value: 32, default value: 0.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-DOT_NUM_THREADS        = 0
-
-# When you want a differently looking font in the dot files that doxygen
-# generates you can specify the font name using DOT_FONTNAME. You need to make
-# sure dot is able to find the font, which can be done by putting it in a
-# standard location or by setting the DOTFONTPATH environment variable or by
-# setting DOT_FONTPATH to the directory containing the font.
-# The default value is: Helvetica.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-DOT_FONTNAME           = Helvetica
-
-# The DOT_FONTSIZE tag can be used to set the size (in points) of the font of
-# dot graphs.
-# Minimum value: 4, maximum value: 24, default value: 10.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-DOT_FONTSIZE           = 10
-
-# By default doxygen will tell dot to use the default font as specified with
-# DOT_FONTNAME. If you specify a different font using DOT_FONTNAME you can set
-# the path where dot can find it using this tag.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-DOT_FONTPATH           =
-
-# If the CLASS_GRAPH tag is set to YES then doxygen will generate a graph for
-# each documented class showing the direct and indirect inheritance relations.
-# Setting this tag to YES will force the CLASS_DIAGRAMS tag to NO.
-# The default value is: YES.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-CLASS_GRAPH            = YES
-
-# If the COLLABORATION_GRAPH tag is set to YES then doxygen will generate a
-# graph for each documented class showing the direct and indirect implementation
-# dependencies (inheritance, containment, and class references variables) of the
-# class with other documented classes.
-# The default value is: YES.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-COLLABORATION_GRAPH    = YES
-
-# If the GROUP_GRAPHS tag is set to YES then doxygen will generate a graph for
-# groups, showing the direct groups dependencies.
-# The default value is: YES.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-GROUP_GRAPHS           = YES
-
-# If the UML_LOOK tag is set to YES doxygen will generate inheritance and
-# collaboration diagrams in a style similar to the OMG's Unified Modeling
-# Language.
-# The default value is: NO.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-UML_LOOK               = NO
-
-# If the UML_LOOK tag is enabled, the fields and methods are shown inside the
-# class node. If there are many fields or methods and many nodes the graph may
-# become too big to be useful. The UML_LIMIT_NUM_FIELDS threshold limits the
-# number of items for each type to make the size more manageable. Set this to 0
-# for no limit. Note that the threshold may be exceeded by 50% before the limit
-# is enforced. So when you set the threshold to 10, up to 15 fields may appear,
-# but if the number exceeds 15, the total amount of fields shown is limited to
-# 10.
-# Minimum value: 0, maximum value: 100, default value: 10.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-UML_LIMIT_NUM_FIELDS   = 10
-
-# If the TEMPLATE_RELATIONS tag is set to YES then the inheritance and
-# collaboration graphs will show the relations between templates and their
-# instances.
-# The default value is: NO.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-TEMPLATE_RELATIONS     = NO
-
-# If the INCLUDE_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are set to
-# YES then doxygen will generate a graph for each documented file showing the
-# direct and indirect include dependencies of the file with other documented
-# files.
-# The default value is: YES.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-INCLUDE_GRAPH          = YES
-
-# If the INCLUDED_BY_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are
-# set to YES then doxygen will generate a graph for each documented file showing
-# the direct and indirect include dependencies of the file with other documented
-# files.
-# The default value is: YES.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-INCLUDED_BY_GRAPH      = YES
-
-# If the CALL_GRAPH tag is set to YES then doxygen will generate a call
-# dependency graph for every global function or class method.
-#
-# Note that enabling this option will significantly increase the time of a run.
-# So in most cases it will be better to enable call graphs for selected
-# functions only using the \callgraph command.
-# The default value is: NO.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-CALL_GRAPH             = YES
-
-# If the CALLER_GRAPH tag is set to YES then doxygen will generate a caller
-# dependency graph for every global function or class method.
-#
-# Note that enabling this option will significantly increase the time of a run.
-# So in most cases it will be better to enable caller graphs for selected
-# functions only using the \callergraph command.
-# The default value is: NO.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-CALLER_GRAPH           = YES
-
-# If the GRAPHICAL_HIERARCHY tag is set to YES then doxygen will generate a
-# graphical hierarchy of all classes instead of a textual one.
-# The default value is: YES.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-GRAPHICAL_HIERARCHY    = YES
-
-# If the DIRECTORY_GRAPH tag is set to YES then doxygen will show the
-# dependencies a directory has on other directories in a graphical way. The
-# dependency relations are determined by the #include relations between the
-# files in the directories.
-# The default value is: YES.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-DIRECTORY_GRAPH        = YES
-
-# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
-# generated by dot.
-# Note: If you choose svg you need to set HTML_FILE_EXTENSION to xhtml in order
-# to make the SVG files visible in IE 9+ (other browsers do not have this
-# requirement).
-# Possible values are: png, jpg, gif and svg.
-# The default value is: png.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-DOT_IMAGE_FORMAT       = png
-
-# If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to
-# enable generation of interactive SVG images that allow zooming and panning.
-#
-# Note that this requires a modern browser other than Internet Explorer. Tested
-# and working are Firefox, Chrome, Safari, and Opera.
-# Note: For IE 9+ you need to set HTML_FILE_EXTENSION to xhtml in order to make
-# the SVG files visible. Older versions of IE do not have SVG support.
-# The default value is: NO.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-INTERACTIVE_SVG        = NO
-
-# The DOT_PATH tag can be used to specify the path where the dot tool can be
-# found. If left blank, it is assumed the dot tool can be found in the path.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-DOT_PATH               =
-
-# The DOTFILE_DIRS tag can be used to specify one or more directories that
-# contain dot files that are included in the documentation (see the \dotfile
-# command).
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-DOTFILE_DIRS           =
-
-# The MSCFILE_DIRS tag can be used to specify one or more directories that
-# contain msc files that are included in the documentation (see the \mscfile
-# command).
-
-MSCFILE_DIRS           =
-
-# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of nodes
-# that will be shown in the graph. If the number of nodes in a graph becomes
-# larger than this value, doxygen will truncate the graph, which is visualized
-# by representing a node as a red box. Note that if the number of direct
-# children of the root node in a graph is already larger than
-# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note that
-# the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH.
-# Minimum value: 0, maximum value: 10000, default value: 50.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-DOT_GRAPH_MAX_NODES    = 50
-
-# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the graphs
-# generated by dot. A depth value of 3 means that only nodes reachable from the
-# root by following a path via at most 3 edges will be shown. Nodes that lie
-# further from the root node will be omitted. Note that setting this option to 1
-# or 2 may greatly reduce the computation time needed for large code bases. Also
-# note that the size of a graph can be further restricted by
-# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction.
-# Minimum value: 0, maximum value: 1000, default value: 0.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-MAX_DOT_GRAPH_DEPTH    = 0
-
-# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent
-# background. This is disabled by default, because dot on Windows does not seem
-# to support this out of the box.
-#
-# Warning: Depending on the platform used, enabling this option may lead to
-# badly anti-aliased labels on the edges of a graph (i.e. they become hard to
-# read).
-# The default value is: NO.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-DOT_TRANSPARENT        = NO
-
-# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output
-# files in one run (i.e. multiple -o and -T options on the command line). This
-# makes dot run faster, but since only newer versions of dot (>1.8.10) support
-# this, this feature is disabled by default.
-# The default value is: NO.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-DOT_MULTI_TARGETS      = NO
-
-# If the GENERATE_LEGEND tag is set to YES doxygen will generate a legend page
-# explaining the meaning of the various boxes and arrows in the dot generated
-# graphs.
-# The default value is: YES.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-GENERATE_LEGEND        = YES
-
-# If the DOT_CLEANUP tag is set to YES doxygen will remove the intermediate dot
-# files that are used to generate the various graphs.
-# The default value is: YES.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-DOT_CLEANUP            = YES
diff --git a/pllrepo/INSTALL b/pllrepo/INSTALL
deleted file mode 100644
index a1e89e1..0000000
--- a/pllrepo/INSTALL
+++ /dev/null
@@ -1,370 +0,0 @@
-Installation Instructions
-*************************
-
-Copyright (C) 1994-1996, 1999-2002, 2004-2011 Free Software Foundation,
-Inc.
-
-   Copying and distribution of this file, with or without modification,
-are permitted in any medium without royalty provided the copyright
-notice and this notice are preserved.  This file is offered as-is,
-without warranty of any kind.
-
-Basic Installation
-==================
-
-   Briefly, the shell commands `./configure; make; make install' should
-configure, build, and install this package.  The following
-more-detailed instructions are generic; see the `README' file for
-instructions specific to this package.  Some packages provide this
-`INSTALL' file but do not implement all of the features documented
-below.  The lack of an optional feature in a given package is not
-necessarily a bug.  More recommendations for GNU packages can be found
-in *note Makefile Conventions: (standards)Makefile Conventions.
-
-   The `configure' shell script attempts to guess correct values for
-various system-dependent variables used during compilation.  It uses
-those values to create a `Makefile' in each directory of the package.
-It may also create one or more `.h' files containing system-dependent
-definitions.  Finally, it creates a shell script `config.status' that
-you can run in the future to recreate the current configuration, and a
-file `config.log' containing compiler output (useful mainly for
-debugging `configure').
-
-   It can also use an optional file (typically called `config.cache'
-and enabled with `--cache-file=config.cache' or simply `-C') that saves
-the results of its tests to speed up reconfiguring.  Caching is
-disabled by default to prevent problems with accidental use of stale
-cache files.
-
-   If you need to do unusual things to compile the package, please try
-to figure out how `configure' could check whether to do them, and mail
-diffs or instructions to the address given in the `README' so they can
-be considered for the next release.  If you are using the cache, and at
-some point `config.cache' contains results you don't want to keep, you
-may remove or edit it.
-
-   The file `configure.ac' (or `configure.in') is used to create
-`configure' by a program called `autoconf'.  You need `configure.ac' if
-you want to change it or regenerate `configure' using a newer version
-of `autoconf'.
-
-   The simplest way to compile this package is:
-
-  1. `cd' to the directory containing the package's source code and type
-     `./configure' to configure the package for your system.
-
-     Running `configure' might take a while.  While running, it prints
-     some messages telling which features it is checking for.
-
-  2. Type `make' to compile the package.
-
-  3. Optionally, type `make check' to run any self-tests that come with
-     the package, generally using the just-built uninstalled binaries.
-
-  4. Type `make install' to install the programs and any data files and
-     documentation.  When installing into a prefix owned by root, it is
-     recommended that the package be configured and built as a regular
-     user, and only the `make install' phase executed with root
-     privileges.
-
-  5. Optionally, type `make installcheck' to repeat any self-tests, but
-     this time using the binaries in their final installed location.
-     This target does not install anything.  Running this target as a
-     regular user, particularly if the prior `make install' required
-     root privileges, verifies that the installation completed
-     correctly.
-
-  6. You can remove the program binaries and object files from the
-     source code directory by typing `make clean'.  To also remove the
-     files that `configure' created (so you can compile the package for
-     a different kind of computer), type `make distclean'.  There is
-     also a `make maintainer-clean' target, but that is intended mainly
-     for the package's developers.  If you use it, you may have to get
-     all sorts of other programs in order to regenerate files that came
-     with the distribution.
-
-  7. Often, you can also type `make uninstall' to remove the installed
-     files again.  In practice, not all packages have tested that
-     uninstallation works correctly, even though it is required by the
-     GNU Coding Standards.
-
-  8. Some packages, particularly those that use Automake, provide `make
-     distcheck', which can be used by developers to test that all other
-     targets like `make install' and `make uninstall' work correctly.
-     This target is generally not run by end users.
-
-Compilers and Options
-=====================
-
-   Some systems require unusual options for compilation or linking that
-the `configure' script does not know about.  Run `./configure --help'
-for details on some of the pertinent environment variables.
-
-   You can give `configure' initial values for configuration parameters
-by setting variables in the command line or in the environment.  Here
-is an example:
-
-     ./configure CC=c99 CFLAGS=-g LIBS=-lposix
-
-   *Note Defining Variables::, for more details.
-
-Compiling For Multiple Architectures
-====================================
-
-   You can compile the package for more than one kind of computer at the
-same time, by placing the object files for each architecture in their
-own directory.  To do this, you can use GNU `make'.  `cd' to the
-directory where you want the object files and executables to go and run
-the `configure' script.  `configure' automatically checks for the
-source code in the directory that `configure' is in and in `..'.  This
-is known as a "VPATH" build.
-
-   With a non-GNU `make', it is safer to compile the package for one
-architecture at a time in the source code directory.  After you have
-installed the package for one architecture, use `make distclean' before
-reconfiguring for another architecture.
-
-   On MacOS X 10.5 and later systems, you can create libraries and
-executables that work on multiple system types--known as "fat" or
-"universal" binaries--by specifying multiple `-arch' options to the
-compiler but only a single `-arch' option to the preprocessor.  Like
-this:
-
-     ./configure CC="gcc -arch i386 -arch x86_64 -arch ppc -arch ppc64" \
-                 CXX="g++ -arch i386 -arch x86_64 -arch ppc -arch ppc64" \
-                 CPP="gcc -E" CXXCPP="g++ -E"
-
-   This is not guaranteed to produce working output in all cases; you
-may have to build one architecture at a time and combine the results
-using the `lipo' tool if you have problems.
-
-Installation Names
-==================
-
-   By default, `make install' installs the package's commands under
-`/usr/local/bin', include files under `/usr/local/include', etc.  You
-can specify an installation prefix other than `/usr/local' by giving
-`configure' the option `--prefix=PREFIX', where PREFIX must be an
-absolute file name.
-
-   You can specify separate installation prefixes for
-architecture-specific files and architecture-independent files.  If you
-pass the option `--exec-prefix=PREFIX' to `configure', the package uses
-PREFIX as the prefix for installing programs and libraries.
-Documentation and other data files still use the regular prefix.
-
-   In addition, if you use an unusual directory layout you can give
-options like `--bindir=DIR' to specify different values for particular
-kinds of files.  Run `configure --help' for a list of the directories
-you can set and what kinds of files go in them.  In general, the
-default for these options is expressed in terms of `${prefix}', so that
-specifying just `--prefix' will affect all of the other directory
-specifications that were not explicitly provided.
-
-   The most portable way to affect installation locations is to pass the
-correct locations to `configure'; however, many packages provide one or
-both of the following shortcuts of passing variable assignments to the
-`make install' command line to change installation locations without
-having to reconfigure or recompile.
-
-   The first method involves providing an override variable for each
-affected directory.  For example, `make install
-prefix=/alternate/directory' will choose an alternate location for all
-directory configuration variables that were expressed in terms of
-`${prefix}'.  Any directories that were specified during `configure',
-but not in terms of `${prefix}', must each be overridden at install
-time for the entire installation to be relocated.  The approach of
-makefile variable overrides for each directory variable is required by
-the GNU Coding Standards, and ideally causes no recompilation.
-However, some platforms have known limitations with the semantics of
-shared libraries that end up requiring recompilation when using this
-method, particularly noticeable in packages that use GNU Libtool.
-
-   The second method involves providing the `DESTDIR' variable.  For
-example, `make install DESTDIR=/alternate/directory' will prepend
-`/alternate/directory' before all installation names.  The approach of
-`DESTDIR' overrides is not required by the GNU Coding Standards, and
-does not work on platforms that have drive letters.  On the other hand,
-it does better at avoiding recompilation issues, and works well even
-when some directory options were not specified in terms of `${prefix}'
-at `configure' time.
-
-Optional Features
-=================
-
-   If the package supports it, you can cause programs to be installed
-with an extra prefix or suffix on their names by giving `configure' the
-option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'.
-
-   Some packages pay attention to `--enable-FEATURE' options to
-`configure', where FEATURE indicates an optional part of the package.
-They may also pay attention to `--with-PACKAGE' options, where PACKAGE
-is something like `gnu-as' or `x' (for the X Window System).  The
-`README' should mention any `--enable-' and `--with-' options that the
-package recognizes.
-
-   For packages that use the X Window System, `configure' can usually
-find the X include and library files automatically, but if it doesn't,
-you can use the `configure' options `--x-includes=DIR' and
-`--x-libraries=DIR' to specify their locations.
-
-   Some packages offer the ability to configure how verbose the
-execution of `make' will be.  For these packages, running `./configure
---enable-silent-rules' sets the default to minimal output, which can be
-overridden with `make V=1'; while running `./configure
---disable-silent-rules' sets the default to verbose, which can be
-overridden with `make V=0'.
-
-Particular systems
-==================
-
-   On HP-UX, the default C compiler is not ANSI C compatible.  If GNU
-CC is not installed, it is recommended to use the following options in
-order to use an ANSI C compiler:
-
-     ./configure CC="cc -Ae -D_XOPEN_SOURCE=500"
-
-and if that doesn't work, install pre-built binaries of GCC for HP-UX.
-
-   HP-UX `make' updates targets which have the same time stamps as
-their prerequisites, which makes it generally unusable when shipped
-generated files such as `configure' are involved.  Use GNU `make'
-instead.
-
-   On OSF/1 a.k.a. Tru64, some versions of the default C compiler cannot
-parse its `<wchar.h>' header file.  The option `-nodtk' can be used as
-a workaround.  If GNU CC is not installed, it is therefore recommended
-to try
-
-     ./configure CC="cc"
-
-and if that doesn't work, try
-
-     ./configure CC="cc -nodtk"
-
-   On Solaris, don't put `/usr/ucb' early in your `PATH'.  This
-directory contains several dysfunctional programs; working variants of
-these programs are available in `/usr/bin'.  So, if you need `/usr/ucb'
-in your `PATH', put it _after_ `/usr/bin'.
-
-   On Haiku, software installed for all users goes in `/boot/common',
-not `/usr/local'.  It is recommended to use the following options:
-
-     ./configure --prefix=/boot/common
-
-Specifying the System Type
-==========================
-
-   There may be some features `configure' cannot figure out
-automatically, but needs to determine by the type of machine the package
-will run on.  Usually, assuming the package is built to be run on the
-_same_ architectures, `configure' can figure that out, but if it prints
-a message saying it cannot guess the machine type, give it the
-`--build=TYPE' option.  TYPE can either be a short name for the system
-type, such as `sun4', or a canonical name which has the form:
-
-     CPU-COMPANY-SYSTEM
-
-where SYSTEM can have one of these forms:
-
-     OS
-     KERNEL-OS
-
-   See the file `config.sub' for the possible values of each field.  If
-`config.sub' isn't included in this package, then this package doesn't
-need to know the machine type.
-
-   If you are _building_ compiler tools for cross-compiling, you should
-use the option `--target=TYPE' to select the type of system they will
-produce code for.
-
-   If you want to _use_ a cross compiler, that generates code for a
-platform different from the build platform, you should specify the
-"host" platform (i.e., that on which the generated programs will
-eventually be run) with `--host=TYPE'.
-
-Sharing Defaults
-================
-
-   If you want to set default values for `configure' scripts to share,
-you can create a site shell script called `config.site' that gives
-default values for variables like `CC', `cache_file', and `prefix'.
-`configure' looks for `PREFIX/share/config.site' if it exists, then
-`PREFIX/etc/config.site' if it exists.  Or, you can set the
-`CONFIG_SITE' environment variable to the location of the site script.
-A warning: not all `configure' scripts look for a site script.
-
-Defining Variables
-==================
-
-   Variables not defined in a site shell script can be set in the
-environment passed to `configure'.  However, some packages may run
-configure again during the build, and the customized values of these
-variables may be lost.  In order to avoid this problem, you should set
-them in the `configure' command line, using `VAR=value'.  For example:
-
-     ./configure CC=/usr/local2/bin/gcc
-
-causes the specified `gcc' to be used as the C compiler (unless it is
-overridden in the site shell script).
-
-Unfortunately, this technique does not work for `CONFIG_SHELL' due to
-an Autoconf bug.  Until the bug is fixed you can use this workaround:
-
-     CONFIG_SHELL=/bin/bash /bin/bash ./configure CONFIG_SHELL=/bin/bash
-
-`configure' Invocation
-======================
-
-   `configure' recognizes the following options to control how it
-operates.
-
-`--help'
-`-h'
-     Print a summary of all of the options to `configure', and exit.
-
-`--help=short'
-`--help=recursive'
-     Print a summary of the options unique to this package's
-     `configure', and exit.  The `short' variant lists options used
-     only in the top level, while the `recursive' variant lists options
-     also present in any nested packages.
-
-`--version'
-`-V'
-     Print the version of Autoconf used to generate the `configure'
-     script, and exit.
-
-`--cache-file=FILE'
-     Enable the cache: use and save the results of the tests in FILE,
-     traditionally `config.cache'.  FILE defaults to `/dev/null' to
-     disable caching.
-
-`--config-cache'
-`-C'
-     Alias for `--cache-file=config.cache'.
-
-`--quiet'
-`--silent'
-`-q'
-     Do not print messages saying which checks are being made.  To
-     suppress all normal output, redirect it to `/dev/null' (any error
-     messages will still be shown).
-
-`--srcdir=DIR'
-     Look for the package's source code in directory DIR.  Usually
-     `configure' can determine that directory automatically.
-
-`--prefix=DIR'
-     Use DIR as the installation prefix.  *note Installation Names::
-     for more details, including other options available for fine-tuning
-     the installation locations.
-
-`--no-create'
-`-n'
-     Run the configure checks, but stop before creating any output
-     files.
-
-`configure' also accepts some other, not widely useful, options.  Run
-`configure --help' for more details.
-
diff --git a/pllrepo/Makefile.am b/pllrepo/Makefile.am
deleted file mode 100644
index 381c18f..0000000
--- a/pllrepo/Makefile.am
+++ /dev/null
@@ -1,8 +0,0 @@
-# Top-level Automake input: the MPI subdirectory is entered only when BUILD_MPI holds.
-ACLOCAL_AMFLAGS = -I m4 --install
-SUBDIRS =
-if BUILD_MPI
-SUBDIRS += MPI
-endif
-SUBDIRS += src man examples
-
diff --git a/pllrepo/NEWS b/pllrepo/NEWS
deleted file mode 100644
index e69de29..0000000
diff --git a/pllrepo/README b/pllrepo/README
deleted file mode 100644
index e69de29..0000000
diff --git a/pllrepo/configure.ac b/pllrepo/configure.ac
deleted file mode 100644
index 5e6dd2f..0000000
--- a/pllrepo/configure.ac
+++ /dev/null
@@ -1,125 +0,0 @@
-#                                               -*- Autoconf -*-
-# Process this file with autoconf to produce a configure script.
-#
-# Build configuration for libpll: probes the toolchain, headers and library
-# functions, then decides which library variants (generic, AVX, SSE3, and their
-# pthreads/MPI flavours) the generated Makefiles will build.
-
-AC_PREREQ([2.68])
-AC_INIT([libpll], [1.0.0], [Tomas.Flouri at h-its.org])
-AC_CONFIG_MACRO_DIR([m4])
-# Automake is initialised exactly once, with every option it needs.
-AM_INIT_AUTOMAKE([subdir-objects])
-
-# AM_MAINTAINER_MODE
-
-LIBPLL_VERSION=1.0.0
-LIBPLL_MAJOR=1
-LIBPLL_MINOR=0
-LIBPLL_REV=0
-
-AC_CONFIG_SRCDIR([src/pll.h])
-AC_CONFIG_HEADERS([config.h])
-
-# Checks for programs.
-AC_PROG_CXX
-AC_PROG_CC
-AC_PROG_MAKE_SET
-AC_PROG_RANLIB
-AC_PROG_CPP
-LT_INIT
-
-AM_PROG_CC_C_O
-# Checks for libraries.
-AC_CHECK_LIB([m], [log])
-
-# Checks for header files.
-AC_CHECK_HEADERS([fcntl.h float.h limits.h malloc.h stddef.h stdint.h stdlib.h string.h strings.h sys/time.h unistd.h])
-
-# Checks for typedefs, structures, and compiler characteristics.
-AC_HEADER_STDBOOL
-AC_C_INLINE
-AC_TYPE_INT64_T
-AC_TYPE_SIZE_T
-AC_TYPE_SSIZE_T
-AC_TYPE_UINT32_T
-AC_TYPE_UINT64_T
-AC_TYPE_UINT8_T
-AC_CHECK_TYPES([ptrdiff_t])
-
-# Checks for library functions.
-AC_FUNC_ERROR_AT_LINE
-AC_FUNC_MALLOC
-AC_FUNC_MMAP
-AC_FUNC_REALLOC
-AC_CHECK_FUNCS([atexit clock_gettime getcwd getpagesize gettimeofday memmove memset munmap pow sqrt strcasecmp strchr strdup strndup strstr])
-
-# pthreads counts as available only when both the library and its header are found.
-have_pthreads=no
-AC_SEARCH_LIBS([pthread_create], [pthread], [have_pthreads=yes])
-
-if test "x${have_pthreads}" = xyes; then
-  AC_CHECK_HEADERS([pthread.h], [], [have_pthreads=no])
-fi
-
-# Defaults for each library variant; detection and the command line may
-# override them below.
-have_generic=yes
-have_avx=no
-have_avx_pthreads=no
-have_avx_mpi=no
-have_sse3=no
-have_sse3_pthreads=no
-have_sse3_mpi=no
-
-AX_EXT
-
-# The blanks around '=' are required: without them the shell hands test a
-# single non-empty word, which is always true regardless of have_pthreads.
-if test "x${ax_cv_have_avx_ext}" = "xyes"; then
-  have_avx=yes
-  if test "x${have_pthreads}" = "xyes"; then
-    have_avx_pthreads=yes
-  fi
-fi
-
-if test "x${ax_cv_have_sse3_ext}" = "xyes"; then
-  have_sse3=yes
-  if test "x${have_pthreads}" = "xyes"; then
-    have_sse3_pthreads=yes
-  fi
-fi
-
-# One declaration per feature: the action runs for both --enable-X and
-# --disable-X, and enableval carries which of the two was given.
-AC_ARG_ENABLE([generic],[AS_HELP_STRING([--enable-generic],[build generic version of the library])],[have_generic=${enableval}])
-AC_ARG_ENABLE([avx],[AS_HELP_STRING([--enable-avx],[build AVX version of the library])],[have_avx=${enableval}])
-AC_ARG_ENABLE([avx-pthreads],[AS_HELP_STRING([--enable-avx-pthreads],[build pthreads AVX version of the library])],[have_avx_pthreads=${enableval}])
-AC_ARG_ENABLE([avx-mpi],[AS_HELP_STRING([--enable-avx-mpi],[build MPI AVX version of the library])],[have_avx_mpi=${enableval}])
-AC_ARG_ENABLE([sse3],[AS_HELP_STRING([--enable-sse3],[build SSE3 version of the library])],[have_sse3=${enableval}])
-AC_ARG_ENABLE([sse3-pthreads],[AS_HELP_STRING([--enable-sse3-pthreads],[build pthreads SSE3 version of the library])],[have_sse3_pthreads=${enableval}])
-AC_ARG_ENABLE([sse3-mpi],[AS_HELP_STRING([--enable-sse3-mpi],[build MPI SSE3 version of the library])],[have_sse3_mpi=${enableval}])
-
-AX_MPI([have_mpi=yes],[have_mpi=no])
-
-# NOTE(review): have_avx_mpi and have_sse3_mpi are set above but no conditional
-# below consumes them -- confirm whether the MPI variants should be gated here.
-AM_CONDITIONAL(BUILD_GENERIC, test "x${have_generic}" = "xyes")
-AM_CONDITIONAL(BUILD_AVX, test "x${have_avx}" = "xyes")
-AM_CONDITIONAL(BUILD_AVX_PTHREADS, test "x${have_avx_pthreads}" = "xyes")
-AM_CONDITIONAL(BUILD_SSE3, test "x${have_sse3}" = "xyes")
-AM_CONDITIONAL(BUILD_SSE3_PTHREADS, test "x${have_sse3_pthreads}" = "xyes")
-AM_CONDITIONAL(BUILD_MPI, test "x${have_mpi}" = "xyes")
-
-AC_SUBST(LIBPLL_VERSION)
-AC_SUBST(LIBPLL_MAJOR)
-AC_SUBST(LIBPLL_MINOR)
-AC_SUBST(LIBPLL_REV)
-AC_SUBST(MPICC)
-
-AC_CONFIG_FILES([Makefile
-                 MPI/Makefile
-                 src/Makefile
-                 man/Makefile
-                 examples/Makefile])
-AC_OUTPUT
diff --git a/pllrepo/sources.am b/pllrepo/sources.am
deleted file mode 100644
index 22a89c4..0000000
--- a/pllrepo/sources.am
+++ /dev/null
@@ -1,2 +0,0 @@
-ALL_SOURCES = ../src/genericParallelization.c ../src/hash.c ../src/stack.c ../src/ssort.c ../src/queue.c ../src/utils.c ../src/randomTree.c ../src/optimizeModel.c ../src/trash.c ../src/searchAlgo.c ../src/topologies.c ../src/fastDNAparsimony.c ../src/treeIO.c ../src/models.c ../src/evaluatePartialGenericSpecial.c ../src/evaluateGenericSpecial.c ../src/newviewGenericSpecial.c ../src/makenewzGenericSpecial.c ../src/bipartitionList.c ../src/restartHashTable.c ../src/recom.c ../src/lexer.c . [...]
-AVX_SOURCES = ../src/avxLikelihood.c $(ALL_SOURCES) 
diff --git a/pllrepo/src/CMakeLists.txt b/pllrepo/src/CMakeLists.txt
deleted file mode 100644
index fd99063..0000000
--- a/pllrepo/src/CMakeLists.txt
+++ /dev/null
@@ -1,46 +0,0 @@
-#set( CMAKE_C_FLAGS  "${CMAKE_C_FLAGS} -Wall -msse3 -DRAXML_USE_LLALLOC -D_USE_PTHREADS -D_OPTIMIZED_FUNCTIONS -D__SIM_SSE3 -fno-builtin" )
-
-#add_executable( raxml_light axml.c  optimizeModel.c trash.c searchAlgo.c topologies.c treeIO.c models.c evaluatePartialGenericSpecial.c evaluateGenericSpecial.c newviewGenericSpecial.c makenewzGenericSpecial.c bipartitionList.c restartHashTable.c fastDNAparsimony.c randomTree.c lockless_allocator/ll_alloc.c mem_alloc.c recom.c)
-
-#target_link_libraries( raxml_light m pthread )
-
-# The AVX kernels form their own library; it is skipped when BINARY32 is set
-# or when IQTREE_FLAGS matches "novx".
-if (NOT BINARY32 AND NOT IQTREE_FLAGS MATCHES "novx")
-  add_library(pllavx avxLikelihood.c)
-endif()
-
-# Sources that go into the pll library in every configuration.
-set(PLL_SOURCES
-  alignment.c
-  bipartitionList.c
-  evaluateGenericSpecial.c
-  evaluatePartialGenericSpecial.c
-  fastDNAparsimony.c
-  hardware.c
-  hash.c
-  lexer.c
-  makenewzGenericSpecial.c
-  models.c
-  newick.c
-  newviewGenericSpecial.c
-  optimizeModel.c
-  parsePartition.c
-  queue.c
-  randomTree.c
-  recom.c
-  restartHashTable.c
-  searchAlgo.c
-  ssort.c
-  stack.c
-  topologies.c
-  trash.c
-  treeIO.c
-  utils.c)
-
-# genericParallelization.c is added only when IQTREE_FLAGS matches "omp".
-if (IQTREE_FLAGS MATCHES "omp")
-  list(APPEND PLL_SOURCES genericParallelization.c)
-endif()
-
-add_library(pll ${PLL_SOURCES})
\ No newline at end of file
diff --git a/pllrepo/src/Makefile.ALL b/pllrepo/src/Makefile.ALL
deleted file mode 100644
index 573521e..0000000
--- a/pllrepo/src/Makefile.ALL
+++ /dev/null
@@ -1,24 +0,0 @@
-# Builds every architecture-specific static library of libpll by delegating
-# to the matching per-architecture makefile (Makefile.<ARCH>).
-#
-# MAKE is deliberately not redefined here: GNU make already sets it to the
-# make program that is running, so the sub-makes are run with that same program.
-RM = rm -f
-TARGET = libpll
-VERSION = 1.0.0
-ARCHS = AVX SSE3 AVX-PTHREADS SSE3-PTHREADS AVX-MPI SSE3-MPI ARM
-STATICLIBS = $(patsubst %,$(TARGET)-%.a.$(VERSION),$(ARCHS))
-
-all: $(STATICLIBS)
-
-# Static pattern rule: libpll-<ARCH>.a.<VERSION> is produced by Makefile.<ARCH>.
-# Objects are wiped before each sub-build -- presumably because the
-# per-architecture makefiles reuse the same *.o names; verify against them.
-$(STATICLIBS): $(TARGET)-%.a.$(VERSION): Makefile.%
-	-$(RM) *.o
-	$(MAKE) -f $<
-
-clean:
-	-$(RM) *.a $(STATICLIBS)
-
-.PHONY: all clean
diff --git a/pllrepo/src/Makefile.ARM b/pllrepo/src/Makefile.ARM
deleted file mode 100644
index 340fb5c..0000000
--- a/pllrepo/src/Makefile.ARM
+++ /dev/null
@@ -1,51 +0,0 @@
-CC = gcc 
-AR = ar
-CFLAGS =  -c -D_GNU_SOURCE -O2 -g -fomit-frame-pointer -funroll-loops -Wall -Wredundant-decls  -Wreturn-type  -Wswitch-default -Wimplicit  -Wimplicit-function-declaration  -Wimplicit-int -Wimport -Wunused-label -Wno-int-to-pointer-cast -Wbad-function-cast  -Wmissing-declarations -Wmissing-prototypes  -Wnested-externs  -Wold-style-definition -Wstrict-prototypes -Wpointer-sign -Wextra -Wredundant-decls -Wunused -Wunused-function -Wunused-parameter -Wunused-value  -Wunused-variable -Wformat [...]
-ARFLAGS = rvs
-TARGET = libpll
-ARCH = ARM
-VERSION = 1.0.0
-STATICLIB = $(TARGET)-$(ARCH).a.$(VERSION)	# static library
-SHAREDOBJ = $(TARGET)-$(ARCH).so.$(VERSION)	# shared object
-GLOBAL_DEPS = pll.h globalVariables.h
-RM = rm -f
-
-OBJ = hash.o stack.o ssort.o queue.o utils.o randomTree.o optimizeModel.o trash.o searchAlgo.o topologies.o fastDNAparsimony.o treeIO.o models.o evaluatePartialGenericSpecial.o evaluateGenericSpecial.o newviewGenericSpecial.o makenewzGenericSpecial.o bipartitionList.o restartHashTable.o recom.o lexer.o alignment.o newick.o parsePartition.o
-
-all: $(STATICLIB)
-
-$(STATICLIB): $(OBJ)
-	@echo "==> Building PLL Library ($(STATICLIB))"
-	$(AR) $(ARFLAGS) $@ $(OBJ)
-	ln -sf $(STATICLIB) $(TARGET)-$(ARCH).a
-
-bipartitionList.o : bipartitionList.c $(GLOBAL_DEPS)
-evaluatePartialSpecialGeneric.o : evaluatePartialSpecialGeneric.c $(GLOBAL_DEPS)
-optimizeModel.o : optimizeModel.c $(GLOBAL_DEPS)
-trash.o : trash.c $(GLOBAL_DEPS)
-searchAlgo.o : searchAlgo.c $(GLOBAL_DEPS)
-topologies.o : topologies.c $(GLOBAL_DEPS)
-treeIO.o : treeIO.c $(GLOBAL_DEPS)
-models.o : models.c $(GLOBAL_DEPS)
-evaluatePartialGenericSpecial.o : evaluatePartialGenericSpecial.c $(GLOBAL_DEPS)
-evaluateGenericSpecial.o : evaluateGenericSpecial.c $(GLOBAL_DEPS)
-newviewGenericSpecial.o : newviewGenericSpecial.c $(GLOBAL_DEPS)
-makenewzGenericSpecial.o : makenewzGenericSpecial.c $(GLOBAL_DEPS)
-restartHashTable.o : restartHashTable.c $(GLOBAL_DEPS)
-randomTree.o : randomTree.c $(GLOBAL_DEPS)
-fastDNAparsimony.o : fastDNAparsimony.c  $(GLOBAL_DEPS)
-recom.o : recom.c  $(GLOBAL_DEPS)
-queue.o : queue.c $(GLOBAL_DEPS)
-stack.o : stack.c $(GLOBAL_DEPS)
-hash.o : hash.c $(GLOBAL_DEPS)
-lexer.o : lexer.c $(GLOBAL_DEPS)
-alignment.o: alignment.c $(GLOBAL_DEPS)
-newick.o: newick.c $(GLOBAL_DEPS)
-parsePartition.o: parsePartition.c $(GLOBAL_DEPS)
-ssort.o : ssort.c $(GLOBAL_DEPS)
-
-clean : 
-	-$(RM) *.o $(STATICLIB) $(SHAREDOBJ)
-
-.PHONY: all clean
-.INTERMEDIATE: $(OBJ)
diff --git a/pllrepo/src/Makefile.AVX b/pllrepo/src/Makefile.AVX
deleted file mode 100644
index cb48dca..0000000
--- a/pllrepo/src/Makefile.AVX
+++ /dev/null
@@ -1,60 +0,0 @@
-CC = gcc
-AR = ar
-CFLAGS = -g -c -O2 -D_GNU_SOURCE -D__SSE3 -D__AVX -msse3 -fomit-frame-pointer -funroll-loops -Wall -Wunused-parameter -Wredundant-decls  -Wreturn-type  -Wswitch-default -Wunused-value -Wimplicit  -Wimplicit-function-declaration  -Wimplicit-int -Wimport  -Wunused-label -Wno-int-to-pointer-cast -Wbad-function-cast  -Wmissing-declarations -Wmissing-prototypes  -Wnested-externs  -Wold-style-definition -Wstrict-prototypes  -Wpointer-sign -Wextra -Wredundant-decls -Wunused -Wunused-parameter - [...]
-ARFLAGS = rvs
-TARGET = libpll
-ARCH = avx
-VERSION = 1.0.0
-STATICLIB = $(TARGET)-$(ARCH).a.$(VERSION)	# static library
-SHAREDOBJ = $(TARGET)-$(ARCH).so.$(VERSION)	# shared object
-GLOBAL_DEPS = pll.h globalVariables.h
-RM = rm -f
-
-OBJ = avxLikelihood.o evaluateGenericSpecial.o hash.o models.o queue.o restartHashTable.o stack.o treeIO.o evaluatePartialGenericSpecial.o makenewzGenericSpecial.o newviewGenericSpecial.o randomTree.o searchAlgo.o topologies.o utils.o bipartitionList.o fastDNAparsimony.o optimizeModel.o recom.o trash.o lexer.o alignment.o ssort.o newick.o parsePartition.o parsimony.o
-
-all: $(STATICLIB)
-
-$(STATICLIB): $(OBJ)
-	@echo "==> Building PLL Library ($(STATICLIB))"
-	$(AR) $(ARFLAGS) $@ $(OBJ)
-	ln -sf $(STATICLIB) $(TARGET)-$(ARCH).a
-
-avxLikelihood.o : avxLikelihood.c $(GLOBAL_DEPS)
-	$(CC) $(CFLAGS) -mavx -c -o $@ $<
-
-fastDNAparsimony.o : fastDNAparsimony.c $(GLOBAL_DEPS)
-	$(CC) $(CFLAGS) -mavx -c -o $@ $<
-
-parsimony.o : parsimony.c $(GLOBAL_DEPS)
-	$(CC) $(CFLAGS) -mavx -c -o $@ $<
-
-bipartitionList.o : bipartitionList.c $(GLOBAL_DEPS)
-evaluatePartialSpecialGeneric.o : evaluatePartialSpecialGeneric.c $(GLOBAL_DEPS)
-optimizeModel.o : optimizeModel.c $(GLOBAL_DEPS)
-trash.o : trash.c $(GLOBAL_DEPS)
-searchAlgo.o : searchAlgo.c $(GLOBAL_DEPS)
-utils.o : utils.c $(GLOBAL_DEPS)
-topologies.o : topologies.c $(GLOBAL_DEPS)
-treeIO.o : treeIO.c $(GLOBAL_DEPS)
-models.o : models.c $(GLOBAL_DEPS)
-evaluatePartialGenericSpecial.o : evaluatePartialGenericSpecial.c $(GLOBAL_DEPS)
-evaluateGenericSpecial.o : evaluateGenericSpecial.c $(GLOBAL_DEPS)
-newviewGenericSpecial.o : newviewGenericSpecial.c $(GLOBAL_DEPS)
-makenewzGenericSpecial.o : makenewzGenericSpecial.c $(GLOBAL_DEPS)
-restartHashTable.o : restartHashTable.c $(GLOBAL_DEPS)
-randomTree.o : randomTree.c $(GLOBAL_DEPS)
-recom.o : recom.c  $(GLOBAL_DEPS)
-queue.o : queue.c $(GLOBAL_DEPS)
-stack.o : stack.c $(GLOBAL_DEPS)
-hash.o : hash.c $(GLOBAL_DEPS)
-lexer.o : lexer.c $(GLOBAL_DEPS)
-alignment.o: alignment.c $(GLOBAL_DEPS)
-newick.o: newick.c $(GLOBAL_DEPS)
-parsePartition.o: parsePartition.c $(GLOBAL_DEPS)
-ssort.o : ssort.c $(GLOBAL_DEPS)
-
-clean : 
-	-$(RM) *.o $(STATICLIB) $(SHAREDOBJ)
-
-.PHONY: all clean
-.INTERMEDIATE: $(OBJ)
diff --git a/pllrepo/src/Makefile.AVX-MPI b/pllrepo/src/Makefile.AVX-MPI
deleted file mode 100644
index c928256..0000000
--- a/pllrepo/src/Makefile.AVX-MPI
+++ /dev/null
@@ -1,59 +0,0 @@
-CC = mpicc
-CFLAGS = -g -c -O2 -D_GNU_SOURCE -D__AVX -D_FINE_GRAIN_MPI -D__SSE3 -msse3 -O2 -fomit-frame-pointer -funroll-loops -Wall -Wredundant-decls  -Wreturn-type  -Wswitch-default -Wimplicit  -Wimplicit-function-declaration  -Wimplicit-int -Wimport -Wunused-label -Wno-int-to-pointer-cast -Wbad-function-cast  -Wmissing-declarations -Wmissing-prototypes  -Wnested-externs  -Wold-style-definition -Wstrict-prototypes -Wpointer-sign -Wextra -Wredundant-decls -Wunused -Wunused-function -Wunused-paramet [...]
-ARFLAGS = rvs
-TARGET = libpll
-ARCH = AVX-MPI
-VERSION = 1.0.0
-STATICLIB = $(TARGET)-$(ARCH).a.$(VERSION)	# static library
-SHAREDOBJ = $(TARGET)-$(ARCH).so.$(VERSION)	# shared object
-GLOBAL_DEPS = pll.h globalVariables.h
-RM = rm -f
-
-OBJ = hash.o stack.o ssort.o queue.o utils.o randomTree.o optimizeModel.o trash.o searchAlgo.o topologies.o fastDNAparsimony.o treeIO.o models.o evaluatePartialGenericSpecial.o evaluateGenericSpecial.o newviewGenericSpecial.o makenewzGenericSpecial.o bipartitionList.o restartHashTable.o recom.o  genericParallelization.o avxLikelihood.o lexer.o alignment.o ssort.o newick.o parsePartition.o parsimony.o
-
-all : $(STATICLIB)
-
-$(STATICLIB) : $(OBJ)
-	@echo "==> Building PLL Library ($(STATICLIB))"
-	$(AR) $(ARFLAGS) $@ $+
-	ln -sf $(STATICLIB) $(TARGET)-$(ARCH).a
-
-fastDNAparsimony.o : fastDNAparsimony.c $(GLOBAL_DEPS)
-	$(CC) $(CFLAGS) -mavx -c -o $@ $<
-
-parsimony.o : parsimony.c $(GLOBAL_DEPS)
-	$(CC) $(CFLAGS) -mavx -c -o $@ $<
-
-avxLikelihood.o : avxLikelihood.c $(GLOBAL_DEPS)
-	$(CC) $(CFLAGS) -mavx -c -o $@ $<
-
-bipartitionList.o : bipartitionList.c $(GLOBAL_DEPS)
-evaluatePartialSpecialGeneric.o : evaluatePartialSpecialGeneric.c $(GLOBAL_DEPS)
-optimizeModel.o : optimizeModel.c $(GLOBAL_DEPS)
-trash.o : trash.c $(GLOBAL_DEPS)
-searchAlgo.o : searchAlgo.c $(GLOBAL_DEPS)
-topologies.o : topologies.c $(GLOBAL_DEPS)
-parsePartitions.o : parsePartitions.c $(GLOBAL_DEPS)
-treeIO.o : treeIO.c $(GLOBAL_DEPS)
-models.o : models.c $(GLOBAL_DEPS)
-evaluatePartialGenericSpecial.o : evaluatePartialGenericSpecial.c $(GLOBAL_DEPS)
-evaluateGenericSpecial.o : evaluateGenericSpecial.c $(GLOBAL_DEPS)
-newviewGenericSpecial.o : newviewGenericSpecial.c $(GLOBAL_DEPS)
-makenewzGenericSpecial.o : makenewzGenericSpecial.c $(GLOBAL_DEPS)
-mesh.o : mesh.c $(GLOBAL_DEPS)
-fineGrainMpi.o :  fineGrainMpi.c  $(GLOBAL_DEPS)
-restartHashTable.o : restartHashTable.c $(GLOBAL_DEPS)
-queue.o : queue.c $(GLOBAL_DEPS)
-stack.o : stack.c $(GLOBAL_DEPS)
-hash.o : hash.c $(GLOBAL_DEPS)
-lexer.o : lexer.c $(GLOBAL_DEPS)
-alignment.o: alignment.c $(GLOBAL_DEPS)
-newick.o: newick.c $(GLOBAL_DEPS)
-parsePartition.o: parsePartition.c $(GLOBAL_DEPS)
-ssort.o : ssort.c $(GLOBAL_DEPS)
-
-clean : 
-	$(RM) *.o $(STATICLIB) $(SHAREDOBJ)
-
-.PHONY: all clean
-.INTERMEDIATE: $(OBJ)
diff --git a/pllrepo/src/Makefile.AVX-PTHREADS b/pllrepo/src/Makefile.AVX-PTHREADS
deleted file mode 100644
index c72e486..0000000
--- a/pllrepo/src/Makefile.AVX-PTHREADS
+++ /dev/null
@@ -1,61 +0,0 @@
-CC = gcc 
-AR = ar
-CFLAGS = -g -c -O2 -D_GNU_SOURCE -D_USE_PTHREADS -D__SSE3 -msse3 -D__AVX -fomit-frame-pointer -funroll-loops -Wall -Wredundant-decls  -Wreturn-type  -Wswitch-default -Wimplicit  -Wimplicit-function-declaration  -Wimplicit-int -Wimport -Wunused-label -Wno-int-to-pointer-cast -Wbad-function-cast  -Wmissing-declarations -Wmissing-prototypes  -Wnested-externs  -Wold-style-definition -Wstrict-prototypes -Wpointer-sign -Wextra -Wredundant-decls -Wunused -Wunused-function -Wunused-parameter -Wu [...]
-ARFLAGS = rvs
-TARGET = libpll
-ARCH = AVX-PTHREADS
-VERSION = 1.0.0
-STATICLIB = $(TARGET)-$(ARCH).a.$(VERSION)	# static library
-SHAREDOBJ = $(TARGET)-$(ARCH).so.$(VERSION)	# shared object
-GLOBAL_DEPS = pll.h globalVariables.h
-RM = rm -f
-
-OBJ = hash.o stack.o ssort.o queue.o utils.o optimizeModel.o trash.o searchAlgo.o topologies.o treeIO.o models.o evaluatePartialGenericSpecial.o evaluateGenericSpecial.o newviewGenericSpecial.o makenewzGenericSpecial.o bipartitionList.o restartHashTable.o avxLikelihood.o fastDNAparsimony.o randomTree.o lexer.o recom.o genericParallelization.o alignment.o newick.o parsePartition.o parsimony.o
-
-
-all: $(STATICLIB)
-
-$(STATICLIB) : $(OBJ)
-	@echo "==> Building PLL Library ($(STATICLIB))"
-	$(AR) $(ARFLAGS) $@ $(OBJ)
-	ln -sf $(STATICLIB) $(TARGET)-$(ARCH).a
-
-avxLikelihood.o : avxLikelihood.c $(GLOBAL_DEPS)
-	$(CC) $(CFLAGS) -mavx -c -o $@ $<
-
-fastDNAparsimony.o : fastDNAparsimony.c $(GLOBAL_DEPS)
-	$(CC) $(CFLAGS) -mavx -c -o $@ $<
-
-parsimony.o : parsimony.c $(GLOBAL_DEPS)
-	$(CC) $(CFLAGS) -mavx -c -o $@ $<
-
-genericParallelization.o : genericParallelization.c $(GLOBAL_DEPS)
-bipartitionList.o : bipartitionList.c $(GLOBAL_DEPS)
-evaluatePartialSpecialGeneric.o : evaluatePartialSpecialGeneric.c $(GLOBAL_DEPS)
-optimizeModel.o : optimizeModel.c $(GLOBAL_DEPS)
-trash.o : trash.c $(GLOBAL_DEPS)
-utils.o : utils.c $(GLOBAL_DEPS)
-searchAlgo.o : searchAlgo.c $(GLOBAL_DEPS)
-topologies.o : topologies.c $(GLOBAL_DEPS)
-treeIO.o : treeIO.c $(GLOBAL_DEPS)
-models.o : models.c $(GLOBAL_DEPS)
-evaluatePartialGenericSpecial.o : evaluatePartialGenericSpecial.c $(GLOBAL_DEPS)
-evaluateGenericSpecial.o : evaluateGenericSpecial.c $(GLOBAL_DEPS)
-newviewGenericSpecial.o : newviewGenericSpecial.c $(GLOBAL_DEPS)
-makenewzGenericSpecial.o : makenewzGenericSpecial.c $(GLOBAL_DEPS)
-randomTree.o : randomTree.c $(GLOBAL_DEPS)
-restartHashTable.o : restartHashTable.c $(GLOBAL_DEPS)
-recom.o : recom.c  $(GLOBAL_DEPS)
-queue.o : queue.c  $(GLOBAL_DEPS)
-stack.o : stack.c $(GLOBAL_DEPS)
-hash.o : hash.c $(GLOBAL_DEPS)
-alignment.o: alignment.c $(GLOBAL_DEPS)
-newick.o: newick.c $(GLOBAL_DEPS)
-parsePartition.o: parsePartition.c $(GLOBAL_DEPS)
-ssort.o : ssort.c $(GLOBAL_DEPS)
-
-clean : 
-	-$(RM) *.o $(STATICLIB) $(SHAREDOBJ)
-
-.PHONY: all clean
-.INTERMEDIATE: $(OBJ)
diff --git a/pllrepo/src/Makefile.AVX.clang b/pllrepo/src/Makefile.AVX.clang
deleted file mode 100644
index 71ab72b..0000000
--- a/pllrepo/src/Makefile.AVX.clang
+++ /dev/null
@@ -1,57 +0,0 @@
-CC = clang
-AR = ar
-CFLAGS = -g -c -O2 -D__SSE3 -D__AVX -msse3 -fomit-frame-pointer -funroll-loops -Wall -Wunused-parameter -Wredundant-decls  -Wreturn-type  -Wswitch-default -Wunused-value -Wimplicit  -Wimplicit-function-declaration  -Wimplicit-int -Wimport  -Wunused-label -Wno-int-to-pointer-cast -Wbad-function-cast  -Wmissing-declarations -Wmissing-prototypes  -Wnested-externs  -Wold-style-definition -Wstrict-prototypes  -Wpointer-sign -Wextra -Wredundant-decls -Wunused -Wunused-parameter -Wunused-value  [...]
-ARFLAGS = rvs
-TARGET = libpll
-ARCH = avx
-VERSION = 1.0.0
-STATICLIB = $(TARGET)-$(ARCH).a.$(VERSION)	# static library
-SHAREDOBJ = $(TARGET)-$(ARCH).so.$(VERSION)	# shared object
-GLOBAL_DEPS = pll.h globalVariables.h
-RM = rm -f
-
-OBJ = avxLikelihood.o evaluateGenericSpecial.o hash.o models.o queue.o restartHashTable.o stack.o treeIO.o evaluatePartialGenericSpecial.o makenewzGenericSpecial.o newviewGenericSpecial.o randomTree.o searchAlgo.o topologies.o utils.o bipartitionList.o fastDNAparsimony.o optimizeModel.o recom.o trash.o lexer.o alignment.o ssort.o newick.o parsePartition.o
-
-all: $(STATICLIB)
-
-$(STATICLIB): $(OBJ)
-	@echo "==> Building PLL Library ($(STATICLIB))"
-	$(AR) $(ARFLAGS) $@ $(OBJ)
-	ln -sf $(STATICLIB) $(TARGET)-$(ARCH).a
-
-avxLikelihood.o : avxLikelihood.c $(GLOBAL_DEPS)
-	$(CC) $(CFLAGS) -mavx -c -o $@ $<
-
-fastDNAparsimony.o : fastDNAparsimony.c $(GLOBAL_DEPS)
-	$(CC) $(CFLAGS) -mavx -c -o $@ $<
-
-bipartitionList.o : bipartitionList.c $(GLOBAL_DEPS)
-evaluatePartialSpecialGeneric.o : evaluatePartialSpecialGeneric.c $(GLOBAL_DEPS)
-optimizeModel.o : optimizeModel.c $(GLOBAL_DEPS)
-trash.o : trash.c $(GLOBAL_DEPS)
-searchAlgo.o : searchAlgo.c $(GLOBAL_DEPS)
-utils.o : utils.c $(GLOBAL_DEPS)
-topologies.o : topologies.c $(GLOBAL_DEPS)
-treeIO.o : treeIO.c $(GLOBAL_DEPS)
-models.o : models.c $(GLOBAL_DEPS)
-evaluatePartialGenericSpecial.o : evaluatePartialGenericSpecial.c $(GLOBAL_DEPS)
-evaluateGenericSpecial.o : evaluateGenericSpecial.c $(GLOBAL_DEPS)
-newviewGenericSpecial.o : newviewGenericSpecial.c $(GLOBAL_DEPS)
-makenewzGenericSpecial.o : makenewzGenericSpecial.c $(GLOBAL_DEPS)
-restartHashTable.o : restartHashTable.c $(GLOBAL_DEPS)
-randomTree.o : randomTree.c $(GLOBAL_DEPS)
-recom.o : recom.c  $(GLOBAL_DEPS)
-queue.o : queue.c $(GLOBAL_DEPS)
-stack.o : stack.c $(GLOBAL_DEPS)
-hash.o : hash.c $(GLOBAL_DEPS)
-lexer.o : lexer.c $(GLOBAL_DEPS)
-alignment.o: alignment.c $(GLOBAL_DEPS)
-newick.o: newick.c $(GLOBAL_DEPS)
-parsePartition.o: parsePartition.c $(GLOBAL_DEPS)
-ssort.o : ssort.c $(GLOBAL_DEPS)
-
-clean : 
-	-$(RM) *.o $(STATICLIB) $(SHAREDOBJ)
-
-.PHONY: all clean
-.INTERMEDIATE: $(OBJ)
diff --git a/pllrepo/src/Makefile.AVX.shared b/pllrepo/src/Makefile.AVX.shared
deleted file mode 100644
index 3752291..0000000
--- a/pllrepo/src/Makefile.AVX.shared
+++ /dev/null
@@ -1,68 +0,0 @@
-CC = gcc
-AR = ar
-CFLAGS = -fPIC -g -c -O2 -D__SSE3 -D__AVX -msse3 -fomit-frame-pointer -funroll-loops -Wall -Wunused-parameter -Wredundant-decls  -Wreturn-type  -Wswitch-default -Wunused-value -Wimplicit  -Wimplicit-function-declaration  -Wimplicit-int -Wimport  -Wunused-label -Wno-int-to-pointer-cast -Wbad-function-cast  -Wmissing-declarations -Wmissing-prototypes  -Wnested-externs  -Wold-style-definition -Wstrict-prototypes  -Wpointer-sign -Wextra -Wredundant-decls -Wunused -Wunused-parameter -Wunused- [...]
-ARFLAGS = rvs
-TARGET = libpll
-ARCH = AVX
-VERSION = 1.0.0
-STATICLIB = $(TARGET)-$(ARCH).a.$(VERSION)	# static library
-SHAREDOBJ = $(TARGET)-$(ARCH).so.$(VERSION)	# shared object
-GLOBAL_DEPS = pll.h globalVariables.h
-RM = rm -f
-
-OBJ = avxLikelihood.o evaluateGenericSpecial.o hash.o models.o queue.o restartHashTable.o stack.o treeIO.o evaluatePartialGenericSpecial.o makenewzGenericSpecial.o newviewGenericSpecial.o randomTree.o searchAlgo.o topologies.o utils.o bipartitionList.o fastDNAparsimony.o optimizeModel.o recom.o trash.o lexer.o common.o alignment.o fasta.o phylip.o ssort.o newick.o part.o
-
-all: $(SHAREDOBJ)
-
-$(SHAREDOBJ): $(OBJ)
-	@echo "==> Building PLL Library ($@)"
-	$(CC) -shared -Wl,-soname,$@ -o $@ $(OBJ)
-	ln -sf $(SHAREDOBJ) $(TARGET)-$(ARCH).so
-
-avxLikelihood.o : avxLikelihood.c $(GLOBAL_DEPS)
-	$(CC) $(CFLAGS) -mavx -c -o $@ $<
-
-fastDNAparsimony.o : fastDNAparsimony.c $(GLOBAL_DEPS)
-	$(CC) $(CFLAGS) -mavx -c -o $@ $<
-
-bipartitionList.o : bipartitionList.c $(GLOBAL_DEPS)
-evaluatePartialSpecialGeneric.o : evaluatePartialSpecialGeneric.c $(GLOBAL_DEPS)
-optimizeModel.o : optimizeModel.c $(GLOBAL_DEPS)
-trash.o : trash.c $(GLOBAL_DEPS)
-pll.o : pll.c $(GLOBAL_DEPS)
-searchAlgo.o : searchAlgo.c $(GLOBAL_DEPS)
-utils.o : utils.c $(GLOBAL_DEPS)
-topologies.o : topologies.c $(GLOBAL_DEPS)
-treeIO.o : treeIO.c $(GLOBAL_DEPS)
-models.o : models.c $(GLOBAL_DEPS)
-evaluatePartialGenericSpecial.o : evaluatePartialGenericSpecial.c $(GLOBAL_DEPS)
-evaluateGenericSpecial.o : evaluateGenericSpecial.c $(GLOBAL_DEPS)
-newviewGenericSpecial.o : newviewGenericSpecial.c $(GLOBAL_DEPS)
-makenewzGenericSpecial.o : makenewzGenericSpecial.c $(GLOBAL_DEPS)
-restartHashTable.o : restartHashTable.c $(GLOBAL_DEPS)
-randomTree.o : randomTree.c $(GLOBAL_DEPS)
-recom.o : recom.c  $(GLOBAL_DEPS)
-queue.o : queue.c $(GLOBAL_DEPS)
-stack.o : stack.c $(GLOBAL_DEPS)
-hash.o : hash.c $(GLOBAL_DEPS)
-lexer.o : lexer.c $(GLOBAL_DEPS)
-common.o: parser/common.c $(GLOBAL_DEPS)
-	$(CC) $(CFLAGS) -o $@ $<
-alignment.o: parser/alignment/alignment.c $(GLOBAL_DEPS)
-	$(CC) $(CFLAGS) -o $@ $<
-phylip.o: parser/alignment/phylip.c $(GLOBAL_DEPS)
-	$(CC) $(CFLAGS) -o $@ $<
-fasta.o: parser/alignment/fasta.c $(GLOBAL_DEPS)
-	$(CC) $(CFLAGS) -o $@ $<
-newick.o: parser/newick/newick.c $(GLOBAL_DEPS)
-	$(CC) $(CFLAGS) -o $@ $<
-part.o: parser/partition/part.c $(GLOBAL_DEPS)
-	$(CC) $(CFLAGS) -o $@ $<
-ssort.o : parser/ssort.c $(GLOBAL_DEPS)
-	$(CC) $(CFLAGS) -o $@ $<
-
-clean : 
-	-$(RM) *.o $(STATICLIB) $(SHAREDOBJ)
-
-.PHONY: all clean
-.INTERMEDIATE: $(OBJ)
diff --git a/pllrepo/src/Makefile.MIC-PTHREADS b/pllrepo/src/Makefile.MIC-PTHREADS
deleted file mode 100644
index 8f2d701..0000000
--- a/pllrepo/src/Makefile.MIC-PTHREADS
+++ /dev/null
@@ -1,62 +0,0 @@
-CC = icc 
-AR = ar
-MICFLAGS = -mmic -std=c99 -D__MIC_NATIVE -opt-streaming-cache-evict=0 # -D_DEBUG_MSG
-COMMON_FLAGS = $(MICFLAGS) -c -D_GNU_SOURCE -D_USE_PTHREADS -fomit-frame-pointer -funroll-loops -Wall #-Wredundant-decls  -Wreturn-type  -Wswitch-default -Wimplicit  -Wimplicit-function-declaration  -Wimplicit-int -Wimport -Wunused-label -Wno-int-to-pointer-cast -Wbad-function-cast  -Wmissing-declarations -Wmissing-prototypes  -Wnested-externs  -Wold-style-definition -Wstrict-prototypes -Wpointer-sign -Wextra -Wredundant-decls -Wunused -Wunused-function -Wunused-parameter -Wunused-value  [...]
-OPT1_FLAGS = -O1
-OPT2_FLAGS = -O2
-CFLAGS = $(COMMON_FLAGS) $(OPT2_FLAGS)
-
-ARFLAGS = rvs
-TARGET = libpll
-ARCH = MIC-PTHREADS
-VERSION = 1.0.0
-STATICLIB = $(TARGET)-$(ARCH).a.$(VERSION)	# static library
-SHAREDOBJ = $(TARGET)-$(ARCH).so.$(VERSION)	# shared object
-GLOBAL_DEPS = pll.h globalVariables.h
-RM = rm -f
-
-OBJ = hash.o stack.o ssort.o queue.o utils.o optimizeModel.o trash.o searchAlgo.o topologies.o treeIO.o models.o evaluatePartialGenericSpecial.o evaluateGenericSpecial.o newviewGenericSpecial.o makenewzGenericSpecial.o bipartitionList.o restartHashTable.o mic_native_dna.o mic_native_aa.o fastDNAparsimony.o randomTree.o lexer.o recom.o genericParallelization.o alignment.o newick.o parsePartition.o
-
-
-all: $(STATICLIB)
-
-$(STATICLIB) : $(OBJ)
-	@echo "==> Building PLL Library ($(STATICLIB))"
-	$(AR) $(ARFLAGS) $@ $(OBJ)
-	ln -sf $(STATICLIB) $(TARGET)-$(ARCH).a
-
-models.o : models.c $(GLOBAL_DEPS)
-	$(CC) $(COMMON_FLAGS) $(OPT1_FLAGS) -c -o models.o models.c
-
-fastDNAparsimony.o : fastDNAparsimony.c $(GLOBAL_DEPS)
-genericParallelization.o : genericParallelization.c $(GLOBAL_DEPS)
-bipartitionList.o : bipartitionList.c $(GLOBAL_DEPS)
-evaluatePartialSpecialGeneric.o : evaluatePartialSpecialGeneric.c $(GLOBAL_DEPS)
-optimizeModel.o : optimizeModel.c $(GLOBAL_DEPS)
-trash.o : trash.c $(GLOBAL_DEPS)
-utils.o : utils.c $(GLOBAL_DEPS)
-searchAlgo.o : searchAlgo.c $(GLOBAL_DEPS)
-topologies.o : topologies.c $(GLOBAL_DEPS)
-treeIO.o : treeIO.c $(GLOBAL_DEPS)
-evaluatePartialGenericSpecial.o : evaluatePartialGenericSpecial.c $(GLOBAL_DEPS)
-evaluateGenericSpecial.o : evaluateGenericSpecial.c $(GLOBAL_DEPS)
-newviewGenericSpecial.o : newviewGenericSpecial.c $(GLOBAL_DEPS)
-makenewzGenericSpecial.o : makenewzGenericSpecial.c $(GLOBAL_DEPS)
-randomTree.o : randomTree.c $(GLOBAL_DEPS)
-restartHashTable.o : restartHashTable.c $(GLOBAL_DEPS)
-recom.o : recom.c  $(GLOBAL_DEPS)
-queue.o : queue.c  $(GLOBAL_DEPS)
-stack.o : stack.c $(GLOBAL_DEPS)
-hash.o : hash.c $(GLOBAL_DEPS)
-alignment.o: alignment.c $(GLOBAL_DEPS)
-newick.o: newick.c $(GLOBAL_DEPS)
-parsePartition.o: parsePartition.c $(GLOBAL_DEPS)
-ssort.o : ssort.c $(GLOBAL_DEPS)
-mic_native_dna.o : mic_native_dna.c $(GLOBAL_DEPS)
-mic_native_aa.o : mic_native_aa.c $(GLOBAL_DEPS)
-
-clean : 
-	-$(RM) *.o $(STATICLIB) $(SHAREDOBJ)
-
-.PHONY: all clean
-.INTERMEDIATE: $(OBJ)
diff --git a/pllrepo/src/Makefile.SSE3 b/pllrepo/src/Makefile.SSE3
deleted file mode 100644
index 2afbe71..0000000
--- a/pllrepo/src/Makefile.SSE3
+++ /dev/null
@@ -1,52 +0,0 @@
-CC = gcc 
-AR = ar
-CFLAGS = -g -c -O2 -D_GNU_SOURCE -D__SSE3 -msse3 -fomit-frame-pointer -funroll-loops -Wall -Wredundant-decls  -Wreturn-type  -Wswitch-default -Wimplicit  -Wimplicit-function-declaration  -Wimplicit-int -Wimport -Wunused-label -Wno-int-to-pointer-cast -Wbad-function-cast  -Wmissing-declarations -Wmissing-prototypes  -Wnested-externs  -Wold-style-definition -Wstrict-prototypes -Wpointer-sign -Wextra -Wredundant-decls -Wunused -Wunused-function -Wunused-parameter -Wunused-value  -Wunused-va [...]
-ARFLAGS = rvs
-TARGET = libpll
-ARCH = SSE3
-VERSION = 1.0.0
-STATICLIB = $(TARGET)-$(ARCH).a.$(VERSION)	# static library
-SHAREDOBJ = $(TARGET)-$(ARCH).so.$(VERSION)	# shared object
-GLOBAL_DEPS = pll.h globalVariables.h
-RM = rm -f
-
-OBJ = hash.o stack.o ssort.o queue.o utils.o randomTree.o optimizeModel.o trash.o searchAlgo.o topologies.o fastDNAparsimony.o treeIO.o models.o evaluatePartialGenericSpecial.o evaluateGenericSpecial.o newviewGenericSpecial.o makenewzGenericSpecial.o bipartitionList.o restartHashTable.o recom.o lexer.o alignment.o newick.o parsePartition.o parsimony.o
-
-all: $(STATICLIB)
-
-$(STATICLIB): $(OBJ)
-	@echo "==> Building PLL Library ($(STATICLIB))"
-	$(AR) $(ARFLAGS) $@ $(OBJ)
-	ln -sf $(STATICLIB) $(TARGET)-$(ARCH).a
-
-bipartitionList.o : bipartitionList.c $(GLOBAL_DEPS)
-evaluatePartialSpecialGeneric.o : evaluatePartialSpecialGeneric.c $(GLOBAL_DEPS)
-optimizeModel.o : optimizeModel.c $(GLOBAL_DEPS)
-trash.o : trash.c $(GLOBAL_DEPS)
-searchAlgo.o : searchAlgo.c $(GLOBAL_DEPS)
-topologies.o : topologies.c $(GLOBAL_DEPS)
-treeIO.o : treeIO.c $(GLOBAL_DEPS)
-models.o : models.c $(GLOBAL_DEPS)
-evaluatePartialGenericSpecial.o : evaluatePartialGenericSpecial.c $(GLOBAL_DEPS)
-evaluateGenericSpecial.o : evaluateGenericSpecial.c $(GLOBAL_DEPS)
-newviewGenericSpecial.o : newviewGenericSpecial.c $(GLOBAL_DEPS)
-makenewzGenericSpecial.o : makenewzGenericSpecial.c $(GLOBAL_DEPS)
-restartHashTable.o : restartHashTable.c $(GLOBAL_DEPS)
-randomTree.o : randomTree.c $(GLOBAL_DEPS)
-fastDNAparsimony.o : fastDNAparsimony.c  $(GLOBAL_DEPS)
-parsimony.o : parsimony.c $(GLOBAL_DEPS)
-recom.o : recom.c  $(GLOBAL_DEPS)
-queue.o : queue.c $(GLOBAL_DEPS)
-stack.o : stack.c $(GLOBAL_DEPS)
-hash.o : hash.c $(GLOBAL_DEPS)
-lexer.o : lexer.c $(GLOBAL_DEPS)
-alignment.o: alignment.c $(GLOBAL_DEPS)
-newick.o: newick.c $(GLOBAL_DEPS)
-parsePartition.o: parsePartition.c $(GLOBAL_DEPS)
-ssort.o : ssort.c $(GLOBAL_DEPS)
-
-clean : 
-	-$(RM) *.o $(STATICLIB) $(SHAREDOBJ)
-
-.PHONY: all clean
-.INTERMEDIATE: $(OBJ)
diff --git a/pllrepo/src/Makefile.SSE3-MPI b/pllrepo/src/Makefile.SSE3-MPI
deleted file mode 100644
index ecf8023..0000000
--- a/pllrepo/src/Makefile.SSE3-MPI
+++ /dev/null
@@ -1,50 +0,0 @@
-CC = mpicc
-AR = ar
-CFLAGS = -g -c -O2 -D_GNU_SOURCE -D_FINE_GRAIN_MPI -D__SSE3 -msse3 -O2 -fomit-frame-pointer -funroll-loops -Wall -Wunused-parameter -Wredundant-decls  -Wreturn-type  -Wswitch-default -Wunused-value -Wimplicit  -Wimplicit-function-declaration  -Wimplicit-int -Wimport  -Wunused  -Wunused-function  -Wunused-label -Wno-int-to-pointer-cast -Wbad-function-cast  -Wmissing-declarations -Wmissing-prototypes  -Wnested-externs  -Wold-style-definition -Wstrict-prototypes -Wpointer-sign -Wextra -Wred [...]
-ARFLAGS = rvs
-TARGET = libpll
-ARCH = SSE3-MPI
-VERSION = 1.0.0
-STATICLIB = $(TARGET)-$(ARCH).a.$(VERSION)	# static library
-SHAREDOBJ = $(TARGET)-$(ARCH).so.$(VERSION)	# shared object
-GLOBAL_DEPS = pll.h globalVariables.h
-RM = rm -f
-
-OBJ = hash.o stack.o ssort.o queue.o utils.o randomTree.o optimizeModel.o trash.o searchAlgo.o topologies.o fastDNAparsimony.o treeIO.o models.o evaluatePartialGenericSpecial.o evaluateGenericSpecial.o newviewGenericSpecial.o makenewzGenericSpecial.o bipartitionList.o restartHashTable.o recom.o  genericParallelization.o lexer.o alignment.o ssort.o newick.o parsePartition.o parsimony.o
-
-all : $(STATICLIB)
-
-$(STATICLIB) : $(OBJ)
-	@echo "==> Building PLL Library ($(STATICLIB))"
-	$(AR) $(ARFLAGS) $@ $+
-	ln -sf $(STATICLIB) $(TARGET)-$(ARCH).a
-
-fastDNAparsimony.o : fastDNAparsimony.c $(GLOBAL_DEPS)
-parsimony.o : parsimony.c $(GLOBAL_DEPS)
-bipartitionList.o : bipartitionList.c $(GLOBAL_DEPS)
-optimizeModel.o : optimizeModel.c $(GLOBAL_DEPS)
-trash.o : trash.c $(GLOBAL_DEPS)
-searchAlgo.o : searchAlgo.c $(GLOBAL_DEPS)
-topologies.o : topologies.c $(GLOBAL_DEPS)
-parsePartitions.o : parsePartitions.c $(GLOBAL_DEPS)
-treeIO.o : treeIO.c $(GLOBAL_DEPS)
-models.o : models.c $(GLOBAL_DEPS)
-evaluatePartialGenericSpecial.o : evaluatePartialGenericSpecial.c $(GLOBAL_DEPS)
-evaluateGenericSpecial.o : evaluateGenericSpecial.c $(GLOBAL_DEPS)
-newviewGenericSpecial.o : newviewGenericSpecial.c $(GLOBAL_DEPS)
-makenewzGenericSpecial.o : makenewzGenericSpecial.c $(GLOBAL_DEPS)
-restartHashTable.o : restartHashTable.c $(GLOBAL_DEPS)
-queue.o : queue.c $(GLOBAL_DEPS)
-stack.o : stack.c $(GLOBAL_DEPS)
-hash.o : hash.c $(GLOBAL_DEPS)
-lexer.o : lexer.c $(GLOBAL_DEPS)
-alignment.o: alignment.c $(GLOBAL_DEPS)
-newick.o: newick.c $(GLOBAL_DEPS)
-parsePartition.o: parsePartition.c $(GLOBAL_DEPS)
-ssort.o : ssort.c $(GLOBAL_DEPS)
-
-clean : 
-	-$(RM) *.o $(STATICLIB) $(SHAREDOBJ)
-
-.PHONY: all clean
-.INTERMEDIATE: $(OBJ)
diff --git a/pllrepo/src/Makefile.SSE3-PTHREADS b/pllrepo/src/Makefile.SSE3-PTHREADS
deleted file mode 100644
index fac2010..0000000
--- a/pllrepo/src/Makefile.SSE3-PTHREADS
+++ /dev/null
@@ -1,52 +0,0 @@
-CC = gcc 
-AR = ar
-CFLAGS = -g -c -O2 -D_GNU_SOURCE -D_USE_PTHREADS -D__SSE3 -msse3 -fomit-frame-pointer -funroll-loops -Wall -Wredundant-decls  -Wreturn-type  -Wswitch-default -Wimplicit  -Wimplicit-function-declaration  -Wimplicit-int -Wimport -Wunused-label -Wno-int-to-pointer-cast -Wbad-function-cast  -Wmissing-declarations -Wmissing-prototypes  -Wnested-externs  -Wold-style-definition -Wstrict-prototypes -Wpointer-sign -Wextra -Wredundant-decls -Wunused -Wunused-function -Wunused-parameter -Wunused-va [...]
-ARFLAGS = rvs
-TARGET = libpll
-ARCH = SSE3-PTHREADS
-VERSION = 1.0.0
-STATICLIB = $(TARGET)-$(ARCH).a.$(VERSION)	# static library
-SHAREDOBJ = $(TARGET)-$(ARCH).so.$(VERSION)	# shared object
-GLOBAL_DEPS = pll.h globalVariables.h
-RM = rm -f
-
-OBJ = hash.o stack.o ssort.o queue.o utils.o optimizeModel.o trash.o searchAlgo.o topologies.o treeIO.o models.o evaluatePartialGenericSpecial.o evaluateGenericSpecial.o newviewGenericSpecial.o makenewzGenericSpecial.o bipartitionList.o restartHashTable.o fastDNAparsimony.o randomTree.o lexer.o recom.o genericParallelization.o alignment.o newick.o parsePartition.o parsimony.o
-
-all: $(STATICLIB)
-
-$(STATICLIB) : $(OBJ)
-	@echo "==> Building PLL Library ($(STATICLIB))"
-	$(AR) $(ARFLAGS) $@ $(OBJ)
-	ln -sf $(STATICLIB) $(TARGET)-$(ARCH).a
-
-bipartitionList.o : bipartitionList.c $(GLOBAL_DEPS)
-genericParallelization.o : genericParallelization.c $(GLOBAL_DEPS)
-evaluatePartialSpecialGeneric.o : evaluatePartialSpecialGeneric.c $(GLOBAL_DEPS)
-optimizeModel.o : optimizeModel.c $(GLOBAL_DEPS)
-trash.o : trash.c $(GLOBAL_DEPS)
-searchAlgo.o : searchAlgo.c $(GLOBAL_DEPS)
-topologies.o : topologies.c $(GLOBAL_DEPS)
-treeIO.o : treeIO.c $(GLOBAL_DEPS)
-models.o : models.c $(GLOBAL_DEPS)
-evaluatePartialGenericSpecial.o : evaluatePartialGenericSpecial.c $(GLOBAL_DEPS)
-evaluateGenericSpecial.o : evaluateGenericSpecial.c $(GLOBAL_DEPS)
-newviewGenericSpecial.o : newviewGenericSpecial.c $(GLOBAL_DEPS)
-makenewzGenericSpecial.o : makenewzGenericSpecial.c $(GLOBAL_DEPS)
-restartHashTable.o : restartHashTable.c $(GLOBAL_DEPS)
-randomTree.o : randomTree.c $(GLOBAL_DEPS)
-fastDNAparsimony.o : fastDNAparsimony.c  $(GLOBAL_DEPS)
-parsimony.o : parsimony.c  $(GLOBAL_DEPS)
-recom.o : recom.c  $(GLOBAL_DEPS)
-queue.o : queue.c $(GLOBAL_DEPS)
-stack.o : stack.c $(GLOBAL_DEPS)
-hash.o : hash.c $(GLOBAL_DEPS)
-alignment.o: alignment.c $(GLOBAL_DEPS)
-newick.o: newick.c $(GLOBAL_DEPS)
-part.o: parsePartition.c $(GLOBAL_DEPS)
-ssort.o : ssort.c $(GLOBAL_DEPS)
-
-clean:
-	-$(RM) *.o $(STATICLIB) $(SHAREDOBJ)
-
-.PHONY: all clean
-.INTERMEDIATE: $(OBJ)
diff --git a/pllrepo/src/Makefile.am b/pllrepo/src/Makefile.am
deleted file mode 100644
index 0748b09..0000000
--- a/pllrepo/src/Makefile.am
+++ /dev/null
@@ -1,53 +0,0 @@
-#lib_LTLIBRARIES = libpll-generic.la
-lib_LTLIBRARIES = 
-#lib_LIBRARIES = libpll-generic.a
-lib_LIBRARIES = 
-libpll_generic_la_SOURCES = hash.c stack.c ssort.c queue.c utils.c randomTree.c optimizeModel.c trash.c searchAlgo.c topologies.c fastDNAparsimony.c treeIO.c models.c evaluatePartialGenericSpecial.c evaluateGenericSpecial.c newviewGenericSpecial.c makenewzGenericSpecial.c bipartitionList.c restartHashTable.c recom.c lexer.c alignment.c newick.c parsePartition.c parsimony.c
-libpll_generic_la_CFLAGS = -c -D_GNU_SOURCE -O2 -fomit-frame-pointer -funroll-loops -Wall -Wredundant-decls  -Wreturn-type  -Wswitch-default -Wimplicit  -Wimplicit-function-declaration  -Wimplicit-int -Wimport -Wunused-label -Wno-int-to-pointer-cast -Wbad-function-cast  -Wmissing-declarations -Wmissing-prototypes  -Wnested-externs  -Wold-style-definition -Wstrict-prototypes -Wpointer-sign -Wextra -Wredundant-decls -Wunused -Wunused-function -Wunused-parameter -Wunused-value  -Wunused-var [...]
-libpll_generic_la_LDFLAGS = -version-info @LIBPLL_MAJOR@:@LIBPLL_MINOR@:@LIBPLL_REV@
-libpll_generic_a_SOURCES = $(libpll_generic_la_SOURCES)
-libpll_generic_a_CFLAGS = $(libpll_generic_la_CFLAGS)
-
-if BUILD_SSE3
-lib_LTLIBRARIES += libpll-sse3.la
-libpll_sse3_la_SOURCES = $(libpll_generic_la_SOURCES)
-libpll_sse3_la_CFLAGS = -D__SSE3 -msse3 $(libpll_generic_la_CFLAGS)
-libpll_sse3_la_LDFLAGS = -version-info @LIBPLL_MAJOR@:@LIBPLL_MINOR@:@LIBPLL_REV@
-lib_LIBRARIES += libpll-sse3.a
-libpll_sse3_a_SOURCES = $(libpll_sse3_la_SOURCES)
-libpll_sse3_a_CFLAGS = $(libpll_sse3_la_CFLAGS)
-endif
-
-
-if BUILD_AVX
-lib_LTLIBRARIES += libpll-avx.la
-libpll_avx_la_SOURCES = avxLikelihood.c $(libpll_generic_la_SOURCES)
-libpll_avx_la_CFLAGS = -D__SSE3 -D__AVX -mavx -msse3 $(libpll_generic_la_CFLAGS)
-libpll_avx_la_LDFLAGS = -version-info @LIBPLL_MAJOR@:@LIBPLL_MINOR@:@LIBPLL_REV@
-lib_LIBRARIES += libpll-avx.a
-libpll_avx_a_SOURCES = $(libpll_avx_la_SOURCES)
-libpll_avx_a_CFLAGS = $(libpll_avx_la_CFLAGS)
-endif
-
-if BUILD_SSE3_PTHREADS
-lib_LTLIBRARIES += libpll-sse3-pthreads.la
-libpll_sse3_pthreads_la_SOURCES = genericParallelization.c $(libpll_generic_la_SOURCES)
-libpll_sse3_pthreads_la_CFLAGS = -D_USE_PTHREADS -D__SSE3 -msse3 $(libpll_generic_la_CFLAGS)
-libpll_sse3_pthreads_la_LDFLAGS = -version-info @LIBPLL_MAJOR@:@LIBPLL_MINOR@:@LIBPLL_REV@
-lib_LIBRARIES += libpll-sse3-pthreads.a
-libpll_sse3_pthreads_a_SOURCES = $(libpll_sse3_pthreads_la_SOURCES)
-libpll_sse3_pthreads_a_CFLAGS = $(libpll_sse3_pthreads_la_CFLAGS)
-endif
-
-if BUILD_AVX_PTHREADS
-lib_LTLIBRARIES += libpll-avx-pthreads.la
-libpll_avx_pthreads_la_SOURCES = avxLikelihood.c genericParallelization.c $(libpll_generic_la_SOURCES)
-libpll_avx_pthreads_la_CFLAGS = -D_USE_PTHREADS -D__AVX -mavx -D__SSE3 -msse3 $(libpll_generic_la_CFLAGS)
-libpll_avx_pthreads_la_LDFLAGS = -version-info @LIBPLL_MAJOR@:@LIBPLL_MINOR@:@LIBPLL_REV@
-lib_LIBRARIES += libpll-avx-pthreads.a
-libpll_avx_pthreads_a_SOURCES = $(libpll_avx_pthreads_la_SOURCES)
-libpll_avx_pthreads_a_CFLAGS = $(libpll_avx_pthreads_la_CFLAGS)
-endif
-
-pkgincludedir=$(includedir)/pll
-pkginclude_HEADERS = pll.h newick.h stack.h hash.h errcodes.h globalVariables.h lexer.h genericParallelization.h treeIO.h queue.h parsePartition.h mem_alloc.h cycle.h
diff --git a/pllrepo/src/alignment.c b/pllrepo/src/alignment.c
deleted file mode 100644
index d50f6db..0000000
--- a/pllrepo/src/alignment.c
+++ /dev/null
@@ -1,754 +0,0 @@
-/** 
- * PLL (version 1.0.0) a software library for phylogenetic inference
- * Copyright (C) 2013 Tomas Flouri and Alexandros Stamatakis
- *
- * Derived from 
- * RAxML-HPC, a program for sequential and parallel estimation of phylogenetic
- * trees by Alexandros Stamatakis
- *
- * This program is free software: you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation, either version 3 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- * 
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * For any other enquiries send an Email to Tomas Flouri
- * Tomas.Flouri at h-its.org
- *
- * When publishing work that uses PLL please cite PLL
- * 
- * @file alignment.c
- *
- * @brief Collection of routines for reading alignments
- *
- * Auxiliary functions for storing alignments read from predefined file formats
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <assert.h>
-
-#include "pll.h"
-#include "pllInternal.h"
-
-/** @defgroup alignmentGroup Reading and parsing multiple sequence alignments
-    
-    This set of functions handles the reading and parsing of several file formats that describe multiple sequence alignments. They are also responsible for storing the alignment in an internal structure
-*/
-static pllAlignmentData * pllParsePHYLIP (const char * filename);
-static pllAlignmentData * pllParseFASTA (const char * filename);
-static int read_phylip_header (int * inp, int * sequenceCount, int * sequenceLength);
-static __inline int parsedOk (int * actLen, int sequenceCount, int sequenceLength);
-static int parse_phylip (pllAlignmentData * alignmentData, int input);
-static int getFastaAlignmentInfo (int * inp, int * seqCount, int * seqLen);
-static int parseFastaAlignment (pllAlignmentData * alignmentData, int input);
-
-#ifdef __PLL_DEBUG_PARSER
-static int
-printTokens (int input)
-{
-  pllLexToken token;
-
-  do
-   {
-     NEXT_TOKEN
-
-     /* begin of parser */
-     switch (token.tokenType)
-      {
-        case PLL_TOKEN_NUMBER:
-          printf ("PLL_TOKEN_NUMBER (%.*s, %d)\n", token.len, token.lexeme, token.len);
-          break;
-        case PLL_TOKEN_STRING:
-          printf ("PLL_TOKEN_STRING (%.*s, %d)\n", token.len, token.lexeme, token.len);
-          break;
-        case PLL_TOKEN_EOF:
-          printf ("PLL_TOKEN_EOF\n");
-          break;
-        case PLL_TOKEN_WHITESPACE:
-          printf ("PLL_TOKEN_WHITESPACE\n");
-          break;
-        case PLL_TOKEN_NEWLINE:
-          printf ("PLL_TOKEN_NEWLINE\n");
-          break;
-        case PLL_TOKEN_UNKNOWN:
-          printf ("PLL_TOKEN_UNKNOWN (%.*s, %d)\n", token.len, token.lexeme, token.len);
-          break;
-        default:
-          break;
-      }
-     /* end of parser */
-
-
-   }
-  while (token.tokenType != PLL_TOKEN_EOF && token.tokenType != PLL_TOKEN_UNKNOWN);
-
-  if (token.tokenType == PLL_TOKEN_UNKNOWN) return (0);
-
-  return (1);
-}
-#endif
-
-/** @ingroup alignmentGroup
-    @brief Initialize alignment structure fields
-
-    Allocates memory for the data structure that will hold the alignment and
-    initializes it. It requires the number of sequences \a sequenceCount and
-    the length of sequences \a sequenceLength. It returns a pointer to the
-    initialized data structure.
-
-    @param sequenceCount
-      Number of sequences in the alignment
-    
-    @param sequenceLength
-      Length of the sequences
-
-    @param 
-      Initialized alignment data structured
-*/
-pllAlignmentData *
-pllInitAlignmentData (int sequenceCount, int sequenceLength)
- {
-   int i;
-   pllAlignmentData * alignmentData;
-   //void * mem;
-   //TUNG
-   unsigned char *mem;
-
-   
-   /** TODO */
-   alignmentData               =  (pllAlignmentData *) rax_malloc (sizeof (pllAlignmentData));
-   alignmentData->sequenceData = (unsigned char **) rax_malloc ((sequenceCount + 1) * sizeof (unsigned char *));
-   //mem = (void *) rax_malloc (sizeof (unsigned char) * (sequenceLength + 1) * sequenceCount);
-   //TUNG
-   mem = (unsigned char *)rax_malloc(sizeof(unsigned char) * (sequenceLength + 1) * sequenceCount);
-   for (i = 1; i <= sequenceCount; ++i)
-    {
-      alignmentData->sequenceData[i]                 = (unsigned char *) (&mem[sizeof (unsigned char) * (i - 1) * (sequenceLength + 1)]);
-      alignmentData->sequenceData[i][sequenceLength] = 0;
-    }
-   alignmentData->sequenceData[0] = NULL;
-    
-   alignmentData->sequenceLabels = (char **) rax_calloc ((sequenceCount + 1), sizeof (char *));
-
-   alignmentData->sequenceCount  = sequenceCount;
-   alignmentData->sequenceLength = sequenceLength;
-   alignmentData->originalSeqLength = sequenceLength;
-
-   /** TODO: remove siteWeights from alignment */
-   alignmentData->siteWeights    = NULL;
-
-   return (alignmentData);
- }
-
-/** @ingroup alignmentGroup
-    @brief Deallocates the memory associated with the alignment data structure
-    
-    Deallocates the memory associated with the alignment data structure \a alignmentData.
-
-    @param alignmentData
-      The alignment data structure
-*/
-void
-pllAlignmentDataDestroy (pllAlignmentData * alignmentData)
-{
-  int i;
-
-  for (i = 1; i <= alignmentData->sequenceCount; ++ i)
-   {
-     rax_free (alignmentData->sequenceLabels[i]);
-   }
-  rax_free (alignmentData->sequenceLabels);
-  rax_free (alignmentData->sequenceData[1]);
-  rax_free (alignmentData->sequenceData);
-  rax_free (alignmentData->siteWeights);
-  rax_free (alignmentData);
-}
-
-
-/** @ingroup alignmentGroup
-    @brief Prints the alignment to the console
-
-    @param alignmentData
-      The alignment data structure
-*/
-void 
-pllAlignmentDataDumpConsole (pllAlignmentData * alignmentData)
- {
-   int i;
-
-   printf ("%d %d\n", alignmentData->sequenceCount, alignmentData->sequenceLength);
-   for (i = 1; i <= alignmentData->sequenceCount; ++ i)
-    {
-      printf ("%s %s\n", alignmentData->sequenceLabels[i], alignmentData->sequenceData[i]);
-    }
- }
-
-
-
-static void dump_fasta_content(FILE * fp, pllAlignmentData * alignmentData)
-{
-  int i;
-
-  for (i = 1; i <= alignmentData->sequenceCount; ++i)
-     fprintf (fp, ">%s\n%s\n", alignmentData->sequenceLabels[i], alignmentData->sequenceData[i]);
-}
-
-static void dump_phylip_content(FILE * fp, pllAlignmentData * alignmentData)
-{
-  int i;
-
-  for (i = 1; i <= alignmentData->sequenceCount; ++i)
-     fprintf (fp, "%s %s\n", alignmentData->sequenceLabels[i], alignmentData->sequenceData[i]);
-}
-
-/** @ingroup alignmentGroup
-    @brief Dump the alignment to a file of format \a fileFormat
-
-    Dumps the alignment contained in \a alignmentData to file \a filename of type \a fileFormat.
-
-    @note If \a filename exists, all contents will be erased
-
-    @param alignmentData
-      Alignment data structure
-
-    @param fileFormat
-      Format of output file. Can take the value \b PLL_FORMAT_PHYLIP or \b PLL_FORMAT_FASTA
-
-    @param filename
-      Output filename
-
-    @return
-      Returns \b PLL_TRUE on success, otherwise \b PLL_FALSE.
-*/
-int
-pllAlignmentDataDumpFile (pllAlignmentData * alignmentData, int fileFormat, const char * filename)
-{
-  FILE * fp;
-  void (*outfun)(FILE *, pllAlignmentData *);
-  
-  if (fileFormat != PLL_FORMAT_PHYLIP && fileFormat != PLL_FORMAT_FASTA) return (PLL_FALSE);
-
-  outfun = (fileFormat == PLL_FORMAT_PHYLIP) ? dump_phylip_content : dump_fasta_content;
-
-  fp = fopen (filename,"wb");
-  if (!fp) return (PLL_FALSE);
-  
-  /* if PHYLIP print the silly header at the beginning */
-  if (fileFormat == PLL_FORMAT_PHYLIP)
-   {
-     fprintf (fp, "%d %d\n", alignmentData->sequenceCount, alignmentData->sequenceLength);
-   }
-  
-  outfun(fp, alignmentData);
-
-  fclose (fp);
-  return (PLL_TRUE);
-}
-
-
-
-/* ROUTINES FOR PHYLIP PARSING */
-/** @ingroup alignmentGroup
-    @brief Parse the PHYLIP file header
-*/
-static int
-read_phylip_header (int * inp, int * sequenceCount, int * sequenceLength)
-{
-  pllLexToken token;
-  int input;
-
-  input = *inp;
-
-
-  NEXT_TOKEN
-  CONSUME(PLL_TOKEN_WHITESPACE | PLL_TOKEN_NEWLINE)
-
-  if (token.tokenType != PLL_TOKEN_NUMBER) return (0);
-
-  *sequenceCount = atoi (token.lexeme);
-
-  NEXT_TOKEN
-  CONSUME(PLL_TOKEN_WHITESPACE | PLL_TOKEN_NEWLINE)
-  if (token.tokenType != PLL_TOKEN_NUMBER) return (0);
-
-  *sequenceLength = atoi (token.lexeme);
-
-  *inp = input;
-
-  return (*sequenceCount && *sequenceLength);
-}
-
-static __inline int
-parsedOk (int * actLen, int sequenceCount, int sequenceLength)
-{
-  int i;
-
-  for (i = 1; i <= sequenceCount; ++ i)
-   {
-     if (actLen[i] != sequenceLength) return (0);
-   }
-  
-  return (1);
-}
-
-
-/** @ingroup alignmentGroup
-    @brief Parse the PHYLIP file body
-*/
-static int
-parse_phylip (pllAlignmentData * alignmentData, int input)
-{
-  int i,j;
-  pllLexToken token;
-  int * sequenceLength;
-  int rc;
-
-  sequenceLength = (int *) rax_calloc (alignmentData->sequenceCount + 1, sizeof (int));
-
-  NEXT_TOKEN
-  for (i = 0; ; ++i)
-  {
-    j = i % alignmentData->sequenceCount;
-    if (i < alignmentData->sequenceCount) 
-     {
-       if (token.tokenType == PLL_TOKEN_EOF)
-        {
-          rc = parsedOk (sequenceLength, alignmentData->sequenceCount, alignmentData->sequenceLength);
-          rax_free (sequenceLength);
-          return (rc);
-        }
-
-       if (token.tokenType == PLL_TOKEN_UNKNOWN)
-        {
-          rax_free (sequenceLength);
-          return (0);
-        }
-
-       CONSUME(PLL_TOKEN_WHITESPACE | PLL_TOKEN_NEWLINE)
-
-
-       if (token.tokenType != PLL_TOKEN_STRING && token.tokenType != PLL_TOKEN_NUMBER && token.tokenType != PLL_TOKEN_FLOAT)
-        {
-          rax_free (sequenceLength);
-          return (0);
-        }
-       alignmentData->sequenceLabels[i + 1] = my_strndup (token.lexeme, token.len);
-       NEXT_TOKEN
-       CONSUME(PLL_TOKEN_WHITESPACE | PLL_TOKEN_NEWLINE)
-     }
-    
-    while (1)
-     {
-       if (token.tokenType == PLL_TOKEN_EOF)
-        {
-          rc = parsedOk (sequenceLength, alignmentData->sequenceCount, alignmentData->sequenceLength);
-          rax_free (sequenceLength);
-          return (rc);
-        }
-
-       if (token.tokenType == PLL_TOKEN_UNKNOWN)
-        {
-         rax_free (sequenceLength);
-         return (0);
-        }
-       
-       if (token.tokenType == PLL_TOKEN_NEWLINE) break;
-
-       if (token.tokenType != PLL_TOKEN_STRING)
-        {
-          rax_free (sequenceLength);
-          return (0);
-        }
-
-       if (sequenceLength[j + 1] + token.len > alignmentData->sequenceLength) 
-        {
-          fprintf (stderr, "Sequence %d is larger than specified\n", j + 1);
-          rax_free (sequenceLength);
-          return (0);
-        }
-       memmove (alignmentData->sequenceData[j + 1] + sequenceLength[j + 1], token.lexeme, token.len);
-       sequenceLength[j + 1] += token.len;
-
-       NEXT_TOKEN
-       CONSUME (PLL_TOKEN_WHITESPACE)
-     }
-    CONSUME(PLL_TOKEN_WHITESPACE | PLL_TOKEN_NEWLINE);
-  }
-}
-
-/* Phylip parsers. Use the following attributed grammar 
- * 
- *        S -> HEADER ENDL DATA
- *   HEADER -> PLL_TOKEN_NUMBER PLL_TOKEN_WHITESPACE PLL_TOKEN_NUMBER ENDL |
- *             PLL_TOKEN_WHITESPACE PLL_TOKEN_NUMBER PLL_TOKEN_WHITESPACE PLL_TOKEN_NUMBER ENDL
- *     ENDL -> PLL_TOKEN_WHITESPACE PLL_TOKEN_NEWLINE | PLL_TOKEN_NEWLINE
- *     DATA -> PLL_TOKEN_STRING PLL_TOKEN_WHITESPACE PLL_TOKEN_STRING ENDL DATA |
- *             PLL_TOKEN_WHITESPACE PLL_TOKEN_STRING PLL_TOKEN_WHITESPACE PLL_TOKEN_STRING ENDL DATA | 
- *             PLL_TOKEN_STRING PLL_TOKEN_WHITESPACE PLL_TOKEN_STRING PLL_TOKEN_EOF |
- *             PLL_TOKEN_WHITESPACE PLL_TOKEN_STRING PLL_TOKEN_WHITESPACE PLL_TOKEN_STRING PLL_TOKEN_EOF
- */
-
-/** @ingroup alignmentGroup
-    @brief Parse a PHYLIP file
-
-    Parses the PHYLIP file \a filename and returns a ::pllAlignmentData structure
-    with the alignment.
-
-    @param filename
-      Name of file to be parsed
-
-    @return
-      Returns a structure of type ::pllAlignmentData that contains the alignment, or \b NULL
-      in case of failure.
-*/
-static pllAlignmentData *
-pllParsePHYLIP (const char * filename)
-{
-  int 
-    i, input, sequenceCount, sequenceLength;
-  char * rawdata;
-  long filesize;
-  pllAlignmentData * alignmentData;
-
-  rawdata = pllReadFile (filename, &filesize);
-  if (!rawdata)
-   {
-     errno = PLL_ERROR_FILE_OPEN;
-     return (NULL);
-   }
-  
-  init_lexan (rawdata, filesize);
-  input = get_next_symbol();
-
-  /* parse the header to obtain the number of taxa and sequence length */
-  if (!read_phylip_header (&input, &sequenceCount, &sequenceLength))
-   {
-     rax_free (rawdata);
-     fprintf (stderr, "Error while parsing PHYLIP header (number of taxa and sequence length)\n");
-     errno = PLL_ERROR_PHYLIP_HEADER_SYNTAX;
-     return (NULL);
-   }
-
-  lex_table_amend_phylip();
-
-  /* allocate alignment structure */
-  alignmentData = pllInitAlignmentData (sequenceCount, sequenceLength);
-
-  if (! parse_phylip (alignmentData, input))
-   {
-     errno = PLL_ERROR_PHYLIP_BODY_SYNTAX;
-     pllAlignmentDataDestroy (alignmentData);
-     lex_table_restore();
-     rax_free (rawdata);
-     return (NULL);
-   }
-  
-  lex_table_restore();
-  rax_free (rawdata);
-
-  alignmentData->siteWeights  = (int *) rax_malloc (alignmentData->sequenceLength * sizeof (int));
-  for (i = 0; i < alignmentData->sequenceLength; ++ i) 
-    alignmentData->siteWeights[i] = 1;
-
-  return (alignmentData);
-}
-
-pllAlignmentData *
-pllParsePHYLIPString (const char *rawdata, long filesize)
-{
-  int
-    i, input, sequenceCount, sequenceLength;
-//  char * rawdata;
-//  long filesize;
-  pllAlignmentData * alignmentData;
-
-//  rawdata = pllReadFile (filename, &filesize);
-//  if (!rawdata)
-//   {
-//     errno = PLL_ERROR_FILE_OPEN;
-//     return (NULL);
-//   }
-
-  init_lexan (rawdata, filesize);
-  input = get_next_symbol();
-
-  /* parse the header to obtain the number of taxa and sequence length */
-  if (!read_phylip_header (&input, &sequenceCount, &sequenceLength))
-   {
-//     rax_free (rawdata);
-     fprintf (stderr, "Error while parsing PHYLIP header (number of taxa and sequence length)\n");
-     errno = PLL_ERROR_PHYLIP_HEADER_SYNTAX;
-     return (NULL);
-   }
-
-  lex_table_amend_phylip();
-
-  /* allocate alignment structure */
-  alignmentData = pllInitAlignmentData (sequenceCount, sequenceLength);
-
-  if (! parse_phylip (alignmentData, input))
-   {
-     errno = PLL_ERROR_PHYLIP_BODY_SYNTAX;
-     pllAlignmentDataDestroy (alignmentData);
-     lex_table_restore();
-//     rax_free (rawdata);
-     return (NULL);
-   }
-
-  lex_table_restore();
-//  rax_free (rawdata);
-
-  alignmentData->siteWeights  = (int *) rax_malloc (alignmentData->sequenceLength * sizeof (int));
-  for (i = 0; i < alignmentData->sequenceLength; ++ i)
-    alignmentData->siteWeights[i] = 1;
-
-  return (alignmentData);
-}
-
-/* FASTA routines */
-/* only check whether it is a valid alignment in fasta format */
-/** @ingroup alignmentGroup
-    @brief Get information about the FASTA alignment
-
-    Get the information such as number of sequences and length of sequences of a FASTA alignment
-
-    @return
-      Returns \b PLL_TRUE if the alignment is valid, otherwise \b PLL_FALSE
-*/
-static int
-getFastaAlignmentInfo (int * inp, int * seqCount, int * seqLen)
-{
-  pllLexToken token;
-  int input;
-
-  input = *inp;
-
-  *seqCount = *seqLen = 0;
-
-  NEXT_TOKEN
-  CONSUME(PLL_TOKEN_WHITESPACE | PLL_TOKEN_NEWLINE)
-
-  if (token.tokenType != PLL_TOKEN_NUMBER && token.tokenType != PLL_TOKEN_STRING) return (PLL_FALSE);
-
-  while (1)
-   {
-     switch (token.tokenType)
-      {
-        case PLL_TOKEN_EOF:
-          return (PLL_TRUE);
-
-        case PLL_TOKEN_NUMBER:
-        case PLL_TOKEN_STRING:
-          if (token.len < 2 || token.lexeme[0] != '>') return (0);
-          break;
-        default:
-          return (PLL_FALSE);
-      }
-     
-     NEXT_TOKEN
-     CONSUME(PLL_TOKEN_WHITESPACE | PLL_TOKEN_NEWLINE)
-
-     /* read second token (sequence) */
-     switch (token.tokenType)
-      {
-        case PLL_TOKEN_EOF:
-          return (PLL_FALSE);
-          break;
-
-        case PLL_TOKEN_NUMBER:
-        case PLL_TOKEN_STRING:
-          if (!*seqLen)
-            *seqLen = token.len;
-          else
-           {
-             if (*seqLen != token.len) return (0);
-           }
-          break;
-        default:
-          return (PLL_FALSE);
-      }
-     NEXT_TOKEN
-     CONSUME(PLL_TOKEN_WHITESPACE | PLL_TOKEN_NEWLINE)
-     ++ (*seqCount);
-   }
-
-  return (PLL_TRUE);
-}
-
-/** @ingroup alignmentGroup
-    @brief Check whether the FASTA content is valid
-*/
-static int
-parseFastaAlignment (pllAlignmentData * alignmentData, int input)
-{
-  pllLexToken token;
-  int i;
-
-  NEXT_TOKEN
-  CONSUME(PLL_TOKEN_WHITESPACE | PLL_TOKEN_NEWLINE)
-
-  if (token.tokenType != PLL_TOKEN_NUMBER && token.tokenType != PLL_TOKEN_STRING) return (0);
-
-  i = 1;
-  while (1)
-   {
-     /* first parse the sequence label */
-     switch (token.tokenType)
-      {
-        case PLL_TOKEN_EOF:
-          return (1);
-          break;
-
-        case PLL_TOKEN_NUMBER:
-        case PLL_TOKEN_STRING:
-          alignmentData->sequenceLabels[i] = my_strndup (token.lexeme + 1, token.len - 1);
-          break;
-        default:
-          return (0);
-      }
-     
-     NEXT_TOKEN
-     CONSUME(PLL_TOKEN_WHITESPACE | PLL_TOKEN_NEWLINE)
-
-     /* now parse the sequence itself */
-     switch (token.tokenType)
-      {
-        case PLL_TOKEN_EOF:
-          return (0);
-          break;
-
-        case PLL_TOKEN_NUMBER:
-        case PLL_TOKEN_STRING:
-          memmove (alignmentData->sequenceData[i], token.lexeme, token.len);
-          break;
-        default:
-          return (0);
-      }
-     NEXT_TOKEN
-     CONSUME(PLL_TOKEN_WHITESPACE | PLL_TOKEN_NEWLINE)
-     ++ i;
-   }
-}
-
-
-/** @ingroup alignmentGroup
-    @brief Parse a FASTA file
-    
-    Parses the FASTA file \a filename and returns a ::pllAlignmentData structure
-    with the alignment.
-
-    @param filename
-      Name of file to be parsed
-
-    @return
-      Returns a structure of type ::pllAlignmentData that contains the alignment, or \b NULL
-      in case of failure.
-*/
-static pllAlignmentData *
-pllParseFASTA (const char * filename)
-{
-  int
-    i,
-    seqLen,
-    seqCount,
-    input;
-  long filesize;
-
-  char * rawdata;
-  pllAlignmentData * alignmentData;
-
-  rawdata = pllReadFile (filename, &filesize);
-  if (!rawdata)
-   {
-     errno = PLL_ERROR_FILE_OPEN;
-     return (NULL);
-   }
-
-  lex_table_amend_fasta ();
-  
-  init_lexan (rawdata, filesize);
-  input = get_next_symbol ();
-
-
-  if (!getFastaAlignmentInfo (&input, &seqCount, &seqLen))
-   {
-     errno = PLL_ERROR_FASTA_SYNTAX;
-     lex_table_restore ();
-     rax_free (rawdata);
-     return (NULL);
-   }
-  
-  alignmentData = pllInitAlignmentData (seqCount, seqLen);
-  
-  printf ("\n---------------\n\n");
-
-  init_lexan (rawdata, filesize);
-  input = get_next_symbol ();
-
-  if (!parseFastaAlignment (alignmentData, input))
-   {
-     errno = PLL_ERROR_FASTA_SYNTAX;
-     pllAlignmentDataDestroy (alignmentData);
-     lex_table_restore();
-     rax_free(rawdata);
-     return (NULL);
-   }
-
-  /* allocate alignment structure */
-
-
-  lex_table_restore ();
-  rax_free (rawdata);
-
-  alignmentData->siteWeights = (int *) rax_malloc (alignmentData->sequenceLength * sizeof (int));
-  for (i = 0; i < alignmentData->sequenceLength; ++ i)
-    alignmentData->siteWeights[i] = 1;
-
-  return (alignmentData);
-}
-
-
-
-/** @ingroup alignmentGroup
-    @brief Parse a file that contains a multiple sequence alignment
-
-    Parses the file \a filename of type \a fileType which contains a multiple sequence alignment.
-    The supported file types are the sequential and interleaved versions of PHYLIP format, and
-    the FASTA format. The parsed alignment is returned as a pointer to a structure of type
-    ::pllAlignmentData
-
-    @param fileType
-      Type of file to parse. Can be either \b PLL_FORMAT_PHYLIP or \b PLL_FORMAT_FASTA
-
-    @param filename
-      Name of file to parse
-
-    @return
-      Returns a structure of type ::pllAlignmentData that contains the multiple sequence alignment,
-      otherwise returns \b NULL in case of failure.
-*/
-pllAlignmentData *
-pllParseAlignmentFile (int fileType, const char * filename)
-{
-
-  switch (fileType)
-   {
-     case PLL_FORMAT_PHYLIP:
-       return (pllParsePHYLIP (filename));
-     case PLL_FORMAT_FASTA:
-       return (pllParseFASTA (filename));
-     default:
-       /* RTFM */
-       errno = PLL_ERROR_INVALID_FILETYPE;
-       return (NULL);
-   }
-}
diff --git a/pllrepo/src/avxLikelihood.c b/pllrepo/src/avxLikelihood.c
deleted file mode 100644
index 5202883..0000000
--- a/pllrepo/src/avxLikelihood.c
+++ /dev/null
@@ -1,4111 +0,0 @@
-/** 
- * PLL (version 1.0.0) a software library for phylogenetic inference
- * Copyright (C) 2013 Tomas Flouri and Alexandros Stamatakis
- *
- * Derived from 
- * RAxML-HPC, a program for sequential and parallel estimation of phylogenetic
- * trees by Alexandros Stamatakis
- *
- * This program is free software: you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation, either version 3 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- * 
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * For any other enquiries send an Email to Tomas Flouri
- * Tomas.Flouri at h-its.org
- *
- * When publishing work that uses PLL please cite PLL
- * 
- * @file avxLikelihood.c
- *
- * @brief AVX versions of the likelihood functions
- *
- * AVX versions of the likelihood functions
- */
-#ifndef WIN32
-#include <unistd.h>
-#endif
-
-#include <math.h>
-#include <time.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <ctype.h>
-#include <string.h>
-#include <stdint.h>
-#include <limits.h>
-#include <stdint.h>
-#include <xmmintrin.h>
-#include <pmmintrin.h>
-#include <immintrin.h>
-#include <assert.h>
-
-#ifdef _FMA
-#include <x86intrin.h>
-#define FMAMACC(a,b,c) _mm256_fmadd_pd(b,c,a)
-#endif
-
-#include "pll.h"
-#include "pllInternal.h"
-
-extern const unsigned int mask32[32];
-
-PLL_ALIGN_BEGIN const union PLL_ALIGN_END
-{
-  uint64_t i[4];
-  __m256d m;
-  
-} absMask_AVX = {{0x7fffffffffffffffULL, 0x7fffffffffffffffULL, 0x7fffffffffffffffULL, 0x7fffffffffffffffULL}};
-
-
-
-static __inline __m256d hadd4(__m256d v, __m256d u)
-{ 
-  __m256d
-    a, b;
-  
-  v = _mm256_hadd_pd(v, v);
-  a = _mm256_permute2f128_pd(v, v, 1);
-  v = _mm256_add_pd(a, v);
-
-  u = _mm256_hadd_pd(u, u);
-  b = _mm256_permute2f128_pd(u, u, 1);
-  u = _mm256_add_pd(b, u);
-
-  v = _mm256_mul_pd(v, u);	
-  
-  return v;
-}
-
-static __inline __m256d hadd3(__m256d v)
-{ 
-  __m256d
-    a;
-  
-  v = _mm256_hadd_pd(v, v);
-  a = _mm256_permute2f128_pd(v, v, 1);
-  v = _mm256_add_pd(a, v);
-  
-  return v;
-}
-
-
-void  newviewGTRGAMMA_AVX(int tipCase,
-			 double *x1, double *x2, double *x3,
-			 double *extEV, double *tipVector,
-			 int *ex3, unsigned char *tipX1, unsigned char *tipX2,
-			 const int n, double *left, double *right, int *wgt, int *scalerIncrement, const pllBoolean useFastScaling
-			 )
-{
- 
-  int  
-    i, 
-    k, 
-    scale, 
-    addScale = 0;
- 
-  __m256d 
-    minlikelihood_avx = _mm256_set1_pd(PLL_MINLIKELIHOOD),
-    twoto = _mm256_set1_pd(PLL_TWOTOTHE256);
- 
-
-  switch(tipCase)
-    {
-    case PLL_TIP_TIP:
-      {
-	double 
-	  *uX1, *uX2;
-	PLL_ALIGN_BEGIN double
-	  umpX1[1024] PLL_ALIGN_END,
-	  umpX2[1024] PLL_ALIGN_END;
-
-	for (i = 1; i < 16; i++)
-	  {
-	    __m256d 
-	      tv = _mm256_load_pd(&(tipVector[i * 4]));
-
-	    int 
-	      j;
-	    
-	    for (j = 0; j < 4; j++)
-	      for (k = 0; k < 4; k++)
-		{		 
-		  __m256d 
-		    left1 = _mm256_load_pd(&left[j * 16 + k * 4]);		  		  		  
-
-		  left1 = _mm256_mul_pd(left1, tv);		  
-		  left1 = hadd3(left1);
-		  		  		  
-		  _mm256_store_pd(&umpX1[i * 64 + j * 16 + k * 4], left1);
-		}
-	  
-	    for (j = 0; j < 4; j++)
-	      for (k = 0; k < 4; k++)
-		{		 
-		  __m256d 
-		    left1 = _mm256_load_pd(&right[j * 16 + k * 4]);		  		  		  
-
-		  left1 = _mm256_mul_pd(left1, tv);		  
-		  left1 = hadd3(left1);
-		  		  		  
-		  _mm256_store_pd(&umpX2[i * 64 + j * 16 + k * 4], left1);
-		}	    
-	  }   	
-	  
-
-	for(i = 0; i < n; i++)
-	  {	    		 	    
-	    uX1 = &umpX1[64 * tipX1[i]];
-	    uX2 = &umpX2[64 * tipX2[i]];		  
-	    
-	    for(k = 0; k < 4; k++)
-	      {
-		__m256d	   
-		  xv = _mm256_setzero_pd();
-	       
-		int 
-		  l;
-		
-		for(l = 0; l < 4; l++)
-		  {	       	     				      	      																	   
-		    __m256d
-		      x1v =  _mm256_mul_pd(_mm256_load_pd(&uX1[k * 16 + l * 4]), _mm256_load_pd(&uX2[k * 16 + l * 4]));
-		
-		    __m256d 
-		      evv = _mm256_load_pd(&extEV[l * 4]);
-#ifdef _FMA
-		    xv = FMAMACC(xv,x1v,evv);
-#else						  
-		    xv = _mm256_add_pd(xv, _mm256_mul_pd(x1v, evv));
-#endif
-		  }
-		
-		_mm256_store_pd(&x3[16 * i + 4 * k], xv);
-	      }	         	   	    
-	  }
-      }
-      break;
-    case PLL_TIP_INNER:
-      {
-	double 
-	  *uX1;
-	PLL_ALIGN_BEGIN double
-	  umpX1[1024] PLL_ALIGN_END;
-
-	for (i = 1; i < 16; i++)
-	  {
-	    __m256d 
-	      tv = _mm256_load_pd(&(tipVector[i*4]));
-
-	    int 
-	      j;
-	    
-	    for (j = 0; j < 4; j++)
-	      for (k = 0; k < 4; k++)
-		{		 
-		  __m256d 
-		    left1 = _mm256_load_pd(&left[j * 16 + k * 4]);		  		  		  
-
-		  left1 = _mm256_mul_pd(left1, tv);		  
-		  left1 = hadd3(left1);
-		  		  		  
-		  _mm256_store_pd(&umpX1[i * 64 + j * 16 + k * 4], left1);
-		}	 	   
-	  }   	
-	
-	for(i = 0; i < n; i++)
-	  { 
-	    __m256d
-	      xv[4];	    	   
-	    
-	    scale = 1;
-	    uX1 = &umpX1[64 * tipX1[i]];
-
-	    for(k = 0; k < 4; k++)
-	      {
-		__m256d	   		 
-		  xvr = _mm256_load_pd(&(x2[i * 16 + k * 4]));
-
-		int 
-		  l;
-
-		xv[k]  = _mm256_setzero_pd();
-		  
-		for(l = 0; l < 4; l++)
-		  {	       	     				      	      															
-		    __m256d  
-		      x1v = _mm256_load_pd(&uX1[k * 16 + l * 4]),		     
-		      x2v = _mm256_mul_pd(xvr, _mm256_load_pd(&right[k * 16 + l * 4]));			    
-			
-		    x2v = hadd3(x2v);
-		    x1v = _mm256_mul_pd(x1v, x2v);			
-		
-		    __m256d 
-		      evv = _mm256_load_pd(&extEV[l * 4]);
-			
-#ifdef _FMA
-		    xv[k] = FMAMACC(xv[k],x1v,evv);
-#else			  
-		    xv[k] = _mm256_add_pd(xv[k], _mm256_mul_pd(x1v, evv));
-#endif
-		  }
-		    
-		if(scale)
-		  {
-		    __m256d 	     
-		      v1 = _mm256_and_pd(xv[k], absMask_AVX.m);
-
-		    v1 = _mm256_cmp_pd(v1,  minlikelihood_avx, _CMP_LT_OS);
-		    
-		    if(_mm256_movemask_pd( v1 ) != 15)
-		      scale = 0;
-		  }
-	      }	    
-
-	    if(scale)
-	      {
-		xv[0] = _mm256_mul_pd(xv[0], twoto);
-		xv[1] = _mm256_mul_pd(xv[1], twoto);
-		xv[2] = _mm256_mul_pd(xv[2], twoto);
-		xv[3] = _mm256_mul_pd(xv[3], twoto);
-
-		if(useFastScaling)
-		  addScale += wgt[i];
-		else
-		  ex3[i] += 1;
-	      }
-
-	    _mm256_store_pd(&x3[16 * i],      xv[0]);
-	    _mm256_store_pd(&x3[16 * i + 4],  xv[1]);
-	    _mm256_store_pd(&x3[16 * i + 8],  xv[2]);
-	    _mm256_store_pd(&x3[16 * i + 12], xv[3]);
-	  }
-      }
-      break;
-    case PLL_INNER_INNER:
-      {
-	for(i = 0; i < n; i++)
-	  {	
-	    __m256d
-	      xv[4];
-	    
-	    scale = 1;
-
-	    for(k = 0; k < 4; k++)
-	      {
-		__m256d	   
-		 
-		  xvl = _mm256_load_pd(&(x1[i * 16 + k * 4])),
-		  xvr = _mm256_load_pd(&(x2[i * 16 + k * 4]));
-
-		int 
-		  l;
-
-		xv[k] = _mm256_setzero_pd();
-
-		for(l = 0; l < 4; l++)
-		  {	       	     				      	      															
-		    __m256d 
-		      x1v = _mm256_mul_pd(xvl, _mm256_load_pd(&left[k * 16 + l * 4])),
-		      x2v = _mm256_mul_pd(xvr, _mm256_load_pd(&right[k * 16 + l * 4]));			    
-			
-		    x1v = hadd4(x1v, x2v);			
-		
-		    __m256d 
-		      evv = _mm256_load_pd(&extEV[l * 4]);
-						  
-		    xv[k] = _mm256_add_pd(xv[k], _mm256_mul_pd(x1v, evv));
-		  }
-		
-		if(scale)
-		  {
-		    __m256d 	     
-		      v1 = _mm256_and_pd(xv[k], absMask_AVX.m);
-
-		    v1 = _mm256_cmp_pd(v1,  minlikelihood_avx, _CMP_LT_OS);
-		    
-		    if(_mm256_movemask_pd( v1 ) != 15)
-		      scale = 0;
-		  }
-	      }
-
-	     if(scale)
-	      {
-		xv[0] = _mm256_mul_pd(xv[0], twoto);
-		xv[1] = _mm256_mul_pd(xv[1], twoto);
-		xv[2] = _mm256_mul_pd(xv[2], twoto);
-		xv[3] = _mm256_mul_pd(xv[3], twoto);
-
-		if(useFastScaling)
-		  addScale += wgt[i];
-		else
-		  ex3[i] += 1;		
-	      }
-		
-	    _mm256_store_pd(&x3[16 * i],      xv[0]);
-	    _mm256_store_pd(&x3[16 * i + 4],  xv[1]);
-	    _mm256_store_pd(&x3[16 * i + 8],  xv[2]);
-	    _mm256_store_pd(&x3[16 * i + 12], xv[3]);
-	  }
-      }
-      break;
-    default:
-      assert(0);
-    }
-
-  if(useFastScaling)
-    *scalerIncrement = addScale;
-  
-}
-
-void  newviewGTRGAMMA_AVX_GAPPED_SAVE(int tipCase,
-				      double *x1_start, double *x2_start, double *x3_start,
-				      double *extEV, double *tipVector,
-				      int *ex3, unsigned char *tipX1, unsigned char *tipX2,
-				      const int n, double *left, double *right, int *wgt, int *scalerIncrement, const pllBoolean useFastScaling,
-				      unsigned int *x1_gap, unsigned int *x2_gap, unsigned int *x3_gap, 
-				      double *x1_gapColumn, double *x2_gapColumn, double *x3_gapColumn
-				      )
-{
- 
-  int  
-    i, 
-    k, 
-    scale,
-    scaleGap,
-    addScale = 0;
- 
-  __m256d 
-    minlikelihood_avx = _mm256_set1_pd( PLL_MINLIKELIHOOD ),
-    twoto = _mm256_set1_pd(PLL_TWOTOTHE256);
- 
-  double
-    *x1,
-    *x2,
-    *x3,
-    *x1_ptr = x1_start,
-    *x2_ptr = x2_start;
-
-  switch(tipCase)
-    {
-    case PLL_TIP_TIP:
-      {
-	double 
-	  *uX1, *uX2;
-	PLL_ALIGN_BEGIN double
-	  umpX1[1024] PLL_ALIGN_END,
-	  umpX2[1024] PLL_ALIGN_END;
-
-	for (i = 1; i < 16; i++)
-	  {
-	    __m256d 
-	      tv = _mm256_load_pd(&(tipVector[i * 4]));
-
-	    int 
-	      j;
-	    
-	    for (j = 0; j < 4; j++)
-	      for (k = 0; k < 4; k++)
-		{		 
-		  __m256d 
-		    left1 = _mm256_load_pd(&left[j * 16 + k * 4]);		  		  		  
-
-		  left1 = _mm256_mul_pd(left1, tv);		  
-		  left1 = hadd3(left1);
-		  		  		  
-		  _mm256_store_pd(&umpX1[i * 64 + j * 16 + k * 4], left1);
-		}
-	  
-	    for (j = 0; j < 4; j++)
-	      for (k = 0; k < 4; k++)
-		{		 
-		  __m256d 
-		    left1 = _mm256_load_pd(&right[j * 16 + k * 4]);		  		  		  
-
-		  left1 = _mm256_mul_pd(left1, tv);		  
-		  left1 = hadd3(left1);
-		  		  		  
-		  _mm256_store_pd(&umpX2[i * 64 + j * 16 + k * 4], left1);
-		}	    
-	  }   	
-	  
-	x3 = x3_gapColumn;
-
-	{
-	  uX1 = &umpX1[960];
-	  uX2 = &umpX2[960];		  
-	  
-	  for(k = 0; k < 4; k++)
-	    {
-	      __m256d	   
-		xv = _mm256_setzero_pd();
-	      
-	      int 
-		l;
-	      
-	      for(l = 0; l < 4; l++)
-		{	       	     				      	      																	   
-		  __m256d
-		    x1v =  _mm256_mul_pd(_mm256_load_pd(&uX1[k * 16 + l * 4]), _mm256_load_pd(&uX2[k * 16 + l * 4]));
-		  
-		  __m256d 
-		    evv = _mm256_load_pd(&extEV[l * 4]);
-#ifdef _FMA
-		  xv = FMAMACC(xv,x1v,evv);
-#else						  
-		  xv = _mm256_add_pd(xv, _mm256_mul_pd(x1v, evv));
-#endif
-		}
-		    
-	      _mm256_store_pd(&x3[4 * k], xv);
-	    }
-	}
-	
-	x3 = x3_start;
-
-	for(i = 0; i < n; i++)
-	  {		    	    	
-	    if(!(x3_gap[i / 32] & mask32[i % 32]))	     
-	      {
-		uX1 = &umpX1[64 * tipX1[i]];
-		uX2 = &umpX2[64 * tipX2[i]];		  
-	    
-		for(k = 0; k < 4; k++)
-		  {
-		    __m256d	   
-		      xv = _mm256_setzero_pd();
-	       
-		    int 
-		      l;
-		
-		    for(l = 0; l < 4; l++)
-		      {	       	     				      	      																	   
-			__m256d
-			  x1v =  _mm256_mul_pd(_mm256_load_pd(&uX1[k * 16 + l * 4]), _mm256_load_pd(&uX2[k * 16 + l * 4]));
-			
-			__m256d 
-			  evv = _mm256_load_pd(&extEV[l * 4]);
-#ifdef _FMA
-			xv = FMAMACC(xv,x1v,evv);
-#else						  
-			xv = _mm256_add_pd(xv, _mm256_mul_pd(x1v, evv));
-#endif
-		      }
-		    
-		    _mm256_store_pd(&x3[4 * k], xv);
-		  }
-
-		x3 += 16;
-	      }
-	  }
-      }
-      break;
-    case PLL_TIP_INNER:
-      {
-	double 
-	  *uX1;
-	PLL_ALIGN_BEGIN double
-	  umpX1[1024] PLL_ALIGN_END;
-       
-	for (i = 1; i < 16; i++)
-	  {
-	    __m256d 
-	      tv = _mm256_load_pd(&(tipVector[i*4]));
-
-	    int 
-	      j;
-	    
-	    for (j = 0; j < 4; j++)
-	      for (k = 0; k < 4; k++)
-		{		 
-		  __m256d 
-		    left1 = _mm256_load_pd(&left[j * 16 + k * 4]);		  		  		  
-
-		  left1 = _mm256_mul_pd(left1, tv);		  
-		  left1 = hadd3(left1);
-		  		  		  
-		  _mm256_store_pd(&umpX1[i * 64 + j * 16 + k * 4], left1);
-		}	 	   
-	  }	
-
-	{ 
-	  __m256d
-	    xv[4];
-	  
-	  scaleGap = 1;
-	  uX1 = &umpX1[960];
-
-	  x2 = x2_gapColumn;			 
-	  x3 = x3_gapColumn;
-
-	  for(k = 0; k < 4; k++)
-	    {
-	      __m256d	   		 
-		xvr = _mm256_load_pd(&(x2[k * 4]));
-
-	      int 
-		l;
-
-	      xv[k]  = _mm256_setzero_pd();
-		  
-	      for(l = 0; l < 4; l++)
-		{	       	     				      	      															
-		  __m256d  
-		    x1v = _mm256_load_pd(&uX1[k * 16 + l * 4]),		     
-		    x2v = _mm256_mul_pd(xvr, _mm256_load_pd(&right[k * 16 + l * 4]));			    
-			
-		  x2v = hadd3(x2v);
-		  x1v = _mm256_mul_pd(x1v, x2v);			
-		
-		  __m256d 
-		    evv = _mm256_load_pd(&extEV[l * 4]);
-			
-#ifdef _FMA
-		  xv[k] = FMAMACC(xv[k],x1v,evv);
-#else			  
-		  xv[k] = _mm256_add_pd(xv[k], _mm256_mul_pd(x1v, evv));
-#endif
-		}
-		    
-	      if(scaleGap)
-		{
-		  __m256d 	     
-		    v1 = _mm256_and_pd(xv[k], absMask_AVX.m);
-		  
-		  v1 = _mm256_cmp_pd(v1,  minlikelihood_avx, _CMP_LT_OS);
-		    
-		  if(_mm256_movemask_pd( v1 ) != 15)
-		    scaleGap = 0;
-		}
-	    }
-	
-	  if(scaleGap)
-	    {
-	      xv[0] = _mm256_mul_pd(xv[0], twoto);
-	      xv[1] = _mm256_mul_pd(xv[1], twoto);
-	      xv[2] = _mm256_mul_pd(xv[2], twoto);
-	      xv[3] = _mm256_mul_pd(xv[3], twoto);	    
-	    }
-
-	  _mm256_store_pd(&x3[0],      xv[0]);
-	  _mm256_store_pd(&x3[4],  xv[1]);
-	  _mm256_store_pd(&x3[8],  xv[2]);
-	  _mm256_store_pd(&x3[12], xv[3]);
-	}
-	
-	x3 = x3_start;
-	
-	for(i = 0; i < n; i++)
-	  {
-	    if((x3_gap[i / 32] & mask32[i % 32]))
-	      {
-		if(scaleGap)
-		  {
-		    if(useFastScaling)
-		      addScale += wgt[i];
-		    else
-		      ex3[i]  += 1;
-		  }
-	      }
-	    else
-	      {
-		if(x2_gap[i / 32] & mask32[i % 32])
-		  x2 = x2_gapColumn;
-		else
-		  {
-		    x2 = x2_ptr;
-		    x2_ptr += 16;
-		  }
-		
-		__m256d
-		  xv[4];	    	   
-		
-		scale = 1;
-		uX1 = &umpX1[64 * tipX1[i]];
-		
-		for(k = 0; k < 4; k++)
-		  {
-		    __m256d	   		 
-		      xvr = _mm256_load_pd(&(x2[k * 4]));
-		    
-		    int 
-		      l;
-		    
-		    xv[k]  = _mm256_setzero_pd();
-		    
-		    for(l = 0; l < 4; l++)
-		      {	       	     				      	      															
-			__m256d  
-			  x1v = _mm256_load_pd(&uX1[k * 16 + l * 4]),		     
-			  x2v = _mm256_mul_pd(xvr, _mm256_load_pd(&right[k * 16 + l * 4]));			    
-			
-			x2v = hadd3(x2v);
-			x1v = _mm256_mul_pd(x1v, x2v);			
-			
-			__m256d 
-			  evv = _mm256_load_pd(&extEV[l * 4]);
-			
-#ifdef _FMA
-			xv[k] = FMAMACC(xv[k],x1v,evv);
-#else			  
-			xv[k] = _mm256_add_pd(xv[k], _mm256_mul_pd(x1v, evv));
-#endif
-		      }
-		    
-		    if(scale)
-		      {
-			__m256d 	     
-			  v1 = _mm256_and_pd(xv[k], absMask_AVX.m);
-			
-			v1 = _mm256_cmp_pd(v1,  minlikelihood_avx, _CMP_LT_OS);
-			
-			if(_mm256_movemask_pd( v1 ) != 15)
-			  scale = 0;
-		      }
-		  }	    
-	      
-		if(scale)
-		  {
-		    xv[0] = _mm256_mul_pd(xv[0], twoto);
-		    xv[1] = _mm256_mul_pd(xv[1], twoto);
-		    xv[2] = _mm256_mul_pd(xv[2], twoto);
-		    xv[3] = _mm256_mul_pd(xv[3], twoto);
-
-		    if(useFastScaling)
-		      addScale += wgt[i];
-		    else
-		      ex3[i] += 1;		   
-		  }
-	      
-		_mm256_store_pd(&x3[0],      xv[0]);
-		_mm256_store_pd(&x3[4],  xv[1]);
-		_mm256_store_pd(&x3[8],  xv[2]);
-		_mm256_store_pd(&x3[12], xv[3]);
-	      
-		x3 += 16;
-	      }
-	  }
-      }
-      break;
-    case PLL_INNER_INNER:
-      {          
-	{		
-	  x1 = x1_gapColumn;	     	    
-	  x2 = x2_gapColumn;	    
-	  x3 = x3_gapColumn;
-
-	  __m256d
-	    xv[4];
-	    
-	  scaleGap = 1;
-
-	  for(k = 0; k < 4; k++)
-	    {
-	      __m256d	   
-		
-		xvl = _mm256_load_pd(&(x1[k * 4])),
-		xvr = _mm256_load_pd(&(x2[k * 4]));
-
-	      int 
-		l;
-
-	      xv[k] = _mm256_setzero_pd();
-
-	      for(l = 0; l < 4; l++)
-		{	       	     				      	      															
-		  __m256d 
-		    x1v = _mm256_mul_pd(xvl, _mm256_load_pd(&left[k * 16 + l * 4])),
-		    x2v = _mm256_mul_pd(xvr, _mm256_load_pd(&right[k * 16 + l * 4]));			    
-		  
-		  x1v = hadd4(x1v, x2v);			
-		  
-		  __m256d 
-		    evv = _mm256_load_pd(&extEV[l * 4]);
-		  
-		  xv[k] = _mm256_add_pd(xv[k], _mm256_mul_pd(x1v, evv));
-		}
-		
-	      if(scaleGap)
-		  {
-		    __m256d 	     
-		      v1 = _mm256_and_pd(xv[k], absMask_AVX.m);
-
-		    v1 = _mm256_cmp_pd(v1,  minlikelihood_avx, _CMP_LT_OS);
-		    
-		    if(_mm256_movemask_pd( v1 ) != 15)
-		      scaleGap = 0;
-		  }
-	    }
-
-	  if(scaleGap)
-	    {
-	      xv[0] = _mm256_mul_pd(xv[0], twoto);
-	      xv[1] = _mm256_mul_pd(xv[1], twoto);
-	      xv[2] = _mm256_mul_pd(xv[2], twoto);
-	      xv[3] = _mm256_mul_pd(xv[3], twoto);	       
-	    }
-		
-	  _mm256_store_pd(&x3[0],  xv[0]);
-	  _mm256_store_pd(&x3[4],  xv[1]);
-	  _mm256_store_pd(&x3[8],  xv[2]);
-	  _mm256_store_pd(&x3[12], xv[3]);
-	}	  
-      
-	x3 = x3_start;
-
-	for(i = 0; i < n; i++)
-	  {
-	    if(x3_gap[i / 32] & mask32[i % 32])
-	      {	     
-		if(scaleGap)
-		  {
-		    if(useFastScaling)
-		      addScale += wgt[i];
-		    else
-		      ex3[i]  += 1; 	       
-		  }
-	      }
-	    else
-	      {	
-		if(x1_gap[i / 32] & mask32[i % 32])
-		  x1 = x1_gapColumn;
-		else
-		  {
-		    x1 = x1_ptr;
-		    x1_ptr += 16;
-		  }
-	     
-		if(x2_gap[i / 32] & mask32[i % 32])
-		  x2 = x2_gapColumn;
-		else
-		  {
-		    x2 = x2_ptr;
-		    x2_ptr += 16;
-		  }
-
-		__m256d
-		  xv[4];
-	    
-		scale = 1;
-
-		for(k = 0; k < 4; k++)
-		  {
-		    __m256d	   
-		      
-		      xvl = _mm256_load_pd(&(x1[k * 4])),
-		      xvr = _mm256_load_pd(&(x2[k * 4]));
-		    
-		    int 
-		      l;
-		    
-		    xv[k] = _mm256_setzero_pd();
-		    
-		    for(l = 0; l < 4; l++)
-		      {	       	     				      	      															
-			__m256d 
-			  x1v = _mm256_mul_pd(xvl, _mm256_load_pd(&left[k * 16 + l * 4])),
-			  x2v = _mm256_mul_pd(xvr, _mm256_load_pd(&right[k * 16 + l * 4]));			    
-			
-			x1v = hadd4(x1v, x2v);			
-			
-			__m256d 
-			  evv = _mm256_load_pd(&extEV[l * 4]);
-			
-			xv[k] = _mm256_add_pd(xv[k], _mm256_mul_pd(x1v, evv));
-		      }
-		    
-		    if(scale)
-		      {
-			__m256d 	     
-			  v1 = _mm256_and_pd(xv[k], absMask_AVX.m);
-			
-			v1 = _mm256_cmp_pd(v1,  minlikelihood_avx, _CMP_LT_OS);
-			
-			if(_mm256_movemask_pd( v1 ) != 15)
-			  scale = 0;
-		      }
-		  }
-
-		if(scale)
-		  {
-		    xv[0] = _mm256_mul_pd(xv[0], twoto);
-		    xv[1] = _mm256_mul_pd(xv[1], twoto);
-		    xv[2] = _mm256_mul_pd(xv[2], twoto);
-		    xv[3] = _mm256_mul_pd(xv[3], twoto);
-		    
-		    if(useFastScaling)
-		      addScale += wgt[i];
-		    else
-		      ex3[i] += 1;
-		  }
-		
-		_mm256_store_pd(&x3[0],      xv[0]);
-		_mm256_store_pd(&x3[4],  xv[1]);
-		_mm256_store_pd(&x3[8],  xv[2]);
-		_mm256_store_pd(&x3[12], xv[3]);
-	      
-		x3 += 16;
-	      }
-	  }
-      }
-      break;
-    default:
-      assert(0);
-    }
-
-  if(useFastScaling)
-    *scalerIncrement = addScale;
-  
-}
-
-
-
-
-void newviewGTRCAT_AVX(int tipCase,  double *EV,  int *cptr,
-			   double *x1_start, double *x2_start,  double *x3_start, double *tipVector,
-			   int *ex3, unsigned char *tipX1, unsigned char *tipX2,
-			   int n,  double *left, double *right, int *wgt, int *scalerIncrement, const pllBoolean useFastScaling)
-{ /* Computes the 4-double-per-site vector x3_start for n sites from two
-   * children, using 256-bit AVX double intrinsics.
-   *
-   * For every site i the stored result is
-   *   vv = sum over l = 0..3 of  hadd4(x1 * le[4l..4l+3], x2 * ri[4l..4l+3]) * EV[4l..4l+3]
-   * where le / ri are the 16-double blocks of left / right selected by cptr[i].
-   * hadd4 is defined outside this block; presumably it reduces both products
-   * horizontally and combines them -- TODO confirm against its definition.
-   *
-   * tipCase          selects where x1 / x2 are read from:
-   *                    PLL_TIP_TIP      both from tipVector (tipX1[i], tipX2[i])
-   *                    PLL_TIP_INNER    x1 from tipVector (tipX1[i]), x2 from x2_start
-   *                    PLL_INNER_INNER  x1 from x1_start, x2 from x2_start
-   *                  any other value reaches assert(0).
-   * EV               read as four rows of 4 doubles (EV[l * 4]).
-   * cptr             per-site index; cptr[i] * 16 is the offset into left / right.
-   * x1_start         child vector, 4 doubles per site; read only for PLL_INNER_INNER.
-   * x2_start         child vector, 4 doubles per site; not read for PLL_TIP_TIP.
-   * x3_start         output, 4 doubles per site, written at x3_start[4 * i].
-   * tipVector        table read at offset 4 * tip code.
-   * ex3              per-site counter, incremented on rescaling when
-   *                  useFastScaling is false.
-   * tipX1, tipX2     per-site tip codes (tipX2 is read only for PLL_TIP_TIP).
-   * n                number of sites processed.
-   * left, right      arrays of 16-double blocks indexed through cptr.
-   * wgt              per-site weight, added to the scaling sum on rescaling when
-   *                  useFastScaling is true.
-   * scalerIncrement  output; receives the accumulated sum, written only when
-   *                  useFastScaling is true.
-   *
-   * All vector accesses use _mm256_load_pd / _mm256_store_pd (the aligned
-   * variants), so every address dereferenced here has to be 32-byte aligned.
-   */
-  double
-    *le,
-    *ri,
-    *x1,
-    *x2;
-    
-  int 
-    i, 
-    addScale = 0; /* running sum of wgt[i] over rescaled sites (fast-scaling mode) */
-   
-  __m256d 
-    minlikelihood_avx = _mm256_set1_pd( PLL_MINLIKELIHOOD ), /* rescaling threshold in all 4 lanes */
-    twoto = _mm256_set1_pd(PLL_TWOTOTHE256); /* factor a rescaled site is multiplied by */
-  
-  switch(tipCase)
-    {
-    case PLL_TIP_TIP:       /* both children are tips; this branch performs no rescaling check */
-      for (i = 0; i < n; i++)
-	{	 
-	  int 
-	    l;
-	  
-	  le = &left[cptr[i] * 16];
-	  ri = &right[cptr[i] * 16];
-
-	  x1 = &(tipVector[4 * tipX1[i]]);
-	  x2 = &(tipVector[4 * tipX2[i]]);
-	  
-	  __m256d	   
-	    vv = _mm256_setzero_pd();
-	  /* accumulate the four EV rows, each weighted by hadd4 of the two child products */
-	  for(l = 0; l < 4; l++)
-	    {	       	     				      	      															
-	      __m256d 
-		x1v = _mm256_mul_pd(_mm256_load_pd(x1), _mm256_load_pd(&le[l * 4])),
-		x2v = _mm256_mul_pd(_mm256_load_pd(x2), _mm256_load_pd(&ri[l * 4]));			    
-			
-	      x1v = hadd4(x1v, x2v);			
-		
-	      __m256d 
-		evv = _mm256_load_pd(&EV[l * 4]);
-#ifdef _FMA
-	      vv = FMAMACC(vv,x1v,evv);
-#else				
-	      vv = _mm256_add_pd(vv, _mm256_mul_pd(x1v, evv));						      	
-#endif
-	    }	  		  
-
-	  _mm256_store_pd(&x3_start[4 * i], vv);	    	   	    
-	}
-      break;
-    case PLL_TIP_INNER:       /* x1 is a tip (tipVector), x2 is read from x2_start */
-      for (i = 0; i < n; i++)
-	{
-	  int 
-	    l;
-
-	  x1 = &(tipVector[4 * tipX1[i]]);
-	  x2 = &x2_start[4 * i];	 
-	  
-	  le =  &left[cptr[i] * 16];
-	  ri =  &right[cptr[i] * 16];
-
-	  __m256d	   
-	    vv = _mm256_setzero_pd();
-	  
-	  for(l = 0; l < 4; l++)
-	    {	       	     				      	      															
-	      __m256d 
-		x1v = _mm256_mul_pd(_mm256_load_pd(x1), _mm256_load_pd(&le[l * 4])),
-		x2v = _mm256_mul_pd(_mm256_load_pd(x2), _mm256_load_pd(&ri[l * 4]));			    
-			
-	      x1v = hadd4(x1v, x2v);			
-		
-	      __m256d 
-		evv = _mm256_load_pd(&EV[l * 4]);
-				
-#ifdef _FMA
-	      vv = FMAMACC(vv,x1v,evv);
-#else	      
-	      vv = _mm256_add_pd(vv, _mm256_mul_pd(x1v, evv));
-#endif
-	    }	  		  
-	  /* Rescaling check: vv is AND-ed with absMask_AVX.m (defined elsewhere,
-	   * presumably a mask that clears the sign bit -- TODO confirm) and each
-	   * lane is compared against PLL_MINLIKELIHOOD. */
-	  
-	  __m256d 	     
-	    v1 = _mm256_and_pd(vv, absMask_AVX.m);
-
-	  v1 = _mm256_cmp_pd(v1,  minlikelihood_avx, _CMP_LT_OS);
-	    
-	  if(_mm256_movemask_pd( v1 ) == 15) /* 15: the less-than test held in all four lanes */
-	    {	     	      
-	      vv = _mm256_mul_pd(vv, twoto);	      
-	      
-	      if(useFastScaling)
-		addScale += wgt[i];
-	      else
-		ex3[i] += 1;	      	     
-	    }       
-	  
-	  _mm256_store_pd(&x3_start[4 * i], vv);	 	  	  
-	}
-      break;
-    case PLL_INNER_INNER: /* both children are read from x1_start / x2_start */
-      for (i = 0; i < n; i++)
-	{
-	  int 
-	    l;
-
-	  x1 = &x1_start[4 * i];
-	  x2 = &x2_start[4 * i];
-	  
-	  
-	  le =  &left[cptr[i] * 16];
-	  ri =  &right[cptr[i] * 16];
-
-	  __m256d	   
-	    vv = _mm256_setzero_pd();
-	  
-	  for(l = 0; l < 4; l++)
-	    {	       	     				      	      															
-	      __m256d 
-		x1v = _mm256_mul_pd(_mm256_load_pd(x1), _mm256_load_pd(&le[l * 4])),
-		x2v = _mm256_mul_pd(_mm256_load_pd(x2), _mm256_load_pd(&ri[l * 4]));			    
-			
-	      x1v = hadd4(x1v, x2v);			
-		
-	      __m256d 
-		evv = _mm256_load_pd(&EV[l * 4]);
-#ifdef _FMA
-	      vv = FMAMACC(vv,x1v,evv);
-#else						
-	      vv = _mm256_add_pd(vv, _mm256_mul_pd(x1v, evv));						      	
-#endif
-	    }	  		  
-
-	  /* same all-lanes-below-threshold rescaling test as in the PLL_TIP_INNER branch */
-	  __m256d 	     
-	    v1 = _mm256_and_pd(vv, absMask_AVX.m);
-
-	  v1 = _mm256_cmp_pd(v1,  minlikelihood_avx, _CMP_LT_OS);
-	    
-	  if(_mm256_movemask_pd( v1 ) == 15)
-	    {	
-	      vv = _mm256_mul_pd(vv, twoto);
-	      
-	      if(useFastScaling)
-		addScale += wgt[i];
-	      else
-		ex3[i] += 1;	   
-	    }	
-
-	  _mm256_store_pd(&x3_start[4 * i], vv);
-	  	  
-	}
-      break;
-    default:
-      assert(0);
-    }
-  /* scalerIncrement is left untouched when useFastScaling is false (ex3 was updated instead) */
-  if(useFastScaling)
-    *scalerIncrement = addScale;
-}
-
-
-void newviewGTRCAT_AVX_GAPPED_SAVE(int tipCase,  double *EV,  int *cptr,
-				   double *x1_start, double *x2_start,  double *x3_start, double *tipVector,
-				   int *ex3, unsigned char *tipX1, unsigned char *tipX2,
-				   int n,  double *left, double *right, int *wgt, int *scalerIncrement, const pllBoolean useFastScaling,
-				   unsigned int *x1_gap, unsigned int *x2_gap, unsigned int *x3_gap,
-				   double *x1_gapColumn, double *x2_gapColumn, double *x3_gapColumn, const int maxCats)
-{ /* Gap-aware variant of the 4-double-per-site AVX kernel: sites flagged in
-   * x3_gap are not stored in x3_start; a single shared result for them is
-   * written to x3_gapColumn instead.
-   *
-   * Step 1 computes x3_gapColumn from x1_gapColumn and x2_gapColumn with the
-   * 16-double blocks of left / right at index maxCats.
-   * Step 2 walks the n sites; for each site not flagged in x3_gap it computes
-   *   vv = sum over l = 0..3 of  hadd4(x1 * le[4l..4l+3], x2 * ri[4l..4l+3]) * EV[4l..4l+3]
-   * and stores it at the next free 4-double slot of x3_start.
-   * hadd4, isGap and noGap are defined outside this block; isGap / noGap
-   * presumably test the bit for site i in a gap bit vector -- TODO confirm.
-   *
-   * tipCase          PLL_TIP_TIP, PLL_TIP_INNER or PLL_INNER_INNER; any other
-   *                  value reaches assert(0) (after step 1 has already run).
-   * EV               read as four rows of 4 doubles (EV[l * 4]).
-   * cptr             per-site index into left / right (cptr[i] * 16).
-   * x1_start         packed child vector; read only for PLL_INNER_INNER and
-   *                  advanced by 4 doubles per site not flagged in x1_gap.
-   * x2_start         packed child vector; read for PLL_TIP_INNER and
-   *                  PLL_INNER_INNER, advanced by 4 doubles per site not
-   *                  flagged in x2_gap.
-   * x3_start         packed output; advanced by 4 doubles per site not flagged
-   *                  in x3_gap.
-   * tipVector        table read at offset 4 * tip code.
-   * ex3              per-site counter, incremented on rescaling when
-   *                  useFastScaling is false.
-   * tipX1, tipX2     per-site tip codes.
-   * n                number of sites.
-   * left, right      arrays of 16-double blocks; index maxCats is the block
-   *                  used for gap sites.
-   * wgt              per-site weight added to the scaling sum on rescaling when
-   *                  useFastScaling is true.
-   * scalerIncrement  output, written only when useFastScaling is true.
-   * x1_gap, x2_gap, x3_gap
-   *                  gap indicators queried through isGap / noGap.
-   * x1_gapColumn, x2_gapColumn
-   *                  4-double gap vectors of the children (inputs).
-   * x3_gapColumn     4-double gap vector of the parent (output of step 1).
-   * maxCats          index of the gap block inside left / right.
-   *
-   * All vector accesses use the aligned _mm256_load_pd / _mm256_store_pd, so
-   * every address dereferenced here has to be 32-byte aligned.
-   */
-  double
-    *le,
-    *ri,
-    *x1,
-    *x2, 
-    *x3,
-    *x1_ptr = x1_start, /* next unread packed entry of child 1 */
-    *x2_ptr = x2_start, /* next unread packed entry of child 2 */
-    *x3_ptr = x3_start; /* next free packed entry of the result */
-  
-  int 
-    i, 
-    scaleGap = 0, /* set to 1 if the gap column computed in step 1 was rescaled */
-    addScale = 0;
-   
-  __m256d 
-    minlikelihood_avx = _mm256_set1_pd( PLL_MINLIKELIHOOD ),
-    twoto = _mm256_set1_pd(PLL_TWOTOTHE256);
-  
-  /* Step 1: compute the shared gap column x3_gapColumn. */
-  {
-    int 
-      l;
-
-    x1 = x1_gapColumn;	      
-    x2 = x2_gapColumn;
-    x3 = x3_gapColumn;    	 
-	  	  
-    le =  &left[maxCats * 16];
-    ri =  &right[maxCats * 16];
-
-    __m256d	   
-      vv = _mm256_setzero_pd();
-	  
-    for(l = 0; l < 4; l++)
-      {	       	     				      	      															
-	__m256d 
-	  x1v = _mm256_mul_pd(_mm256_load_pd(x1), _mm256_load_pd(&le[l * 4])),
-	  x2v = _mm256_mul_pd(_mm256_load_pd(x2), _mm256_load_pd(&ri[l * 4]));			    
-	
-	x1v = hadd4(x1v, x2v);			
-	
-	__m256d 
-	  evv = _mm256_load_pd(&EV[l * 4]);
-#ifdef _FMA
-	vv = FMAMACC(vv,x1v,evv);
-#else						
-	vv = _mm256_add_pd(vv, _mm256_mul_pd(x1v, evv));						      	
-#endif
-      }	  		  
-
-    if(tipCase != PLL_TIP_TIP) /* the gap column is never rescaled in the tip/tip case */
-      {
-	__m256d 	     
-	  v1 = _mm256_and_pd(vv, absMask_AVX.m); /* absMask_AVX is defined elsewhere; presumably clears the sign bit -- TODO confirm */
-    
-	v1 = _mm256_cmp_pd(v1,  minlikelihood_avx, _CMP_LT_OS);
-    
-	if(_mm256_movemask_pd( v1 ) == 15) /* 15: the less-than test held in all four lanes */
-	  {
-	    vv = _mm256_mul_pd(vv, twoto);	      	 
-	    scaleGap = 1;
-	  }
-      }
-    
-    _mm256_store_pd(x3, vv);    
-  }
-  /* Step 2: per-site computation for every site that is not a gap in x3. */
-  switch(tipCase)
-    {
-    case PLL_TIP_TIP:       /* no rescaling and no scaleGap bookkeeping in this branch */
-      for (i = 0; i < n; i++)
-	{ 
-	  if(noGap(x3_gap, i))
-	    {	 
-	      int 
-		l;
-	      
-	      x1 = &(tipVector[4 * tipX1[i]]);
-	      x2 = &(tipVector[4 * tipX2[i]]);
-
-	      x3 = x3_ptr;
-	      /* a gap in a child only switches the 16-double block to the maxCats one; x1 / x2 still come from tipVector */
-	      if(isGap(x1_gap, i))
-		le =  &left[maxCats * 16];
-	      else	  	  
-		le =  &left[cptr[i] * 16];	  
-	      
-	      if(isGap(x2_gap, i))
-		ri =  &right[maxCats * 16];
-	      else	 	  
-		ri =  &right[cptr[i] * 16];
-	  	  
-	      __m256d	   
-		vv = _mm256_setzero_pd();
-	      
-	      for(l = 0; l < 4; l++)
-		{	       	     				      	      															
-		  __m256d 
-		    x1v = _mm256_mul_pd(_mm256_load_pd(x1), _mm256_load_pd(&le[l * 4])),
-		    x2v = _mm256_mul_pd(_mm256_load_pd(x2), _mm256_load_pd(&ri[l * 4]));			    
-		  
-		  x1v = hadd4(x1v, x2v);			
-		  
-		  __m256d 
-		    evv = _mm256_load_pd(&EV[l * 4]);
-#ifdef _FMA
-		  vv = FMAMACC(vv,x1v,evv);
-#else				
-		  vv = _mm256_add_pd(vv, _mm256_mul_pd(x1v, evv));						      	
-#endif
-		}	  		  
-
-	      _mm256_store_pd(x3, vv);	 
-	      
-	      x3_ptr += 4;
-	    }
-	}
-      break;
-    case PLL_TIP_INNER:      
-      for (i = 0; i < n; i++)
-	{ 
-	  if(isGap(x3_gap, i))
-	    {
-	      if(scaleGap) /* gap site: nothing is stored, but the rescaling of the gap column is counted for it */
-		{
-		  if(useFastScaling)
-		    addScale += wgt[i];
-		  else
-		    ex3[i] += 1;		   		    
-		}	       
-	    }
-	  else
-	    {
-	      int 
-		l;
-
-	      x1 = &(tipVector[4 * tipX1[i]]);    
-	      x3 = x3_ptr;
-
-	      if(isGap(x1_gap, i))
-		le =  &left[maxCats * 16];
-	      else
-		le =  &left[cptr[i] * 16];
-	  
-	      if(isGap(x2_gap, i))
-		{		 
-		  ri =  &right[maxCats * 16];
-		  x2 = x2_gapColumn;
-		}
-	      else
-		{
-		  ri =  &right[cptr[i] * 16];
-		  x2 = x2_ptr;
-		  x2_ptr += 4; /* the packed child vector advances only on its own non-gap sites */
-		}	  	 
-
-	      __m256d	   
-		vv = _mm256_setzero_pd();
-	      
-	      for(l = 0; l < 4; l++)
-		{	       	     				      	      															
-		  __m256d 
-		    x1v = _mm256_mul_pd(_mm256_load_pd(x1), _mm256_load_pd(&le[l * 4])),
-		    x2v = _mm256_mul_pd(_mm256_load_pd(x2), _mm256_load_pd(&ri[l * 4]));			    
-		  
-		  x1v = hadd4(x1v, x2v);			
-		  
-		  __m256d 
-		    evv = _mm256_load_pd(&EV[l * 4]);
-		  
-#ifdef _FMA
-		  vv = FMAMACC(vv,x1v,evv);
-#else	      
-		  vv = _mm256_add_pd(vv, _mm256_mul_pd(x1v, evv));
-#endif
-		}	  		  
-	  
-	      /* rescale when the masked value is below PLL_MINLIKELIHOOD in all four lanes */
-	      __m256d 	     
-		v1 = _mm256_and_pd(vv, absMask_AVX.m);
-	      
-	      v1 = _mm256_cmp_pd(v1,  minlikelihood_avx, _CMP_LT_OS);
-	      
-	      if(_mm256_movemask_pd( v1 ) == 15)
-		{	     	      
-		  vv = _mm256_mul_pd(vv, twoto);	      
-		  
-		  if(useFastScaling)
-		    addScale += wgt[i];
-		  else
-		    ex3[i] += 1;		 
-		}       
-	  
-	      _mm256_store_pd(x3, vv);	 	  	  
-
-	      x3_ptr += 4;
-	    }
-	}
-      break;
-    case PLL_INNER_INNER:
-      for (i = 0; i < n; i++)
-	{
-	  if(isGap(x3_gap, i))
-	    {
-	      if(scaleGap)		   		    
-		{
-		  if(useFastScaling)
-		    addScale += wgt[i];
-		  else
-		    ex3[i] += 1;
-		}	      
-	    }
-	  else
-	    {
-	      int 
-		l;
-	      
-	      x3 = x3_ptr;
-	      /* each child contributes either its gap column with the maxCats block, or its next packed entry with the cptr[i] block */
-	      if(isGap(x1_gap, i))
-		{
-		  x1 = x1_gapColumn;
-		  le =  &left[maxCats * 16];
-		}
-	      else
-		{
-		  le =  &left[cptr[i] * 16];
-		  x1 = x1_ptr;
-		  x1_ptr += 4;
-		}
-
-	      if(isGap(x2_gap, i))	
-		{
-		  x2 = x2_gapColumn;
-		  ri =  &right[maxCats * 16];	    
-		}
-	      else
-		{
-		  ri =  &right[cptr[i] * 16];
-		  x2 = x2_ptr;
-		  x2_ptr += 4;
-		}	 	  	  	  
-	  
-	      __m256d	   
-		vv = _mm256_setzero_pd();
-	      
-	      for(l = 0; l < 4; l++)
-		{	       	     				      	      															
-		  __m256d 
-		    x1v = _mm256_mul_pd(_mm256_load_pd(x1), _mm256_load_pd(&le[l * 4])),
-		    x2v = _mm256_mul_pd(_mm256_load_pd(x2), _mm256_load_pd(&ri[l * 4]));			    
-		  
-		  x1v = hadd4(x1v, x2v);			
-		  
-		  __m256d 
-		    evv = _mm256_load_pd(&EV[l * 4]);
-#ifdef _FMA
-		  vv = FMAMACC(vv,x1v,evv);
-#else						
-		  vv = _mm256_add_pd(vv, _mm256_mul_pd(x1v, evv));						      	
-#endif
-		}	  		  
-	      
-	      
-	      __m256d 	     
-		v1 = _mm256_and_pd(vv, absMask_AVX.m);
-	      
-	      v1 = _mm256_cmp_pd(v1,  minlikelihood_avx, _CMP_LT_OS);
-	      
-	      if(_mm256_movemask_pd( v1 ) == 15)
-		{	
-		  vv = _mm256_mul_pd(vv, twoto);	      
-		  
-		  if(useFastScaling)
-		    addScale += wgt[i];
-		  else
-		    ex3[i] += 1;		
-		}	
-	      
-	      _mm256_store_pd(x3, vv);
-	      
-	      x3_ptr += 4;
-	    }	  	  
-	}
-      break;
-    default:
-      assert(0);
-    }
-  /* scalerIncrement is left untouched when useFastScaling is false (ex3 was updated instead) */
-  if(useFastScaling)
-    *scalerIncrement = addScale;
-}
-
-void newviewGTRCATPROT_AVX(int tipCase, double *extEV,
-			       int *cptr,
-			       double *x1, double *x2, double *x3, double *tipVector,
-			       int *ex3, unsigned char *tipX1, unsigned char *tipX2,
-			       int n, double *left, double *right, int *wgt, int *scalerIncrement, const pllBoolean useFastScaling)
-{ /* 20-double-per-site counterpart of the 4-double AVX kernel: computes x3
-   * for n sites from two children, holding each site as five __m256d values.
-   *
-   * For every site i and every l = 0..19:
-   *   x1v = element-wise products of vl[0..19] with row l of le, summed into one 4-lane vector
-   *   x2v = the same for vr and row l of ri
-   *   x1v = hadd4(x1v, x2v)
-   *   vv[k] += x1v * extEV[l * 20 + 4k .. 4k+3]   for k = 0..4
-   * le / ri are the 400-double blocks of left / right selected by cptr[i].
-   * hadd4 is defined outside this block; presumably it reduces both vectors
-   * horizontally and combines them -- TODO confirm.
-   *
-   * tipCase          selects where vl / vr are read from:
-   *                    PLL_TIP_TIP      both from tipVector (tipX1[i], tipX2[i])
-   *                    PLL_TIP_INNER    vl from tipVector (tipX1[i]), vr from x2
-   *                    PLL_INNER_INNER  vl from x1, vr from x2
-   *                  any other value reaches assert(0).
-   * extEV            read as 20 rows of 20 doubles (extEV[l * 20]).
-   * cptr             per-site index; cptr[i] * 400 is the offset into left / right.
-   * x1, x2           child vectors, 20 doubles per site.
-   * x3               output, 20 doubles per site, written at x3[20 * i].
-   * tipVector        table read at offset 20 * tip code.
-   * ex3              per-site counter, incremented on rescaling when
-   *                  useFastScaling is false.
-   * tipX1, tipX2     per-site tip codes (tipX2 is read only for PLL_TIP_TIP).
-   * n                number of sites.
-   * left, right      arrays of 400-double blocks indexed through cptr.
-   * wgt              per-site weight added to the scaling sum on rescaling when
-   *                  useFastScaling is true.
-   * scalerIncrement  output, written only when useFastScaling is true.
-   *
-   * All vector accesses use the aligned _mm256_load_pd / _mm256_store_pd, so
-   * every address dereferenced here has to be 32-byte aligned.
-   */
-  double
-    *le, *ri, *v, *vl, *vr;
-
-  int i, l, scale, addScale = 0;
-
-#ifdef _FMA
-  int k;
-#endif
-  /* k exists only in _FMA builds; every use of k below sits inside an #ifdef _FMA section */
-  switch(tipCase)
-    {
-    case PLL_TIP_TIP:
-      { /* both children are tips; this branch performs no rescaling check */
-	for (i = 0; i < n; i++)
-	  {	   
-	    le = &left[cptr[i] * 400];
-	    ri = &right[cptr[i] * 400];
-
-	    vl = &(tipVector[20 * tipX1[i]]);
-	    vr = &(tipVector[20 * tipX2[i]]);
-	    v  = &x3[20 * i];	    	    	   	    
-
-	    __m256d vv[5];
-	    
-	    vv[0] = _mm256_setzero_pd();
-	    vv[1] = _mm256_setzero_pd();
-	    vv[2] = _mm256_setzero_pd();
-	    vv[3] = _mm256_setzero_pd();
-	    vv[4] = _mm256_setzero_pd();	   	    
-
-	    for(l = 0; l < 20; l++)
-	      {	       
-		__m256d 
-		  x1v = _mm256_setzero_pd(),
-		  x2v = _mm256_setzero_pd();	
-				
-		double 
-		  *ev = &extEV[l * 20],
-		  *lv = &le[l * 20],
-		  *rv = &ri[l * 20];														
-
-#ifdef _FMA		
-		for(k = 0; k < 20; k += 4) 
-		  {
-		    __m256d vlv = _mm256_load_pd(&vl[k]);
-		    __m256d lvv = _mm256_load_pd(&lv[k]);
-		    x1v = FMAMACC(x1v,vlv,lvv);
-		    __m256d vrv = _mm256_load_pd(&vr[k]);
-		    __m256d rvv = _mm256_load_pd(&rv[k]);
-		    x2v = FMAMACC(x2v,vrv,rvv);
-		  }
-#else		
-		x1v = _mm256_add_pd(x1v, _mm256_mul_pd(_mm256_load_pd(&vl[0]), _mm256_load_pd(&lv[0])));
-		x1v = _mm256_add_pd(x1v, _mm256_mul_pd(_mm256_load_pd(&vl[4]), _mm256_load_pd(&lv[4])));
-		x1v = _mm256_add_pd(x1v, _mm256_mul_pd(_mm256_load_pd(&vl[8]), _mm256_load_pd(&lv[8])));
-		x1v = _mm256_add_pd(x1v, _mm256_mul_pd(_mm256_load_pd(&vl[12]), _mm256_load_pd(&lv[12])));
-		x1v = _mm256_add_pd(x1v, _mm256_mul_pd(_mm256_load_pd(&vl[16]), _mm256_load_pd(&lv[16])));
-
-		x2v = _mm256_add_pd(x2v,  _mm256_mul_pd(_mm256_load_pd(&vr[0]), _mm256_load_pd(&rv[0])));			    
-		x2v = _mm256_add_pd(x2v,  _mm256_mul_pd(_mm256_load_pd(&vr[4]), _mm256_load_pd(&rv[4])));				    
-		x2v = _mm256_add_pd(x2v,  _mm256_mul_pd(_mm256_load_pd(&vr[8]), _mm256_load_pd(&rv[8])));			    
-		x2v = _mm256_add_pd(x2v,  _mm256_mul_pd(_mm256_load_pd(&vr[12]), _mm256_load_pd(&rv[12])));				    
-		x2v = _mm256_add_pd(x2v,  _mm256_mul_pd(_mm256_load_pd(&vr[16]), _mm256_load_pd(&rv[16])));	
-#endif
-
-		x1v = hadd4(x1v, x2v);			
-#ifdef _FMA
-		for(k = 0; k < 5; k++) 
-		  {
-		    __m256d evv = _mm256_load_pd(&ev[k*4]);
-		    vv[k] = FMAMACC(vv[k],x1v,evv);
-		  }	  
-#else		
-		__m256d 
-		  evv[5];
-	    	
-		evv[0] = _mm256_load_pd(&ev[0]);
-		evv[1] = _mm256_load_pd(&ev[4]);
-		evv[2] = _mm256_load_pd(&ev[8]);
-		evv[3] = _mm256_load_pd(&ev[12]);
-		evv[4] = _mm256_load_pd(&ev[16]);		
-		
-		vv[0] = _mm256_add_pd(vv[0], _mm256_mul_pd(x1v, evv[0]));
-		vv[1] = _mm256_add_pd(vv[1], _mm256_mul_pd(x1v, evv[1]));
-		vv[2] = _mm256_add_pd(vv[2], _mm256_mul_pd(x1v, evv[2]));
-		vv[3] = _mm256_add_pd(vv[3], _mm256_mul_pd(x1v, evv[3]));
-		vv[4] = _mm256_add_pd(vv[4], _mm256_mul_pd(x1v, evv[4]));				      		      	  
-#endif
-	      }
-	    _mm256_store_pd(&v[0], vv[0]);
-	    _mm256_store_pd(&v[4], vv[1]);
-	    _mm256_store_pd(&v[8], vv[2]);
-	    _mm256_store_pd(&v[12], vv[3]);
-	    _mm256_store_pd(&v[16], vv[4]);
-	  }
-      }
-      break;
-    case PLL_TIP_INNER:      	
-      for (i = 0; i < n; i++)
-	{
-	  le = &left[cptr[i] * 400];
-	  ri = &right[cptr[i] * 400];
-	  
-	  vl = &(tipVector[20 * tipX1[i]]);
-	  vr = &x2[20 * i];
-	  v  = &x3[20 * i];	   
-	  
-	  __m256d vv[5];
-	  
-	  vv[0] = _mm256_setzero_pd();
-	  vv[1] = _mm256_setzero_pd();
-	  vv[2] = _mm256_setzero_pd();
-	  vv[3] = _mm256_setzero_pd();
-	  vv[4] = _mm256_setzero_pd();
-	  
-	 
-
-	  for(l = 0; l < 20; l++)
-	    {	       
-	      __m256d 
-		x1v = _mm256_setzero_pd(),
-		x2v = _mm256_setzero_pd();	
-	      
-	      double 
-		*ev = &extEV[l * 20],
-		*lv = &le[l * 20],
-		*rv = &ri[l * 20];														
-#ifdef _FMA
-	      for(k = 0; k < 20; k += 4) 
-		{
-		  __m256d vlv = _mm256_load_pd(&vl[k]);
-		  __m256d lvv = _mm256_load_pd(&lv[k]);
-		  x1v = FMAMACC(x1v,vlv,lvv);
-		  __m256d vrv = _mm256_load_pd(&vr[k]);
-		  __m256d rvv = _mm256_load_pd(&rv[k]);
-		  x2v = FMAMACC(x2v,vrv,rvv);
-		}
-#else	      
-	      x1v = _mm256_add_pd(x1v, _mm256_mul_pd(_mm256_load_pd(&vl[0]), _mm256_load_pd(&lv[0])));
-	      x1v = _mm256_add_pd(x1v, _mm256_mul_pd(_mm256_load_pd(&vl[4]), _mm256_load_pd(&lv[4])));
-	      x1v = _mm256_add_pd(x1v, _mm256_mul_pd(_mm256_load_pd(&vl[8]), _mm256_load_pd(&lv[8])));
-	      x1v = _mm256_add_pd(x1v, _mm256_mul_pd(_mm256_load_pd(&vl[12]), _mm256_load_pd(&lv[12])));
-	      x1v = _mm256_add_pd(x1v, _mm256_mul_pd(_mm256_load_pd(&vl[16]), _mm256_load_pd(&lv[16])));
-	      
-	      x2v = _mm256_add_pd(x2v,  _mm256_mul_pd(_mm256_load_pd(&vr[0]), _mm256_load_pd(&rv[0])));			    
-	      x2v = _mm256_add_pd(x2v,  _mm256_mul_pd(_mm256_load_pd(&vr[4]), _mm256_load_pd(&rv[4])));				    
-	      x2v = _mm256_add_pd(x2v,  _mm256_mul_pd(_mm256_load_pd(&vr[8]), _mm256_load_pd(&rv[8])));			    
-	      x2v = _mm256_add_pd(x2v,  _mm256_mul_pd(_mm256_load_pd(&vr[12]), _mm256_load_pd(&rv[12])));				    
-	      x2v = _mm256_add_pd(x2v,  _mm256_mul_pd(_mm256_load_pd(&vr[16]), _mm256_load_pd(&rv[16])));
-#endif
-
-	      x1v = hadd4(x1v, x2v);			
-	      /* in this branch the five extEV vectors are loaded up front for both the _FMA and the non-_FMA path */
-	      __m256d 
-		evv[5];
-	      
-	      evv[0] = _mm256_load_pd(&ev[0]);
-	      evv[1] = _mm256_load_pd(&ev[4]);
-	      evv[2] = _mm256_load_pd(&ev[8]);
-	      evv[3] = _mm256_load_pd(&ev[12]);
-	      evv[4] = _mm256_load_pd(&ev[16]);		
-
-#ifdef _FMA
-	      for(k = 0; k < 5; k++)
-		vv[k] = FMAMACC(vv[k],x1v,evv[k]);		 
-#else	      
-	      vv[0] = _mm256_add_pd(vv[0], _mm256_mul_pd(x1v, evv[0]));
-	      vv[1] = _mm256_add_pd(vv[1], _mm256_mul_pd(x1v, evv[1]));
-	      vv[2] = _mm256_add_pd(vv[2], _mm256_mul_pd(x1v, evv[2]));
-	      vv[3] = _mm256_add_pd(vv[3], _mm256_mul_pd(x1v, evv[3]));
-	      vv[4] = _mm256_add_pd(vv[4], _mm256_mul_pd(x1v, evv[4]));				      	
-#endif
-	    }	  
-
-	   	     
-	  __m256d minlikelihood_avx = _mm256_set1_pd( PLL_MINLIKELIHOOD );
-	  
-	  scale = 1;
-	  /* scale stays 1 only if all 20 entries pass the less-than-threshold test;
-	   * the loop stops at the first group of four that does not.
-	   * absMask_AVX is defined elsewhere, presumably a sign-clearing mask -- TODO confirm. */
-	  for(l = 0; scale && (l < 20); l += 4)
-	    {	       
-	      __m256d 
-		v1 = _mm256_and_pd(vv[l / 4], absMask_AVX.m);
-	      v1 = _mm256_cmp_pd(v1,  minlikelihood_avx, _CMP_LT_OS);
-	      
-	      if(_mm256_movemask_pd( v1 ) != 15)
-		scale = 0;
-	    }	    	  	  
-	 
-
-	  if(scale)
-	    {
-	      __m256d 
-		twoto = _mm256_set1_pd(PLL_TWOTOTHE256);
-	      
-	      for(l = 0; l < 20; l += 4)
-		vv[l / 4] = _mm256_mul_pd(vv[l / 4] , twoto);		    		 
-	  
-	      if(useFastScaling)
-		addScale += wgt[i];
-	      else
-		ex3[i]  += 1;	      
-	    }
-
-	  _mm256_store_pd(&v[0], vv[0]);
-	  _mm256_store_pd(&v[4], vv[1]);
-	  _mm256_store_pd(&v[8], vv[2]);
-	  _mm256_store_pd(&v[12], vv[3]);
-	  _mm256_store_pd(&v[16], vv[4]);	       
-	}
-      break;
-    case PLL_INNER_INNER:
-      for(i = 0; i < n; i++)
-	{
-	  le = &left[cptr[i] * 400];
-	  ri = &right[cptr[i] * 400];
-
-	  vl = &x1[20 * i];
-	  vr = &x2[20 * i];
-	  v = &x3[20 * i];
-
-	  __m256d vv[5];
-	  
-	  vv[0] = _mm256_setzero_pd();
-	  vv[1] = _mm256_setzero_pd();
-	  vv[2] = _mm256_setzero_pd();
-	  vv[3] = _mm256_setzero_pd();
-	  vv[4] = _mm256_setzero_pd();
-	  
-	  for(l = 0; l < 20; l++)
-	    {	       
-	      __m256d 
-		x1v = _mm256_setzero_pd(),
-		x2v = _mm256_setzero_pd();	
-	      
-	      double 
-		*ev = &extEV[l * 20],
-		*lv = &le[l * 20],
-		*rv = &ri[l * 20];														
-	      /* unlike the other two branches, these products use separate mul + add even in _FMA builds */
-	      x1v = _mm256_add_pd(x1v, _mm256_mul_pd(_mm256_load_pd(&vl[0]), _mm256_load_pd(&lv[0])));
-	      x1v = _mm256_add_pd(x1v, _mm256_mul_pd(_mm256_load_pd(&vl[4]), _mm256_load_pd(&lv[4])));
-	      x1v = _mm256_add_pd(x1v, _mm256_mul_pd(_mm256_load_pd(&vl[8]), _mm256_load_pd(&lv[8])));
-	      x1v = _mm256_add_pd(x1v, _mm256_mul_pd(_mm256_load_pd(&vl[12]), _mm256_load_pd(&lv[12])));
-	      x1v = _mm256_add_pd(x1v, _mm256_mul_pd(_mm256_load_pd(&vl[16]), _mm256_load_pd(&lv[16])));
-	      
-	      x2v = _mm256_add_pd(x2v,  _mm256_mul_pd(_mm256_load_pd(&vr[0]), _mm256_load_pd(&rv[0])));			    
-	      x2v = _mm256_add_pd(x2v,  _mm256_mul_pd(_mm256_load_pd(&vr[4]), _mm256_load_pd(&rv[4])));				    
-	      x2v = _mm256_add_pd(x2v,  _mm256_mul_pd(_mm256_load_pd(&vr[8]), _mm256_load_pd(&rv[8])));			    
-	      x2v = _mm256_add_pd(x2v,  _mm256_mul_pd(_mm256_load_pd(&vr[12]), _mm256_load_pd(&rv[12])));				    
-	      x2v = _mm256_add_pd(x2v,  _mm256_mul_pd(_mm256_load_pd(&vr[16]), _mm256_load_pd(&rv[16])));
-
-	      x1v = hadd4(x1v, x2v);			
-#ifdef _FMA
-	       for(k = 0; k < 5; k++) 
-		 {
-		   __m256d evv = _mm256_load_pd(&ev[k*4]);
-		   vv[k] = FMAMACC(vv[k],x1v,evv);
-		 }
-#else	      
-	      __m256d 
-		evv[5];
-	      
-	      evv[0] = _mm256_load_pd(&ev[0]);
-	      evv[1] = _mm256_load_pd(&ev[4]);
-	      evv[2] = _mm256_load_pd(&ev[8]);
-	      evv[3] = _mm256_load_pd(&ev[12]);
-	      evv[4] = _mm256_load_pd(&ev[16]);		
-	      
-	      vv[0] = _mm256_add_pd(vv[0], _mm256_mul_pd(x1v, evv[0]));
-	      vv[1] = _mm256_add_pd(vv[1], _mm256_mul_pd(x1v, evv[1]));
-	      vv[2] = _mm256_add_pd(vv[2], _mm256_mul_pd(x1v, evv[2]));
-	      vv[3] = _mm256_add_pd(vv[3], _mm256_mul_pd(x1v, evv[3]));
-	      vv[4] = _mm256_add_pd(vv[4], _mm256_mul_pd(x1v, evv[4]));				      	
-#endif
-	    }	  
-
-	   	     
-	  __m256d minlikelihood_avx = _mm256_set1_pd( PLL_MINLIKELIHOOD );
-	  
-	  scale = 1;
-	  /* same test as in the PLL_TIP_INNER branch: rescale only if all 20 entries are below the threshold */
-	  for(l = 0; scale && (l < 20); l += 4)
-	    {	       
-	      __m256d 
-		v1 = _mm256_and_pd(vv[l / 4], absMask_AVX.m);
-	      v1 = _mm256_cmp_pd(v1,  minlikelihood_avx, _CMP_LT_OS);
-	      
-	      if(_mm256_movemask_pd( v1 ) != 15)
-		scale = 0;
-	    }	    	  	  
-
-	  if(scale)
-	    {
-	      __m256d 
-		twoto = _mm256_set1_pd(PLL_TWOTOTHE256);
-	      
-	      for(l = 0; l < 20; l += 4)
-		vv[l / 4] = _mm256_mul_pd(vv[l / 4] , twoto);		    		 
-	  
-	      if(useFastScaling)
-		addScale += wgt[i];
-	      else
-		ex3[i]  += 1;	      
-	    }
-
-	  _mm256_store_pd(&v[0], vv[0]);
-	  _mm256_store_pd(&v[4], vv[1]);
-	  _mm256_store_pd(&v[8], vv[2]);
-	  _mm256_store_pd(&v[12], vv[3]);
-	  _mm256_store_pd(&v[16], vv[4]);
-	 
-	}
-      break;
-    default:
-      assert(0);
-    }
-  /* scalerIncrement is left untouched when useFastScaling is false (ex3 was updated instead) */
-  if(useFastScaling)
-    *scalerIncrement = addScale;
-}
-
-void newviewGTRCATPROT_AVX_GAPPED_SAVE(int tipCase, double *extEV,
-				       int *cptr,
-				       double *x1, double *x2, double *x3, double *tipVector,
-				       int *ex3, unsigned char *tipX1, unsigned char *tipX2,
-				       int n, double *left, double *right, int *wgt, int *scalerIncrement, const pllBoolean useFastScaling,
-				       unsigned int *x1_gap, unsigned int *x2_gap, unsigned int *x3_gap,
-				       double *x1_gapColumn, double *x2_gapColumn, double *x3_gapColumn, const int maxCats)
-{
-  double
-    *le, 
-    *ri, 
-    *v, 
-    *vl, 
-    *vr,
-    *x1_ptr = x1,
-    *x2_ptr = x2, 
-    *x3_ptr = x3;
-  
-  int 
-    i, 
-    l, 
-    scale, 
-    addScale = 0,
-    scaleGap = 0;
-
-#ifdef _FMA
-  int k;
-#endif
-
-  {
-    le = &left[maxCats * 400];
-    ri = &right[maxCats * 400];
-    
-    vl = x1_gapColumn;
-    vr = x2_gapColumn;
-    v  = x3_gapColumn;
-
-    __m256d vv[5];
-    
-    vv[0] = _mm256_setzero_pd();
-    vv[1] = _mm256_setzero_pd();
-    vv[2] = _mm256_setzero_pd();
-    vv[3] = _mm256_setzero_pd();
-    vv[4] = _mm256_setzero_pd();
-    
-    for(l = 0; l < 20; l++)
-      {	       
-	__m256d 
-	  x1v = _mm256_setzero_pd(),
-	  x2v = _mm256_setzero_pd();	
-	
-	double 
-	  *ev = &extEV[l * 20],
-	  *lv = &le[l * 20],
-	  *rv = &ri[l * 20];														
-	
-	x1v = _mm256_add_pd(x1v, _mm256_mul_pd(_mm256_load_pd(&vl[0]), _mm256_load_pd(&lv[0])));
-	x1v = _mm256_add_pd(x1v, _mm256_mul_pd(_mm256_load_pd(&vl[4]), _mm256_load_pd(&lv[4])));
-	x1v = _mm256_add_pd(x1v, _mm256_mul_pd(_mm256_load_pd(&vl[8]), _mm256_load_pd(&lv[8])));
-	x1v = _mm256_add_pd(x1v, _mm256_mul_pd(_mm256_load_pd(&vl[12]), _mm256_load_pd(&lv[12])));
-	x1v = _mm256_add_pd(x1v, _mm256_mul_pd(_mm256_load_pd(&vl[16]), _mm256_load_pd(&lv[16])));
-	
-	x2v = _mm256_add_pd(x2v,  _mm256_mul_pd(_mm256_load_pd(&vr[0]), _mm256_load_pd(&rv[0])));			    
-	x2v = _mm256_add_pd(x2v,  _mm256_mul_pd(_mm256_load_pd(&vr[4]), _mm256_load_pd(&rv[4])));				    
-	x2v = _mm256_add_pd(x2v,  _mm256_mul_pd(_mm256_load_pd(&vr[8]), _mm256_load_pd(&rv[8])));			    
-	x2v = _mm256_add_pd(x2v,  _mm256_mul_pd(_mm256_load_pd(&vr[12]), _mm256_load_pd(&rv[12])));				    
-	x2v = _mm256_add_pd(x2v,  _mm256_mul_pd(_mm256_load_pd(&vr[16]), _mm256_load_pd(&rv[16])));
-	
-	x1v = hadd4(x1v, x2v);			
-#ifdef _FMA
-	for(k = 0; k < 5; k++) 
-	  {
-	    __m256d evv = _mm256_load_pd(&ev[k*4]);
-	    vv[k] = FMAMACC(vv[k],x1v,evv);
-	  }
-#else	      
-	__m256d 
-	  evv[5];
-	
-	evv[0] = _mm256_load_pd(&ev[0]);
-	evv[1] = _mm256_load_pd(&ev[4]);
-	evv[2] = _mm256_load_pd(&ev[8]);
-	evv[3] = _mm256_load_pd(&ev[12]);
-	evv[4] = _mm256_load_pd(&ev[16]);		
-	
-	vv[0] = _mm256_add_pd(vv[0], _mm256_mul_pd(x1v, evv[0]));
-	vv[1] = _mm256_add_pd(vv[1], _mm256_mul_pd(x1v, evv[1]));
-	vv[2] = _mm256_add_pd(vv[2], _mm256_mul_pd(x1v, evv[2]));
-	vv[3] = _mm256_add_pd(vv[3], _mm256_mul_pd(x1v, evv[3]));
-	vv[4] = _mm256_add_pd(vv[4], _mm256_mul_pd(x1v, evv[4]));				      	
-#endif
-      }	  
-
-
-     if(tipCase != PLL_TIP_TIP)
-       {
-	 __m256d minlikelihood_avx = _mm256_set1_pd( PLL_MINLIKELIHOOD );
-	  
-	 scale = 1;
-	  
-	 for(l = 0; scale && (l < 20); l += 4)
-	   {	       
-	     __m256d 
-	       v1 = _mm256_and_pd(vv[l / 4], absMask_AVX.m);
-	     v1 = _mm256_cmp_pd(v1,  minlikelihood_avx, _CMP_LT_OS);
-	     
-	     if(_mm256_movemask_pd( v1 ) != 15)
-	       scale = 0;
-	   }	    	  	  
-
-	 if(scale)
-	   {
-	      __m256d 
-		twoto = _mm256_set1_pd(PLL_TWOTOTHE256);
-	      
-	      for(l = 0; l < 20; l += 4)
-		vv[l / 4] = _mm256_mul_pd(vv[l / 4] , twoto);		    		 	      	     	      
-	   
-	      scaleGap = 1;
-	   }
-       }
-
-     _mm256_store_pd(&v[0], vv[0]);
-     _mm256_store_pd(&v[4], vv[1]);
-     _mm256_store_pd(&v[8], vv[2]);
-     _mm256_store_pd(&v[12], vv[3]);
-     _mm256_store_pd(&v[16], vv[4]);     
-  }
-
-
-
-  switch(tipCase)
-    {
-    case PLL_TIP_TIP:
-      {
-	for (i = 0; i < n; i++)
-	  {
-	    if(noGap(x3_gap, i))	   
-	      {	    
-		vl = &(tipVector[20 * tipX1[i]]);
-		vr = &(tipVector[20 * tipX2[i]]);
-		v  = x3_ptr;	    	    	   	    
-
-		if(isGap(x1_gap, i))
-		  le =  &left[maxCats * 400];
-		else	  	  
-		  le =  &left[cptr[i] * 400];	  
-		
-		if(isGap(x2_gap, i))
-		  ri =  &right[maxCats * 400];
-		else	 	  
-		  ri =  &right[cptr[i] * 400];
-
-		__m256d vv[5];
-		
-		vv[0] = _mm256_setzero_pd();
-		vv[1] = _mm256_setzero_pd();
-		vv[2] = _mm256_setzero_pd();
-		vv[3] = _mm256_setzero_pd();
-		vv[4] = _mm256_setzero_pd();	   	    
-		
-		for(l = 0; l < 20; l++)
-		  {	       
-		    __m256d 
-		      x1v = _mm256_setzero_pd(),
-		      x2v = _mm256_setzero_pd();	
-		    
-		    double 
-		      *ev = &extEV[l * 20],
-		      *lv = &le[l * 20],
-		      *rv = &ri[l * 20];														
-		    
-#ifdef _FMA		
-		    for(k = 0; k < 20; k += 4) 
-		      {
-			__m256d vlv = _mm256_load_pd(&vl[k]);
-			__m256d lvv = _mm256_load_pd(&lv[k]);
-			x1v = FMAMACC(x1v,vlv,lvv);
-			__m256d vrv = _mm256_load_pd(&vr[k]);
-			__m256d rvv = _mm256_load_pd(&rv[k]);
-			x2v = FMAMACC(x2v,vrv,rvv);
-		      }
-#else		
-		    x1v = _mm256_add_pd(x1v, _mm256_mul_pd(_mm256_load_pd(&vl[0]), _mm256_load_pd(&lv[0])));
-		    x1v = _mm256_add_pd(x1v, _mm256_mul_pd(_mm256_load_pd(&vl[4]), _mm256_load_pd(&lv[4])));
-		    x1v = _mm256_add_pd(x1v, _mm256_mul_pd(_mm256_load_pd(&vl[8]), _mm256_load_pd(&lv[8])));
-		    x1v = _mm256_add_pd(x1v, _mm256_mul_pd(_mm256_load_pd(&vl[12]), _mm256_load_pd(&lv[12])));
-		    x1v = _mm256_add_pd(x1v, _mm256_mul_pd(_mm256_load_pd(&vl[16]), _mm256_load_pd(&lv[16])));
-		    
-		    x2v = _mm256_add_pd(x2v,  _mm256_mul_pd(_mm256_load_pd(&vr[0]), _mm256_load_pd(&rv[0])));			    
-		    x2v = _mm256_add_pd(x2v,  _mm256_mul_pd(_mm256_load_pd(&vr[4]), _mm256_load_pd(&rv[4])));				    
-		    x2v = _mm256_add_pd(x2v,  _mm256_mul_pd(_mm256_load_pd(&vr[8]), _mm256_load_pd(&rv[8])));			    
-		    x2v = _mm256_add_pd(x2v,  _mm256_mul_pd(_mm256_load_pd(&vr[12]), _mm256_load_pd(&rv[12])));				    
-		    x2v = _mm256_add_pd(x2v,  _mm256_mul_pd(_mm256_load_pd(&vr[16]), _mm256_load_pd(&rv[16])));	
-#endif
-		    
-		    x1v = hadd4(x1v, x2v);			
-#ifdef _FMA
-		    for(k = 0; k < 5; k++) 
-		      {
-			__m256d evv = _mm256_load_pd(&ev[k*4]);
-			vv[k] = FMAMACC(vv[k],x1v,evv);
-		      }	  
-#else		
-		    __m256d 
-		      evv[5];
-		    
-		    evv[0] = _mm256_load_pd(&ev[0]);
-		    evv[1] = _mm256_load_pd(&ev[4]);
-		    evv[2] = _mm256_load_pd(&ev[8]);
-		    evv[3] = _mm256_load_pd(&ev[12]);
-		    evv[4] = _mm256_load_pd(&ev[16]);		
-		    
-		    vv[0] = _mm256_add_pd(vv[0], _mm256_mul_pd(x1v, evv[0]));
-		    vv[1] = _mm256_add_pd(vv[1], _mm256_mul_pd(x1v, evv[1]));
-		    vv[2] = _mm256_add_pd(vv[2], _mm256_mul_pd(x1v, evv[2]));
-		    vv[3] = _mm256_add_pd(vv[3], _mm256_mul_pd(x1v, evv[3]));
-		    vv[4] = _mm256_add_pd(vv[4], _mm256_mul_pd(x1v, evv[4]));				      		      	  
-#endif
-		  }
-		
-		_mm256_store_pd(&v[0], vv[0]);
-		_mm256_store_pd(&v[4], vv[1]);
-		_mm256_store_pd(&v[8], vv[2]);
-		_mm256_store_pd(&v[12], vv[3]);
-		_mm256_store_pd(&v[16], vv[4]);
-
-		x3_ptr += 20;
-	      }
-	  }
-      }
-      break;
-    case PLL_TIP_INNER:      	
-      for (i = 0; i < n; i++)
-	{
-	  if(isGap(x3_gap, i))
-	    {
-	      if(scaleGap)
-		{
-		  if(useFastScaling)
-		    addScale += wgt[i];
-		  else
-		    ex3[i] += 1;		   		    
-		}	     
-	    }
-	  else
-	    {
-	      vl = &(tipVector[20 * tipX1[i]]);
-
-	      vr = x2_ptr;
-	      v = x3_ptr;
-	      
-	      if(isGap(x1_gap, i))
-		le =  &left[maxCats * 400];
-	      else
-		le =  &left[cptr[i] * 400];
-	      
-	      if(isGap(x2_gap, i))
-		{		 
-		  ri =  &right[maxCats * 400];
-		  vr = x2_gapColumn;
-		}
-	      else
-		{
-		  ri =  &right[cptr[i] * 400];
-		  vr = x2_ptr;
-		  x2_ptr += 20;
-		}	  	  
-	  
-	      __m256d vv[5];
-	      
-	      vv[0] = _mm256_setzero_pd();
-	      vv[1] = _mm256_setzero_pd();
-	      vv[2] = _mm256_setzero_pd();
-	      vv[3] = _mm256_setzero_pd();
-	      vv[4] = _mm256_setzero_pd();
-	      	      	      
-	      for(l = 0; l < 20; l++)
-		{	       
-		  __m256d 
-		    x1v = _mm256_setzero_pd(),
-		    x2v = _mm256_setzero_pd();	
-		  
-		  double 
-		    *ev = &extEV[l * 20],
-		    *lv = &le[l * 20],
-		    *rv = &ri[l * 20];														
-#ifdef _FMA
-		  for(k = 0; k < 20; k += 4) 
-		    {
-		      __m256d vlv = _mm256_load_pd(&vl[k]);
-		      __m256d lvv = _mm256_load_pd(&lv[k]);
-		      x1v = FMAMACC(x1v,vlv,lvv);
-		      __m256d vrv = _mm256_load_pd(&vr[k]);
-		      __m256d rvv = _mm256_load_pd(&rv[k]);
-		      x2v = FMAMACC(x2v,vrv,rvv);
-		    }
-#else	      
-		  x1v = _mm256_add_pd(x1v, _mm256_mul_pd(_mm256_load_pd(&vl[0]), _mm256_load_pd(&lv[0])));
-		  x1v = _mm256_add_pd(x1v, _mm256_mul_pd(_mm256_load_pd(&vl[4]), _mm256_load_pd(&lv[4])));
-		  x1v = _mm256_add_pd(x1v, _mm256_mul_pd(_mm256_load_pd(&vl[8]), _mm256_load_pd(&lv[8])));
-		  x1v = _mm256_add_pd(x1v, _mm256_mul_pd(_mm256_load_pd(&vl[12]), _mm256_load_pd(&lv[12])));
-		  x1v = _mm256_add_pd(x1v, _mm256_mul_pd(_mm256_load_pd(&vl[16]), _mm256_load_pd(&lv[16])));
-		  
-		  x2v = _mm256_add_pd(x2v,  _mm256_mul_pd(_mm256_load_pd(&vr[0]), _mm256_load_pd(&rv[0])));			    
-		  x2v = _mm256_add_pd(x2v,  _mm256_mul_pd(_mm256_load_pd(&vr[4]), _mm256_load_pd(&rv[4])));				    
-		  x2v = _mm256_add_pd(x2v,  _mm256_mul_pd(_mm256_load_pd(&vr[8]), _mm256_load_pd(&rv[8])));			    
-		  x2v = _mm256_add_pd(x2v,  _mm256_mul_pd(_mm256_load_pd(&vr[12]), _mm256_load_pd(&rv[12])));				    
-		  x2v = _mm256_add_pd(x2v,  _mm256_mul_pd(_mm256_load_pd(&vr[16]), _mm256_load_pd(&rv[16])));
-#endif
-		  
-		  x1v = hadd4(x1v, x2v);			
-		  
-		  __m256d 
-		    evv[5];
-		  
-		  evv[0] = _mm256_load_pd(&ev[0]);
-		  evv[1] = _mm256_load_pd(&ev[4]);
-		  evv[2] = _mm256_load_pd(&ev[8]);
-		  evv[3] = _mm256_load_pd(&ev[12]);
-		  evv[4] = _mm256_load_pd(&ev[16]);		
-		  
-#ifdef _FMA
-		  for(k = 0; k < 5; k++)
-		    vv[k] = FMAMACC(vv[k],x1v,evv[k]);		 
-#else	      
-		  vv[0] = _mm256_add_pd(vv[0], _mm256_mul_pd(x1v, evv[0]));
-		  vv[1] = _mm256_add_pd(vv[1], _mm256_mul_pd(x1v, evv[1]));
-		  vv[2] = _mm256_add_pd(vv[2], _mm256_mul_pd(x1v, evv[2]));
-		  vv[3] = _mm256_add_pd(vv[3], _mm256_mul_pd(x1v, evv[3]));
-		  vv[4] = _mm256_add_pd(vv[4], _mm256_mul_pd(x1v, evv[4]));				      	
-#endif
-		}	  
-
-	   	     
-	      __m256d minlikelihood_avx = _mm256_set1_pd( PLL_MINLIKELIHOOD );
-	  
-	      scale = 1;
-	      
-	      for(l = 0; scale && (l < 20); l += 4)
-		{	       
-		  __m256d 
-		    v1 = _mm256_and_pd(vv[l / 4], absMask_AVX.m);
-		  v1 = _mm256_cmp_pd(v1,  minlikelihood_avx, _CMP_LT_OS);
-		  
-		  if(_mm256_movemask_pd( v1 ) != 15)
-		    scale = 0;
-		}	    	  	  
-	 
-	      if(scale)
-		{
-		  __m256d 
-		    twoto = _mm256_set1_pd(PLL_TWOTOTHE256);
-		  
-		  for(l = 0; l < 20; l += 4)
-		    vv[l / 4] = _mm256_mul_pd(vv[l / 4] , twoto);		    		 
-		  
-		  if(useFastScaling)
-		    addScale += wgt[i];
-		  else
-		    ex3[i]  += 1;	      
-		}
-
-	      _mm256_store_pd(&v[0], vv[0]);
-	      _mm256_store_pd(&v[4], vv[1]);
-	      _mm256_store_pd(&v[8], vv[2]);
-	      _mm256_store_pd(&v[12], vv[3]);
-	      _mm256_store_pd(&v[16], vv[4]);	       
-	      
-	      x3_ptr += 20;
-	    }
-	}    
-      break;
-    case PLL_INNER_INNER:
-      for(i = 0; i < n; i++)
-	{
-	   if(isGap(x3_gap, i))
-	     {
-	       if(scaleGap)		   		    
-		 {
-		   if(useFastScaling)
-		     addScale += wgt[i];
-		   else
-		     ex3[i] += 1;
-		 }		 	       
-	     }
-	   else
-	     {
-
-	        v = x3_ptr;
-
-		if(isGap(x1_gap, i))
-		  {
-		    vl = x1_gapColumn;
-		    le =  &left[maxCats * 400];
-		  }
-		else
-		  {
-		    le =  &left[cptr[i] * 400];
-		    vl = x1_ptr;
-		    x1_ptr += 20;
-		  }
-		
-		if(isGap(x2_gap, i))	
-		  {
-		    vr = x2_gapColumn;
-		    ri =  &right[maxCats * 400];	    
-		  }
-		else
-		  {
-		    ri =  &right[cptr[i] * 400];
-		    vr = x2_ptr;
-		    x2_ptr += 20;
-		  }	 	  	 
-		
-		__m256d vv[5];
-		
-		vv[0] = _mm256_setzero_pd();
-		vv[1] = _mm256_setzero_pd();
-		vv[2] = _mm256_setzero_pd();
-		vv[3] = _mm256_setzero_pd();
-		vv[4] = _mm256_setzero_pd();
-		
-		for(l = 0; l < 20; l++)
-		  {	       
-		    __m256d 
-		      x1v = _mm256_setzero_pd(),
-		      x2v = _mm256_setzero_pd();	
-		    
-		    double 
-		      *ev = &extEV[l * 20],
-		      *lv = &le[l * 20],
-		      *rv = &ri[l * 20];														
-		    
-		    x1v = _mm256_add_pd(x1v, _mm256_mul_pd(_mm256_load_pd(&vl[0]), _mm256_load_pd(&lv[0])));
-		    x1v = _mm256_add_pd(x1v, _mm256_mul_pd(_mm256_load_pd(&vl[4]), _mm256_load_pd(&lv[4])));
-		    x1v = _mm256_add_pd(x1v, _mm256_mul_pd(_mm256_load_pd(&vl[8]), _mm256_load_pd(&lv[8])));
-		    x1v = _mm256_add_pd(x1v, _mm256_mul_pd(_mm256_load_pd(&vl[12]), _mm256_load_pd(&lv[12])));
-		    x1v = _mm256_add_pd(x1v, _mm256_mul_pd(_mm256_load_pd(&vl[16]), _mm256_load_pd(&lv[16])));
-		    
-		    x2v = _mm256_add_pd(x2v,  _mm256_mul_pd(_mm256_load_pd(&vr[0]), _mm256_load_pd(&rv[0])));			    
-		    x2v = _mm256_add_pd(x2v,  _mm256_mul_pd(_mm256_load_pd(&vr[4]), _mm256_load_pd(&rv[4])));				    
-		    x2v = _mm256_add_pd(x2v,  _mm256_mul_pd(_mm256_load_pd(&vr[8]), _mm256_load_pd(&rv[8])));			    
-		    x2v = _mm256_add_pd(x2v,  _mm256_mul_pd(_mm256_load_pd(&vr[12]), _mm256_load_pd(&rv[12])));				    
-		    x2v = _mm256_add_pd(x2v,  _mm256_mul_pd(_mm256_load_pd(&vr[16]), _mm256_load_pd(&rv[16])));
-		    
-		    x1v = hadd4(x1v, x2v);			
-#ifdef _FMA
-		    for(k = 0; k < 5; k++) 
-		      {
-			__m256d evv = _mm256_load_pd(&ev[k*4]);
-			vv[k] = FMAMACC(vv[k],x1v,evv);
-		      }
-#else	      
-		    __m256d 
-		      evv[5];
-		    
-		    evv[0] = _mm256_load_pd(&ev[0]);
-		    evv[1] = _mm256_load_pd(&ev[4]);
-		    evv[2] = _mm256_load_pd(&ev[8]);
-		    evv[3] = _mm256_load_pd(&ev[12]);
-		    evv[4] = _mm256_load_pd(&ev[16]);		
-		    
-		    vv[0] = _mm256_add_pd(vv[0], _mm256_mul_pd(x1v, evv[0]));
-		    vv[1] = _mm256_add_pd(vv[1], _mm256_mul_pd(x1v, evv[1]));
-		    vv[2] = _mm256_add_pd(vv[2], _mm256_mul_pd(x1v, evv[2]));
-		    vv[3] = _mm256_add_pd(vv[3], _mm256_mul_pd(x1v, evv[3]));
-		    vv[4] = _mm256_add_pd(vv[4], _mm256_mul_pd(x1v, evv[4]));				      	
-#endif
-		  }	  
-
-	   	     
-		__m256d minlikelihood_avx = _mm256_set1_pd( PLL_MINLIKELIHOOD );
-		
-		scale = 1;
-		
-		for(l = 0; scale && (l < 20); l += 4)
-		  {	       
-		    __m256d 
-		      v1 = _mm256_and_pd(vv[l / 4], absMask_AVX.m);
-		    v1 = _mm256_cmp_pd(v1,  minlikelihood_avx, _CMP_LT_OS);
-		    
-		    if(_mm256_movemask_pd( v1 ) != 15)
-		      scale = 0;
-		  }	    	  	  
-		
-		if(scale)
-		  {
-		    __m256d 
-		      twoto = _mm256_set1_pd(PLL_TWOTOTHE256);
-		    
-		    for(l = 0; l < 20; l += 4)
-		      vv[l / 4] = _mm256_mul_pd(vv[l / 4] , twoto);		    		 
-		    
-		    if(useFastScaling)
-		      addScale += wgt[i];
-		    else
-		      ex3[i]  += 1;	      
-		  }
-
-		_mm256_store_pd(&v[0], vv[0]);
-		_mm256_store_pd(&v[4], vv[1]);
-		_mm256_store_pd(&v[8], vv[2]);
-		_mm256_store_pd(&v[12], vv[3]);
-		_mm256_store_pd(&v[16], vv[4]);
-
-		 x3_ptr += 20;
-	     }
-	}   
-      break;
-    default:
-      assert(0);
-    }
-  
-  if(useFastScaling)
-    *scalerIncrement = addScale;
-}
-
-
-
-void newviewGTRGAMMAPROT_AVX_LG4(int tipCase,
-				 double *x1, double *x2, double *x3, double *extEV[4], double *tipVector[4],
-				 int *ex3, unsigned char *tipX1, unsigned char *tipX2, int n, 
-				 double *left, double *right, int *wgt, int *scalerIncrement, const pllBoolean useFastScaling) 
-{
-  double	
-    *uX1, 
-    *uX2, 
-    *v, 
-    x1px2, 
-    *vl, 
-    *vr;
-  
-  int	
-    i, 
-    j, 
-    l, 
-    k, 
-    scale, 
-    addScale = 0;
-
- 
-#ifndef GCC_VERSION
-#define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
-#endif
-
-
-#if GCC_VERSION < 40500 && defined (__GNUC__)
-   __m256d
-    bitmask = _mm256_set_pd(0,0,0,-1);
-#else
-  __m256i
-    bitmask = _mm256_set_epi32(0, 0, 0, 0, 0, 0, -1, -1);
-#endif 
-  
-  switch(tipCase) 
-    {
-    case PLL_TIP_TIP: 
-      {
-       
-    PLL_ALIGN_BEGIN double
-	  umpX1[1840] PLL_ALIGN_END,
-	  umpX2[1840] PLL_ALIGN_END;
-
-	
-	for(i = 0; i < 23; i++) 
-	  {	    	    
-	    for(k = 0; k < 80; k++) 
-	      {
-		double 
-		  *ll =  &left[k * 20],
-		  *rr =  &right[k * 20];
-		
-		__m256d 
-		  umpX1v = _mm256_setzero_pd(),
-		  umpX2v = _mm256_setzero_pd();
-		
-		v = &(tipVector[k / 20][20 * i]);
-
-		for(l = 0; l < 20; l+=4) 
-		  {
-		    __m256d vv = _mm256_load_pd(&v[l]);
-#ifdef _FMA
-		    __m256d llv = _mm256_load_pd(&ll[l]);
-		    umpX1v = FMAMACC(umpX1v,vv,llv);
-		    __m256d rrv = _mm256_load_pd(&rr[l]);
-		    umpX2v = FMAMACC(umpX2v,vv,rrv);
-#else		    
-		    umpX1v = _mm256_add_pd(umpX1v,_mm256_mul_pd(vv,_mm256_load_pd(&ll[l])));
-		    umpX2v = _mm256_add_pd(umpX2v,_mm256_mul_pd(vv,_mm256_load_pd(&rr[l])));
-#endif
-		  }
-		
-		umpX1v = hadd3(umpX1v);
-		umpX2v = hadd3(umpX2v);
-		_mm256_maskstore_pd(&umpX1[80 * i + k], bitmask, umpX1v);
-		_mm256_maskstore_pd(&umpX2[80 * i + k], bitmask, umpX2v);
-	      } 
-	  }
-
-	for(i = 0; i < n; i++) 
-	  {	    
-	    uX1 = &umpX1[80 * tipX1[i]];
-	    uX2 = &umpX2[80 * tipX2[i]];
-	   
-	    for(j = 0; j < 4; j++) 
-	      {     	
-		__m256d vv[5];  
-
-		v = &x3[i * 80 + j * 20];
-			
-		vv[0] = _mm256_setzero_pd();
-		vv[1] = _mm256_setzero_pd();
-		vv[2] = _mm256_setzero_pd();
-		vv[3] = _mm256_setzero_pd();
-		vv[4] = _mm256_setzero_pd();
-
-		for(k = 0; k < 20; k++) 
-		  {			 
-		    x1px2 = uX1[j * 20 + k] * uX2[j * 20 + k];
-
-		    __m256d x1px2v = _mm256_set1_pd(x1px2);		    
-		    
-		    __m256d extEvv = _mm256_load_pd(&extEV[j][20 * k]);
-#ifdef _FMA
-		    vv[0] = FMAMACC(vv[0],x1px2v,extEvv);
-#else
-		    vv[0] = _mm256_add_pd(vv[0],_mm256_mul_pd(x1px2v,extEvv));
-#endif
-		    _mm256_store_pd(&v[0],vv[0]);
-		    
-		    extEvv = _mm256_load_pd(&extEV[j][20 * k + 4]);
-#ifdef _FMA
-		    vv[1] = FMAMACC(vv[1],x1px2v,extEvv);
-#else
-		    vv[1] = _mm256_add_pd(vv[1],_mm256_mul_pd(x1px2v,extEvv));
-#endif
-		    _mm256_store_pd(&v[4],vv[1]);
-
-		    extEvv = _mm256_load_pd(&extEV[j][20 * k + 8]);
-#ifdef _FMA
-		    vv[2] = FMAMACC(vv[2],x1px2v,extEvv);
-#else
-		    vv[2] = _mm256_add_pd(vv[2],_mm256_mul_pd(x1px2v,extEvv));
-#endif
-		    _mm256_store_pd(&v[8],vv[2]);
-
-		    extEvv = _mm256_load_pd(&extEV[j][20 * k + 12]);
-#ifdef _FMA
-		    vv[3] = FMAMACC(vv[3],x1px2v,extEvv);
-#else
-		    vv[3] = _mm256_add_pd(vv[3],_mm256_mul_pd(x1px2v,extEvv));
-#endif
-		    _mm256_store_pd(&v[12],vv[3]);
-
-		    extEvv = _mm256_load_pd(&extEV[j][20 * k + 16]);
-#ifdef _FMA
-		    vv[4] = FMAMACC(vv[4],x1px2v,extEvv);
-#else
-		    vv[4] = _mm256_add_pd(vv[4],_mm256_mul_pd(x1px2v,extEvv));
-#endif
-		    _mm256_store_pd(&v[16],vv[4]);
-		  } 
-	      } 
-	  } 
-      } 
-      break;
-    case PLL_TIP_INNER: 
-      {
-
-    	  PLL_ALIGN_BEGIN double
-	  umpX1[1840] PLL_ALIGN_END,
-	  ump_x2[20] PLL_ALIGN_END;
-
-	for(i = 0; i < 23; i++) 
-	  {	   
-	    for(k = 0; k < 80; k++) 
-	      {
-		__m256d umpX1v = _mm256_setzero_pd();
-		
-		 v = &(tipVector[k / 20][20 * i]);
-
-		for(l = 0; l < 20; l+=4) 
-		  {
-		    __m256d vv = _mm256_load_pd(&v[l]);
-		    __m256d leftv = _mm256_load_pd(&left[k * 20 + l]);
-#ifdef _FMA
-		   
-		    umpX1v = FMAMACC(umpX1v, vv, leftv);
-#else
-		    umpX1v = _mm256_add_pd(umpX1v, _mm256_mul_pd(vv, leftv));
-#endif
-		  }
-		umpX1v = hadd3(umpX1v);
-		_mm256_maskstore_pd(&umpX1[80 * i + k], bitmask, umpX1v);
-	      } 
-	  }
-	
-	for (i = 0; i < n; i++) 
-	  {	   
-	    uX1 = &umpX1[80 * tipX1[i]];
-	   	    
-	    for(k = 0; k < 4; k++) 
-	      {
-		v = &(x2[80 * i + k * 20]);
-		
-		for(l = 0; l < 20; l++) 
-		  {
-		    __m256d ump_x2v = _mm256_setzero_pd();
-		    		  
-		    __m256d vv = _mm256_load_pd(&v[0]);
-		    __m256d rightv = _mm256_load_pd(&right[k*400+l*20+0]);
-#ifdef _FMA
-		    ump_x2v = FMAMACC(ump_x2v,vv,rightv);
-#else
-		    ump_x2v = _mm256_add_pd(ump_x2v, _mm256_mul_pd(vv, rightv));
-#endif
-		    
-		    vv = _mm256_load_pd(&v[4]);
-		    rightv = _mm256_load_pd(&right[k*400+l*20+4]);
-#ifdef _FMA
-		    ump_x2v = FMAMACC(ump_x2v,vv,rightv);
-#else
-		    ump_x2v = _mm256_add_pd(ump_x2v, _mm256_mul_pd(vv, rightv));
-#endif
-
-		    vv = _mm256_load_pd(&v[8]);
-		    rightv = _mm256_load_pd(&right[k*400+l*20+8]);
-#ifdef _FMA
-		    ump_x2v = FMAMACC(ump_x2v,vv,rightv);
-#else
-		    ump_x2v = _mm256_add_pd(ump_x2v, _mm256_mul_pd(vv, rightv));
-#endif
-
-		    vv = _mm256_load_pd(&v[12]);
-		    rightv = _mm256_load_pd(&right[k*400+l*20+12]);
-#ifdef _FMA
-		    ump_x2v = FMAMACC(ump_x2v,vv,rightv);
-#else
-		    ump_x2v = _mm256_add_pd(ump_x2v, _mm256_mul_pd(vv, rightv));
-#endif
-
-		    vv = _mm256_load_pd(&v[16]);
-		    rightv = _mm256_load_pd(&right[k*400+l*20+16]);
-#ifdef _FMA
-		    ump_x2v = FMAMACC(ump_x2v,vv,rightv);
-#else
-		    ump_x2v = _mm256_add_pd(ump_x2v, _mm256_mul_pd(vv, rightv));
-#endif
-		    
-		    ump_x2v = hadd3(ump_x2v);
-		    _mm256_maskstore_pd(&ump_x2[l], bitmask, ump_x2v);
-		  }
-		
-		v = &(x3[80 * i + 20 * k]);
-	
-
-		__m256d vv[5]; 
-
-		vv[0] = _mm256_setzero_pd();
-		vv[1] = _mm256_setzero_pd();
-		vv[2] = _mm256_setzero_pd();
-		vv[3] = _mm256_setzero_pd();
-		vv[4] = _mm256_setzero_pd();
-		
-		for(l = 0; l < 20; l++) 
-		  {
-		    x1px2 = uX1[k * 20 + l]	* ump_x2[l];
-		    __m256d x1px2v = _mm256_set1_pd(x1px2);	
-	    		 
-#ifdef _FMA
-		    __m256d ev = _mm256_load_pd(&extEV[k][l * 20 + 0]);
-		    vv[0] = FMAMACC(vv[0],x1px2v, ev);
-#else
-		    vv[0] = _mm256_add_pd(vv[0],_mm256_mul_pd(x1px2v, _mm256_load_pd(&extEV[k][l * 20 + 0])));
-#endif
-		    _mm256_store_pd(&v[0],vv[0]);
-
-#ifdef _FMA
-		    ev = _mm256_load_pd(&extEV[k][l * 20 + 4]);
-		    vv[1] = FMAMACC(vv[1],x1px2v, ev);
-#else
-		    vv[1] = _mm256_add_pd(vv[1],_mm256_mul_pd(x1px2v, _mm256_load_pd(&extEV[k][l * 20 + 4])));
-#endif
-		    _mm256_store_pd(&v[4],vv[1]);
-
-#ifdef _FMA
-		    ev = _mm256_load_pd(&extEV[k][l * 20 + 8]);
-		    vv[2] = FMAMACC(vv[2],x1px2v, ev);
-#else
-		    vv[2] = _mm256_add_pd(vv[2],_mm256_mul_pd(x1px2v, _mm256_load_pd(&extEV[k][l * 20 + 8])));
-#endif
-		    _mm256_store_pd(&v[8],vv[2]);
-		    
-#ifdef _FMA
-		    ev = _mm256_load_pd(&extEV[k][l * 20 + 12]);
-		    vv[3] = FMAMACC(vv[3],x1px2v, ev);
-#else
-		    vv[3] = _mm256_add_pd(vv[3],_mm256_mul_pd(x1px2v, _mm256_load_pd(&extEV[k][l * 20 + 12])));
-#endif
-		    _mm256_store_pd(&v[12],vv[3]);
-
-
-#ifdef _FMA
-		    ev = _mm256_load_pd(&extEV[k][l * 20 + 16]);
-		    vv[4] = FMAMACC(vv[4],x1px2v, ev);
-#else
-		    vv[4] = _mm256_add_pd(vv[4],_mm256_mul_pd(x1px2v, _mm256_load_pd(&extEV[k][l * 20 + 16])));
-#endif
-		    _mm256_store_pd(&v[16],vv[4]);
-
-		  } 
-	      }
-	   
-	    v = &x3[80 * i];
-	    __m256d minlikelihood_avx = _mm256_set1_pd(PLL_MINLIKELIHOOD);
-	    scale = 1;
-	    for(l = 0; scale && (l < 80); l += 4) 
-	      {
-		__m256d vv = _mm256_load_pd(&v[l]);
-		__m256d vv_abs = _mm256_and_pd(vv,absMask_AVX.m);
-		vv_abs = _mm256_cmp_pd(vv_abs,minlikelihood_avx,_CMP_LT_OS);
-		if(_mm256_movemask_pd(vv_abs) != 15)
-		  scale = 0;
-	      }
-	    
-	    if(scale) 
-	      {		
-		__m256d PLL_TWOTOTHE256v = _mm256_set_pd(PLL_TWOTOTHE256,PLL_TWOTOTHE256,PLL_TWOTOTHE256,PLL_TWOTOTHE256);
-		for(l = 0; l < 80; l += 4) 
-		  {
-		    __m256d vv = _mm256_load_pd(&v[l]);
-		    _mm256_store_pd(&v[l],_mm256_mul_pd(vv,PLL_TWOTOTHE256v));
-		  }
-		if(useFastScaling)
-		  addScale += wgt[i];				
-		else
-		  ex3[i] += 1;
-	      } 
-	  } 
-      } 
-      break;
-    case PLL_INNER_INNER:      
-      for(i = 0; i < n; i++) 
-	{ 
-	  scale = 1;
-	  
-	  for(k = 0; k < 4; k++) 
-	    {
-	      vl = &(x1[80 * i + 20 * k]);
-	      vr = &(x2[80 * i + 20 * k]);
-	      v  = &(x3[80 * i + 20 * k]);	      	   
-
-	      __m256d vv[5]; 
-	      
-	      vv[0] = _mm256_setzero_pd();
-	      vv[1] = _mm256_setzero_pd();
-	      vv[2] = _mm256_setzero_pd();
-	      vv[3] = _mm256_setzero_pd();
-	      vv[4] = _mm256_setzero_pd();
-	      
-	      for(l = 0; l < 20; l++) 
-		{		  
-		  __m256d al = _mm256_setzero_pd();
-		  __m256d ar = _mm256_setzero_pd();
-       		  
-		  __m256d leftv  = _mm256_load_pd(&left[k * 400 + l * 20 + 0]);
-		  __m256d rightv = _mm256_load_pd(&right[k * 400 + l * 20 + 0]);
-		  __m256d vlv = _mm256_load_pd(&vl[0]);
-		  __m256d vrv = _mm256_load_pd(&vr[0]);
-		  
-#ifdef _FMA
-		    
-		  al = FMAMACC(al, vlv, leftv);
-		  ar = FMAMACC(ar, vrv, rightv);
-#else
-		  al = _mm256_add_pd(al,_mm256_mul_pd(vlv,leftv));
-		  ar = _mm256_add_pd(ar,_mm256_mul_pd(vrv,rightv));		  
-#endif
-
-		  leftv = _mm256_load_pd(&left[k * 400 + l * 20 + 4]);
-		  rightv = _mm256_load_pd(&right[k * 400 + l * 20 + 4]);
-		  vlv = _mm256_load_pd(&vl[4]);
-		  vrv = _mm256_load_pd(&vr[4]);
-#ifdef _FMA
-		    
-		  al = FMAMACC(al, vlv, leftv);
-		  ar = FMAMACC(ar, vrv, rightv);
-#else
-		  al = _mm256_add_pd(al,_mm256_mul_pd(vlv,leftv));
-		  ar = _mm256_add_pd(ar,_mm256_mul_pd(vrv,rightv));
-#endif
-
-		  leftv = _mm256_load_pd(&left[k * 400 + l * 20 + 8]);
-		  rightv = _mm256_load_pd(&right[k * 400 + l * 20 + 8]);
-		  vlv = _mm256_load_pd(&vl[8]);
-		  vrv = _mm256_load_pd(&vr[8]);
-#ifdef _FMA
-		    
-		  al = FMAMACC(al, vlv, leftv);
-		  ar = FMAMACC(ar, vrv, rightv);
-#else
-		  al = _mm256_add_pd(al,_mm256_mul_pd(vlv,leftv));
-		  ar = _mm256_add_pd(ar,_mm256_mul_pd(vrv,rightv));
-#endif
-
-		  leftv = _mm256_load_pd(&left[k * 400 + l * 20 + 12]);
-		  rightv = _mm256_load_pd(&right[k * 400 + l * 20 + 12]);
-		  vlv = _mm256_load_pd(&vl[12]);
-		  vrv = _mm256_load_pd(&vr[12]);
-#ifdef _FMA
-		    
-		  al = FMAMACC(al, vlv, leftv);
-		  ar = FMAMACC(ar, vrv, rightv);
-#else
-		  al = _mm256_add_pd(al,_mm256_mul_pd(vlv,leftv));
-		  ar = _mm256_add_pd(ar,_mm256_mul_pd(vrv,rightv));
-#endif
-
-		  leftv = _mm256_load_pd(&left[k * 400 + l * 20 + 16]);
-		  rightv = _mm256_load_pd(&right[k * 400 + l * 20 + 16]);
-		  vlv = _mm256_load_pd(&vl[16]);
-		  vrv = _mm256_load_pd(&vr[16]);
-
-#ifdef _FMA		    
-		  al = FMAMACC(al, vlv, leftv);
-		  ar = FMAMACC(ar, vrv, rightv);
-#else
-		  al = _mm256_add_pd(al,_mm256_mul_pd(vlv,leftv));
-		  ar = _mm256_add_pd(ar,_mm256_mul_pd(vrv,rightv));
-#endif
-
-		  /**************************************************************************************************************/
-
-		  al = hadd3(al);
-		  ar = hadd3(ar);
-		  al = _mm256_mul_pd(ar,al);
-		  
-		  /************************************************************************************************************/
-#ifdef _FMA		    
-		  __m256d ev =  _mm256_load_pd(&extEV[k][20 * l + 0]);
-		  vv[0] = FMAMACC(vv[0], al, ev);		 
-#else
-		  vv[0] = _mm256_add_pd(vv[0],_mm256_mul_pd(al, _mm256_load_pd(&extEV[k][20 * l + 0])));			  		 		  
-#endif
-		  _mm256_store_pd(&v[0],vv[0]);
-
-#ifdef _FMA		    
-		  ev =  _mm256_load_pd(&extEV[k][20 * l + 4]);
-		  vv[1] = FMAMACC(vv[1], al, ev);		 
-#else
-		  vv[1] = _mm256_add_pd(vv[1],_mm256_mul_pd(al, _mm256_load_pd(&extEV[k][20 * l + 4])));		  		 
-#endif
-		  _mm256_store_pd(&v[4],vv[1]);
-
-#ifdef _FMA		    
-		  ev =  _mm256_load_pd(&extEV[k][20 * l + 8]);
-		  vv[2] = FMAMACC(vv[2], al, ev);		 
-#else
-		  vv[2] = _mm256_add_pd(vv[2],_mm256_mul_pd(al, _mm256_load_pd(&extEV[k][20 * l + 8])));		  		 
-#endif
-		  _mm256_store_pd(&v[8],vv[2]);
-
-#ifdef _FMA		    
-		  ev =  _mm256_load_pd(&extEV[k][20 * l + 12]);
-		  vv[3] = FMAMACC(vv[3], al, ev);		 
-#else
-		  vv[3] = _mm256_add_pd(vv[3],_mm256_mul_pd(al, _mm256_load_pd(&extEV[k][20 * l + 12])));		  		 
-#endif
-		  _mm256_store_pd(&v[12],vv[3]);
-
-#ifdef _FMA		    
-		  ev =  _mm256_load_pd(&extEV[k][20 * l + 16]);
-		  vv[4] = FMAMACC(vv[4], al, ev);		 
-#else
-		  vv[4] = _mm256_add_pd(vv[4],_mm256_mul_pd(al, _mm256_load_pd(&extEV[k][20 * l + 16])));			 	  
-#endif
-		  _mm256_store_pd(&v[16],vv[4]);		 
-		} 
-	    }
-	  v = &(x3[80 * i]);
-	  scale = 1;
-	  __m256d minlikelihood_avx = _mm256_set1_pd(PLL_MINLIKELIHOOD);	 
-
-	  for(l = 0; scale && (l < 80); l += 4) 
-	    {
-	      __m256d vv = _mm256_load_pd(&v[l]);
-	      __m256d vv_abs = _mm256_and_pd(vv,absMask_AVX.m);
-	      vv_abs = _mm256_cmp_pd(vv_abs,minlikelihood_avx,_CMP_LT_OS);
-	      if(_mm256_movemask_pd(vv_abs) != 15)
-		scale = 0;	     
-	    }
-
-	  if(scale) 
-	    {		     	      
-	      __m256d PLL_TWOTOTHE256v = _mm256_set_pd(PLL_TWOTOTHE256,PLL_TWOTOTHE256,PLL_TWOTOTHE256,PLL_TWOTOTHE256);
-	      for(l = 0; l < 80; l += 4) 
-		{
-		  __m256d vv = _mm256_load_pd(&v[l]);
-		  _mm256_store_pd(&v[l],_mm256_mul_pd(vv,PLL_TWOTOTHE256v));
-		}
-	      if(useFastScaling)
-		addScale += wgt[i];					
-	      else
-		ex3[i] += 1;
-	    } 
-	}
-      break;
-    default:
-      assert(0);
-    }
- 
-  if(useFastScaling)
-    *scalerIncrement = addScale;
-}
- 
-
-void newviewGTRGAMMAPROT_AVX(int tipCase,
-			     double *x1, double *x2, double *x3, double *extEV, double *tipVector,
-			     int *ex3, unsigned char *tipX1, unsigned char *tipX2, int n, 
-			     double *left, double *right, int *wgt, int *scalerIncrement, const pllBoolean useFastScaling) 
-{
-  double	
-    *uX1, 
-    *uX2, 
-    *v, 
-    x1px2, 
-    *vl, 
-    *vr;
-  
-  int	
-    i, 
-    j, 
-    l, 
-    k, 
-    scale, 
-    addScale = 0;
-
- 
-#ifndef GCC_VERSION
-#define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
-#endif
-
-
-#if GCC_VERSION < 40500 && defined(__GNUC__)
-   __m256d
-    bitmask = _mm256_set_pd(0,0,0,-1);
-#else
-  __m256i
-    bitmask = _mm256_set_epi32(0, 0, 0, 0, 0, 0, -1, -1);
-#endif 
-  
-  switch(tipCase) 
-    {
-    case PLL_TIP_TIP: 
-      {
-       
-    PLL_ALIGN_BEGIN double
-	  umpX1[1840] PLL_ALIGN_END,
-	  umpX2[1840] PLL_ALIGN_END;
-
-	for(i = 0; i < 23; i++) 
-	  {
-	    v = &(tipVector[20 * i]);
-	    
-	    for(k = 0; k < 80; k++) 
-	      {
-		double 
-		  *ll =  &left[k * 20],
-		  *rr =  &right[k * 20];
-		
-		__m256d 
-		  umpX1v = _mm256_setzero_pd(),
-		  umpX2v = _mm256_setzero_pd();
-		
-		for(l = 0; l < 20; l+=4) 
-		  {
-		    __m256d vv = _mm256_load_pd(&v[l]);
-#ifdef _FMA
-		    __m256d llv = _mm256_load_pd(&ll[l]);
-		    umpX1v = FMAMACC(umpX1v,vv,llv);
-		    __m256d rrv = _mm256_load_pd(&rr[l]);
-		    umpX2v = FMAMACC(umpX2v,vv,rrv);
-#else		    
-		    umpX1v = _mm256_add_pd(umpX1v,_mm256_mul_pd(vv,_mm256_load_pd(&ll[l])));
-		    umpX2v = _mm256_add_pd(umpX2v,_mm256_mul_pd(vv,_mm256_load_pd(&rr[l])));
-#endif
-		  }
-		
-		umpX1v = hadd3(umpX1v);
-		umpX2v = hadd3(umpX2v);
-		_mm256_maskstore_pd(&umpX1[80 * i + k], bitmask, umpX1v);
-		_mm256_maskstore_pd(&umpX2[80 * i + k], bitmask, umpX2v);
-	      } 
-	  }
-
-	for(i = 0; i < n; i++) 
-	  {	    
-	    uX1 = &umpX1[80 * tipX1[i]];
-	    uX2 = &umpX2[80 * tipX2[i]];
-	   
-	    for(j = 0; j < 4; j++) 
-	      {     	
-		__m256d vv[5];  
-
-		v = &x3[i * 80 + j * 20];
-			
-		vv[0] = _mm256_setzero_pd();
-		vv[1] = _mm256_setzero_pd();
-		vv[2] = _mm256_setzero_pd();
-		vv[3] = _mm256_setzero_pd();
-		vv[4] = _mm256_setzero_pd();
-
-		for(k = 0; k < 20; k++) 
-		  {			 
-		    x1px2 = uX1[j * 20 + k] * uX2[j * 20 + k];
-
-		    __m256d x1px2v = _mm256_set1_pd(x1px2);		    
-		    
-		    __m256d extEvv = _mm256_load_pd(&extEV[20 * k]);
-#ifdef _FMA
-		    vv[0] = FMAMACC(vv[0],x1px2v,extEvv);
-#else
-		    vv[0] = _mm256_add_pd(vv[0],_mm256_mul_pd(x1px2v,extEvv));
-#endif
-		    _mm256_store_pd(&v[0],vv[0]);
-		    
-		    extEvv = _mm256_load_pd(&extEV[20 * k + 4]);
-#ifdef _FMA
-		    vv[1] = FMAMACC(vv[1],x1px2v,extEvv);
-#else
-		    vv[1] = _mm256_add_pd(vv[1],_mm256_mul_pd(x1px2v,extEvv));
-#endif
-		    _mm256_store_pd(&v[4],vv[1]);
-
-		    extEvv = _mm256_load_pd(&extEV[20 * k + 8]);
-#ifdef _FMA
-		    vv[2] = FMAMACC(vv[2],x1px2v,extEvv);
-#else
-		    vv[2] = _mm256_add_pd(vv[2],_mm256_mul_pd(x1px2v,extEvv));
-#endif
-		    _mm256_store_pd(&v[8],vv[2]);
-
-		    extEvv = _mm256_load_pd(&extEV[20 * k + 12]);
-#ifdef _FMA
-		    vv[3] = FMAMACC(vv[3],x1px2v,extEvv);
-#else
-		    vv[3] = _mm256_add_pd(vv[3],_mm256_mul_pd(x1px2v,extEvv));
-#endif
-		    _mm256_store_pd(&v[12],vv[3]);
-
-		    extEvv = _mm256_load_pd(&extEV[20 * k + 16]);
-#ifdef _FMA
-		    vv[4] = FMAMACC(vv[4],x1px2v,extEvv);
-#else
-		    vv[4] = _mm256_add_pd(vv[4],_mm256_mul_pd(x1px2v,extEvv));
-#endif
-		    _mm256_store_pd(&v[16],vv[4]);
-		  } 
-	      } 
-	  } 
-      } 
-      break;
-    case PLL_TIP_INNER: 
-      {
-
-    	  PLL_ALIGN_BEGIN double
-	  umpX1[1840] PLL_ALIGN_END,
-	  ump_x2[20] PLL_ALIGN_END;
-
-	for(i = 0; i < 23; i++) 
-	  {
-	    v = &(tipVector[20 * i]);
-
-	    for(k = 0; k < 80; k++) 
-	      {
-		__m256d umpX1v = _mm256_setzero_pd();
-		for(l = 0; l < 20; l+=4) 
-		  {
-		    __m256d vv = _mm256_load_pd(&v[l]);
-		    __m256d leftv = _mm256_load_pd(&left[k * 20 + l]);
-#ifdef _FMA
-		   
-		    umpX1v = FMAMACC(umpX1v, vv, leftv);
-#else
-		    umpX1v = _mm256_add_pd(umpX1v, _mm256_mul_pd(vv, leftv));
-#endif
-		  }
-		umpX1v = hadd3(umpX1v);
-		_mm256_maskstore_pd(&umpX1[80 * i + k], bitmask, umpX1v);
-	      } 
-	  }
-	
-	for (i = 0; i < n; i++) 
-	  {	   
-	    uX1 = &umpX1[80 * tipX1[i]];
-	   	    
-	    for(k = 0; k < 4; k++) 
-	      {
-		v = &(x2[80 * i + k * 20]);
-		
-		for(l = 0; l < 20; l++) 
-		  {
-		    __m256d ump_x2v = _mm256_setzero_pd();
-		    		  
-		    __m256d vv = _mm256_load_pd(&v[0]);
-		    __m256d rightv = _mm256_load_pd(&right[k*400+l*20+0]);
-#ifdef _FMA
-		    ump_x2v = FMAMACC(ump_x2v,vv,rightv);
-#else
-		    ump_x2v = _mm256_add_pd(ump_x2v, _mm256_mul_pd(vv, rightv));
-#endif
-		    
-		    vv = _mm256_load_pd(&v[4]);
-		    rightv = _mm256_load_pd(&right[k*400+l*20+4]);
-#ifdef _FMA
-		    ump_x2v = FMAMACC(ump_x2v,vv,rightv);
-#else
-		    ump_x2v = _mm256_add_pd(ump_x2v, _mm256_mul_pd(vv, rightv));
-#endif
-
-		    vv = _mm256_load_pd(&v[8]);
-		    rightv = _mm256_load_pd(&right[k*400+l*20+8]);
-#ifdef _FMA
-		    ump_x2v = FMAMACC(ump_x2v,vv,rightv);
-#else
-		    ump_x2v = _mm256_add_pd(ump_x2v, _mm256_mul_pd(vv, rightv));
-#endif
-
-		    vv = _mm256_load_pd(&v[12]);
-		    rightv = _mm256_load_pd(&right[k*400+l*20+12]);
-#ifdef _FMA
-		    ump_x2v = FMAMACC(ump_x2v,vv,rightv);
-#else
-		    ump_x2v = _mm256_add_pd(ump_x2v, _mm256_mul_pd(vv, rightv));
-#endif
-
-		    vv = _mm256_load_pd(&v[16]);
-		    rightv = _mm256_load_pd(&right[k*400+l*20+16]);
-#ifdef _FMA
-		    ump_x2v = FMAMACC(ump_x2v,vv,rightv);
-#else
-		    ump_x2v = _mm256_add_pd(ump_x2v, _mm256_mul_pd(vv, rightv));
-#endif
-		    
-		    ump_x2v = hadd3(ump_x2v);
-		    _mm256_maskstore_pd(&ump_x2[l], bitmask, ump_x2v);
-		  }
-		
-		v = &(x3[80 * i + 20 * k]);
-	
-
-		__m256d vv[5]; 
-
-		vv[0] = _mm256_setzero_pd();
-		vv[1] = _mm256_setzero_pd();
-		vv[2] = _mm256_setzero_pd();
-		vv[3] = _mm256_setzero_pd();
-		vv[4] = _mm256_setzero_pd();
-		
-		for(l = 0; l < 20; l++) 
-		  {
-		    x1px2 = uX1[k * 20 + l]	* ump_x2[l];
-		    __m256d x1px2v = _mm256_set1_pd(x1px2);	
-	    		 
-#ifdef _FMA
-		    __m256d ev = _mm256_load_pd(&extEV[l * 20 + 0]);
-		    vv[0] = FMAMACC(vv[0],x1px2v, ev);
-#else
-		    vv[0] = _mm256_add_pd(vv[0],_mm256_mul_pd(x1px2v, _mm256_load_pd(&extEV[l * 20 + 0])));
-#endif
-		    _mm256_store_pd(&v[0],vv[0]);
-
-#ifdef _FMA
-		    ev = _mm256_load_pd(&extEV[l * 20 + 4]);
-		    vv[1] = FMAMACC(vv[1],x1px2v, ev);
-#else
-		    vv[1] = _mm256_add_pd(vv[1],_mm256_mul_pd(x1px2v, _mm256_load_pd(&extEV[l * 20 + 4])));
-#endif
-		    _mm256_store_pd(&v[4],vv[1]);
-
-#ifdef _FMA
-		    ev = _mm256_load_pd(&extEV[l * 20 + 8]);
-		    vv[2] = FMAMACC(vv[2],x1px2v, ev);
-#else
-		    vv[2] = _mm256_add_pd(vv[2],_mm256_mul_pd(x1px2v, _mm256_load_pd(&extEV[l * 20 + 8])));
-#endif
-		    _mm256_store_pd(&v[8],vv[2]);
-		    
-#ifdef _FMA
-		    ev = _mm256_load_pd(&extEV[l * 20 + 12]);
-		    vv[3] = FMAMACC(vv[3],x1px2v, ev);
-#else
-		    vv[3] = _mm256_add_pd(vv[3],_mm256_mul_pd(x1px2v, _mm256_load_pd(&extEV[l * 20 + 12])));
-#endif
-		    _mm256_store_pd(&v[12],vv[3]);
-
-
-#ifdef _FMA
-		    ev = _mm256_load_pd(&extEV[l * 20 + 16]);
-		    vv[4] = FMAMACC(vv[4],x1px2v, ev);
-#else
-		    vv[4] = _mm256_add_pd(vv[4],_mm256_mul_pd(x1px2v, _mm256_load_pd(&extEV[l * 20 + 16])));
-#endif
-		    _mm256_store_pd(&v[16],vv[4]);
-
-		  } 
-	      }
-	   
-	    v = &x3[80 * i];
-	    __m256d minlikelihood_avx = _mm256_set1_pd(PLL_MINLIKELIHOOD);
-	    scale = 1;
-	    for(l = 0; scale && (l < 80); l += 4) 
-	      {
-		__m256d vv = _mm256_load_pd(&v[l]);
-		__m256d vv_abs = _mm256_and_pd(vv,absMask_AVX.m);
-		vv_abs = _mm256_cmp_pd(vv_abs,minlikelihood_avx,_CMP_LT_OS);
-		if(_mm256_movemask_pd(vv_abs) != 15)
-		  scale = 0;
-	      }
-	    
-	    if(scale) 
-	      {		
-		__m256d PLL_TWOTOTHE256v = _mm256_set_pd(PLL_TWOTOTHE256,PLL_TWOTOTHE256,PLL_TWOTOTHE256,PLL_TWOTOTHE256);
-		for(l = 0; l < 80; l += 4) 
-		  {
-		    __m256d vv = _mm256_load_pd(&v[l]);
-		    _mm256_store_pd(&v[l],_mm256_mul_pd(vv,PLL_TWOTOTHE256v));
-		  }
-		if(useFastScaling)
-		  addScale += wgt[i];				
-		else
-		  ex3[i] += 1;
-	      } 
-	  } 
-      } 
-      break;
-    case PLL_INNER_INNER:      
-      for(i = 0; i < n; i++) 
-	{ 
-	  scale = 1;
-	  
-	  for(k = 0; k < 4; k++) 
-	    {
-	      vl = &(x1[80 * i + 20 * k]);
-	      vr = &(x2[80 * i + 20 * k]);
-	      v  = &(x3[80 * i + 20 * k]);	      	   
-
-	      __m256d vv[5]; 
-	      
-	      vv[0] = _mm256_setzero_pd();
-	      vv[1] = _mm256_setzero_pd();
-	      vv[2] = _mm256_setzero_pd();
-	      vv[3] = _mm256_setzero_pd();
-	      vv[4] = _mm256_setzero_pd();
-	      
-	      for(l = 0; l < 20; l++) 
-		{		  
-		  __m256d al = _mm256_setzero_pd();
-		  __m256d ar = _mm256_setzero_pd();
-       		  
-		  __m256d leftv  = _mm256_load_pd(&left[k * 400 + l * 20 + 0]);
-		  __m256d rightv = _mm256_load_pd(&right[k * 400 + l * 20 + 0]);
-		  __m256d vlv = _mm256_load_pd(&vl[0]);
-		  __m256d vrv = _mm256_load_pd(&vr[0]);
-		  
-#ifdef _FMA
-		    
-		  al = FMAMACC(al, vlv, leftv);
-		  ar = FMAMACC(ar, vrv, rightv);
-#else
-		  al = _mm256_add_pd(al,_mm256_mul_pd(vlv,leftv));
-		  ar = _mm256_add_pd(ar,_mm256_mul_pd(vrv,rightv));		  
-#endif
-
-		  leftv = _mm256_load_pd(&left[k * 400 + l * 20 + 4]);
-		  rightv = _mm256_load_pd(&right[k * 400 + l * 20 + 4]);
-		  vlv = _mm256_load_pd(&vl[4]);
-		  vrv = _mm256_load_pd(&vr[4]);
-#ifdef _FMA
-		    
-		  al = FMAMACC(al, vlv, leftv);
-		  ar = FMAMACC(ar, vrv, rightv);
-#else
-		  al = _mm256_add_pd(al,_mm256_mul_pd(vlv,leftv));
-		  ar = _mm256_add_pd(ar,_mm256_mul_pd(vrv,rightv));
-#endif
-
-		  leftv = _mm256_load_pd(&left[k * 400 + l * 20 + 8]);
-		  rightv = _mm256_load_pd(&right[k * 400 + l * 20 + 8]);
-		  vlv = _mm256_load_pd(&vl[8]);
-		  vrv = _mm256_load_pd(&vr[8]);
-#ifdef _FMA
-		    
-		  al = FMAMACC(al, vlv, leftv);
-		  ar = FMAMACC(ar, vrv, rightv);
-#else
-		  al = _mm256_add_pd(al,_mm256_mul_pd(vlv,leftv));
-		  ar = _mm256_add_pd(ar,_mm256_mul_pd(vrv,rightv));
-#endif
-
-		  leftv = _mm256_load_pd(&left[k * 400 + l * 20 + 12]);
-		  rightv = _mm256_load_pd(&right[k * 400 + l * 20 + 12]);
-		  vlv = _mm256_load_pd(&vl[12]);
-		  vrv = _mm256_load_pd(&vr[12]);
-#ifdef _FMA
-		    
-		  al = FMAMACC(al, vlv, leftv);
-		  ar = FMAMACC(ar, vrv, rightv);
-#else
-		  al = _mm256_add_pd(al,_mm256_mul_pd(vlv,leftv));
-		  ar = _mm256_add_pd(ar,_mm256_mul_pd(vrv,rightv));
-#endif
-
-		  leftv = _mm256_load_pd(&left[k * 400 + l * 20 + 16]);
-		  rightv = _mm256_load_pd(&right[k * 400 + l * 20 + 16]);
-		  vlv = _mm256_load_pd(&vl[16]);
-		  vrv = _mm256_load_pd(&vr[16]);
-
-#ifdef _FMA		    
-		  al = FMAMACC(al, vlv, leftv);
-		  ar = FMAMACC(ar, vrv, rightv);
-#else
-		  al = _mm256_add_pd(al,_mm256_mul_pd(vlv,leftv));
-		  ar = _mm256_add_pd(ar,_mm256_mul_pd(vrv,rightv));
-#endif
-
-		  /**************************************************************************************************************/
-
-		  al = hadd3(al);
-		  ar = hadd3(ar);
-		  al = _mm256_mul_pd(ar,al);
-		  
-		  /************************************************************************************************************/
-#ifdef _FMA		    
-		  __m256d ev =  _mm256_load_pd(&extEV[20 * l + 0]);
-		  vv[0] = FMAMACC(vv[0], al, ev);		 
-#else
-		  vv[0] = _mm256_add_pd(vv[0],_mm256_mul_pd(al, _mm256_load_pd(&extEV[20 * l + 0])));			  		 		  
-#endif
-		  _mm256_store_pd(&v[0],vv[0]);
-
-#ifdef _FMA		    
-		  ev =  _mm256_load_pd(&extEV[20 * l + 4]);
-		  vv[1] = FMAMACC(vv[1], al, ev);		 
-#else
-		  vv[1] = _mm256_add_pd(vv[1],_mm256_mul_pd(al, _mm256_load_pd(&extEV[20 * l + 4])));		  		 
-#endif
-		  _mm256_store_pd(&v[4],vv[1]);
-
-#ifdef _FMA		    
-		  ev =  _mm256_load_pd(&extEV[20 * l + 8]);
-		  vv[2] = FMAMACC(vv[2], al, ev);		 
-#else
-		  vv[2] = _mm256_add_pd(vv[2],_mm256_mul_pd(al, _mm256_load_pd(&extEV[20 * l + 8])));		  		 
-#endif
-		  _mm256_store_pd(&v[8],vv[2]);
-
-#ifdef _FMA		    
-		  ev =  _mm256_load_pd(&extEV[20 * l + 12]);
-		  vv[3] = FMAMACC(vv[3], al, ev);		 
-#else
-		  vv[3] = _mm256_add_pd(vv[3],_mm256_mul_pd(al, _mm256_load_pd(&extEV[20 * l + 12])));		  		 
-#endif
-		  _mm256_store_pd(&v[12],vv[3]);
-
-#ifdef _FMA		    
-		  ev =  _mm256_load_pd(&extEV[20 * l + 16]);
-		  vv[4] = FMAMACC(vv[4], al, ev);		 
-#else
-		  vv[4] = _mm256_add_pd(vv[4],_mm256_mul_pd(al, _mm256_load_pd(&extEV[20 * l + 16])));			 	  
-#endif
-		  _mm256_store_pd(&v[16],vv[4]);		 
-		} 
-	    }
-	  v = &(x3[80 * i]);
-	  scale = 1;
-	  __m256d minlikelihood_avx = _mm256_set1_pd(PLL_MINLIKELIHOOD);	 
-
-	  for(l = 0; scale && (l < 80); l += 4) 
-	    {
-	      __m256d vv = _mm256_load_pd(&v[l]);
-	      __m256d vv_abs = _mm256_and_pd(vv,absMask_AVX.m);
-	      vv_abs = _mm256_cmp_pd(vv_abs,minlikelihood_avx,_CMP_LT_OS);
-	      if(_mm256_movemask_pd(vv_abs) != 15)
-		scale = 0;	     
-	    }
-
-	  if(scale) 
-	    {		     	      
-	      __m256d PLL_TWOTOTHE256v = _mm256_set_pd(PLL_TWOTOTHE256,PLL_TWOTOTHE256,PLL_TWOTOTHE256,PLL_TWOTOTHE256);
-	      for(l = 0; l < 80; l += 4) 
-		{
-		  __m256d vv = _mm256_load_pd(&v[l]);
-		  _mm256_store_pd(&v[l],_mm256_mul_pd(vv,PLL_TWOTOTHE256v));
-		}
-	      if(useFastScaling)
-		addScale += wgt[i];					
-	      else
-		ex3[i] += 1;
-	    } 
-	}
-      break;
-    default:
-      assert(0);
-    }
- 
-  if(useFastScaling)
-    *scalerIncrement = addScale;
-}
-
-
-
-void newviewGTRGAMMAPROT_AVX_GAPPED_SAVE(int tipCase,
-					 double *x1_start, double *x2_start, double *x3_start, double *extEV, double *tipVector,
-					 int *ex3, unsigned char *tipX1, unsigned char *tipX2, int n, 
-					 double *left, double *right, int *wgt, int *scalerIncrement, const pllBoolean useFastScaling,
-					 unsigned int *x1_gap, unsigned int *x2_gap, unsigned int *x3_gap, 
-					 double *x1_gapColumn, double *x2_gapColumn, double *x3_gapColumn) 
-{
-  double	
-    *x1 = x1_start,
-    *x2 = x2_start,
-    *x3_ptr = x3_start,
-    *x2_ptr = x2_start,
-    *x1_ptr = x1_start,
-    *uX1, 
-    *uX2, 
-    *v, 
-    x1px2, 
-    *vl, 
-    *vr;
-  
-  int	
-    i, 
-    j, 
-    l, 
-    k, 
-    gapScaling = 0,
-    scale, 
-    addScale = 0;
-
- 
-#ifndef GCC_VERSION
-#define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
-#endif
-
-
-#if GCC_VERSION < 40500 && defined(__GNUC__)
-   __m256d
-    bitmask = _mm256_set_pd(0,0,0,-1);
-#else
-  __m256i
-    bitmask = _mm256_set_epi32(0, 0, 0, 0, 0, 0, -1, -1);
-#endif 
-  
-  switch(tipCase) 
-    {
-    case PLL_TIP_TIP: 
-      {       
-    	  PLL_ALIGN_BEGIN double
-	  umpX1[1840] PLL_ALIGN_END,
-	  umpX2[1840] PLL_ALIGN_END;
-
-
-
-	for(i = 0; i < 23; i++) 
-	  {
-	    v = &(tipVector[20 * i]);
-	    
-	    for(k = 0; k < 80; k++) 
-	      {
-		double 
-		  *ll =  &left[k * 20],
-		  *rr =  &right[k * 20];
-		
-		__m256d 
-		  umpX1v = _mm256_setzero_pd(),
-		  umpX2v = _mm256_setzero_pd();
-		
-		for(l = 0; l < 20; l+=4) 
-		  {
-		    __m256d vv = _mm256_load_pd(&v[l]);
-#ifdef _FMA
-		    __m256d llv = _mm256_load_pd(&ll[l]);
-		    umpX1v = FMAMACC(umpX1v,vv,llv);
-		    __m256d rrv = _mm256_load_pd(&rr[l]);
-		    umpX2v = FMAMACC(umpX2v,vv,rrv);
-#else		    
-		    umpX1v = _mm256_add_pd(umpX1v,_mm256_mul_pd(vv,_mm256_load_pd(&ll[l])));
-		    umpX2v = _mm256_add_pd(umpX2v,_mm256_mul_pd(vv,_mm256_load_pd(&rr[l])));
-#endif
-		  }
-		
-		umpX1v = hadd3(umpX1v);
-		umpX2v = hadd3(umpX2v);
-		_mm256_maskstore_pd(&umpX1[80 * i + k], bitmask, umpX1v);
-		_mm256_maskstore_pd(&umpX2[80 * i + k], bitmask, umpX2v);
-	      } 
-	  }
-
-	
-	{	    
-	  uX1 = &umpX1[1760];
-	  uX2 = &umpX2[1760];
-	  
-	  for(j = 0; j < 4; j++) 
-	    {     	
-	      __m256d vv[5];  
-	      
-	      v = &x3_gapColumn[j * 20];
-	      
-	      vv[0] = _mm256_setzero_pd();
-	      vv[1] = _mm256_setzero_pd();
-	      vv[2] = _mm256_setzero_pd();
-	      vv[3] = _mm256_setzero_pd();
-	      vv[4] = _mm256_setzero_pd();
-	      
-	      for(k = 0; k < 20; k++) 
-		{			 
-		  x1px2 = uX1[j * 20 + k] * uX2[j * 20 + k];
-		  
-		  __m256d x1px2v = _mm256_set1_pd(x1px2);		    
-		  
-		  __m256d extEvv = _mm256_load_pd(&extEV[20 * k]);
-#ifdef _FMA
-		  vv[0] = FMAMACC(vv[0],x1px2v,extEvv);
-#else
-		  vv[0] = _mm256_add_pd(vv[0],_mm256_mul_pd(x1px2v,extEvv));
-#endif
-		  _mm256_store_pd(&v[0],vv[0]);
-		  
-		  extEvv = _mm256_load_pd(&extEV[20 * k + 4]);
-#ifdef _FMA
-		  vv[1] = FMAMACC(vv[1],x1px2v,extEvv);
-#else
-		  vv[1] = _mm256_add_pd(vv[1],_mm256_mul_pd(x1px2v,extEvv));
-#endif
-		  _mm256_store_pd(&v[4],vv[1]);
-		  
-		  extEvv = _mm256_load_pd(&extEV[20 * k + 8]);
-#ifdef _FMA
-		  vv[2] = FMAMACC(vv[2],x1px2v,extEvv);
-#else
-		  vv[2] = _mm256_add_pd(vv[2],_mm256_mul_pd(x1px2v,extEvv));
-#endif
-		  _mm256_store_pd(&v[8],vv[2]);
-		  
-		  extEvv = _mm256_load_pd(&extEV[20 * k + 12]);
-#ifdef _FMA
-		  vv[3] = FMAMACC(vv[3],x1px2v,extEvv);
-#else
-		  vv[3] = _mm256_add_pd(vv[3],_mm256_mul_pd(x1px2v,extEvv));
-#endif
-		  _mm256_store_pd(&v[12],vv[3]);
-		  
-		  extEvv = _mm256_load_pd(&extEV[20 * k + 16]);
-#ifdef _FMA
-		  vv[4] = FMAMACC(vv[4],x1px2v,extEvv);
-#else
-		  vv[4] = _mm256_add_pd(vv[4],_mm256_mul_pd(x1px2v,extEvv));
-#endif
-		  _mm256_store_pd(&v[16],vv[4]);
-		} 
-	    } 
-	}
-
-	
-	for(i = 0; i < n; i++) 
-	  {
-	    if(!(x3_gap[i / 32] & mask32[i % 32]))
-	      {	    
-		uX1 = &umpX1[80 * tipX1[i]];
-		uX2 = &umpX2[80 * tipX2[i]];
-	   
-		for(j = 0; j < 4; j++) 
-		  {     	
-		    __m256d vv[5];  
-		    
-		    v = &x3_ptr[j * 20];
-			
-		    vv[0] = _mm256_setzero_pd();
-		    vv[1] = _mm256_setzero_pd();
-		    vv[2] = _mm256_setzero_pd();
-		    vv[3] = _mm256_setzero_pd();
-		    vv[4] = _mm256_setzero_pd();
-
-		    for(k = 0; k < 20; k++) 
-		      {			 
-			x1px2 = uX1[j * 20 + k] * uX2[j * 20 + k];
-			
-			__m256d x1px2v = _mm256_set1_pd(x1px2);		    
-			
-			__m256d extEvv = _mm256_load_pd(&extEV[20 * k]);
-#ifdef _FMA
-			vv[0] = FMAMACC(vv[0],x1px2v,extEvv);
-#else
-			vv[0] = _mm256_add_pd(vv[0],_mm256_mul_pd(x1px2v,extEvv));
-#endif
-			_mm256_store_pd(&v[0],vv[0]);
-			
-			extEvv = _mm256_load_pd(&extEV[20 * k + 4]);
-#ifdef _FMA
-			vv[1] = FMAMACC(vv[1],x1px2v,extEvv);
-#else
-			vv[1] = _mm256_add_pd(vv[1],_mm256_mul_pd(x1px2v,extEvv));
-#endif
-			_mm256_store_pd(&v[4],vv[1]);
-			
-			extEvv = _mm256_load_pd(&extEV[20 * k + 8]);
-#ifdef _FMA
-			vv[2] = FMAMACC(vv[2],x1px2v,extEvv);
-#else
-			vv[2] = _mm256_add_pd(vv[2],_mm256_mul_pd(x1px2v,extEvv));
-#endif
-			_mm256_store_pd(&v[8],vv[2]);
-			
-			extEvv = _mm256_load_pd(&extEV[20 * k + 12]);
-#ifdef _FMA
-			vv[3] = FMAMACC(vv[3],x1px2v,extEvv);
-#else
-			vv[3] = _mm256_add_pd(vv[3],_mm256_mul_pd(x1px2v,extEvv));
-#endif
-			_mm256_store_pd(&v[12],vv[3]);
-			
-			extEvv = _mm256_load_pd(&extEV[20 * k + 16]);
-#ifdef _FMA
-			vv[4] = FMAMACC(vv[4],x1px2v,extEvv);
-#else
-			vv[4] = _mm256_add_pd(vv[4],_mm256_mul_pd(x1px2v,extEvv));
-#endif
-			_mm256_store_pd(&v[16],vv[4]);
-		      } 
-		  }
-		x3_ptr += 80;		  
-	      }
-	  }
-      }
-      break;
-    case PLL_TIP_INNER: 
-      {
-    	  PLL_ALIGN_BEGIN double
-	  umpX1[1840] PLL_ALIGN_END,
-	  ump_x2[20] PLL_ALIGN_END;
-
-
-
-	for(i = 0; i < 23; i++) 
-	  {
-	    v = &(tipVector[20 * i]);
-
-	    for(k = 0; k < 80; k++) 
-	      {
-		__m256d umpX1v = _mm256_setzero_pd();
-		for(l = 0; l < 20; l+=4) 
-		  {
-		    __m256d vv = _mm256_load_pd(&v[l]);
-		    __m256d leftv = _mm256_load_pd(&left[k * 20 + l]);
-#ifdef _FMA
-		   
-		    umpX1v = FMAMACC(umpX1v, vv, leftv);
-#else
-		    umpX1v = _mm256_add_pd(umpX1v, _mm256_mul_pd(vv, leftv));
-#endif
-		  }
-		umpX1v = hadd3(umpX1v);
-		_mm256_maskstore_pd(&umpX1[80 * i + k], bitmask, umpX1v);
-	      } 
-	  }
-
-	{	   
-	  uX1 = &umpX1[1760];
-	   	    
-	  for(k = 0; k < 4; k++) 
-	    {
-	      v = &(x2_gapColumn[k * 20]);
-		
-		for(l = 0; l < 20; l++) 
-		  {
-		    __m256d ump_x2v = _mm256_setzero_pd();
-		    		  
-		    __m256d vv = _mm256_load_pd(&v[0]);
-		    __m256d rightv = _mm256_load_pd(&right[k*400+l*20+0]);
-#ifdef _FMA
-		    ump_x2v = FMAMACC(ump_x2v,vv,rightv);
-#else
-		    ump_x2v = _mm256_add_pd(ump_x2v, _mm256_mul_pd(vv, rightv));
-#endif
-		    
-		    vv = _mm256_load_pd(&v[4]);
-		    rightv = _mm256_load_pd(&right[k*400+l*20+4]);
-#ifdef _FMA
-		    ump_x2v = FMAMACC(ump_x2v,vv,rightv);
-#else
-		    ump_x2v = _mm256_add_pd(ump_x2v, _mm256_mul_pd(vv, rightv));
-#endif
-
-		    vv = _mm256_load_pd(&v[8]);
-		    rightv = _mm256_load_pd(&right[k*400+l*20+8]);
-#ifdef _FMA
-		    ump_x2v = FMAMACC(ump_x2v,vv,rightv);
-#else
-		    ump_x2v = _mm256_add_pd(ump_x2v, _mm256_mul_pd(vv, rightv));
-#endif
-
-		    vv = _mm256_load_pd(&v[12]);
-		    rightv = _mm256_load_pd(&right[k*400+l*20+12]);
-#ifdef _FMA
-		    ump_x2v = FMAMACC(ump_x2v,vv,rightv);
-#else
-		    ump_x2v = _mm256_add_pd(ump_x2v, _mm256_mul_pd(vv, rightv));
-#endif
-
-		    vv = _mm256_load_pd(&v[16]);
-		    rightv = _mm256_load_pd(&right[k*400+l*20+16]);
-#ifdef _FMA
-		    ump_x2v = FMAMACC(ump_x2v,vv,rightv);
-#else
-		    ump_x2v = _mm256_add_pd(ump_x2v, _mm256_mul_pd(vv, rightv));
-#endif
-		    
-		    ump_x2v = hadd3(ump_x2v);
-		    _mm256_maskstore_pd(&ump_x2[l], bitmask, ump_x2v);
-		  }
-		
-		v = &x3_gapColumn[20 * k];
-	
-		__m256d vv[5]; 
-
-		vv[0] = _mm256_setzero_pd();
-		vv[1] = _mm256_setzero_pd();
-		vv[2] = _mm256_setzero_pd();
-		vv[3] = _mm256_setzero_pd();
-		vv[4] = _mm256_setzero_pd();
-		
-		for(l = 0; l < 20; l++) 
-		  {
-		    x1px2 = uX1[k * 20 + l]	* ump_x2[l];
-		    __m256d x1px2v = _mm256_set1_pd(x1px2);	
-	    		 
-#ifdef _FMA
-		    __m256d ev = _mm256_load_pd(&extEV[l * 20 + 0]);
-		    vv[0] = FMAMACC(vv[0],x1px2v, ev);
-#else
-		    vv[0] = _mm256_add_pd(vv[0],_mm256_mul_pd(x1px2v, _mm256_load_pd(&extEV[l * 20 + 0])));
-#endif
-		    _mm256_store_pd(&v[0],vv[0]);
-
-#ifdef _FMA
-		    ev = _mm256_load_pd(&extEV[l * 20 + 4]);
-		    vv[1] = FMAMACC(vv[1],x1px2v, ev);
-#else
-		    vv[1] = _mm256_add_pd(vv[1],_mm256_mul_pd(x1px2v, _mm256_load_pd(&extEV[l * 20 + 4])));
-#endif
-		    _mm256_store_pd(&v[4],vv[1]);
-
-#ifdef _FMA
-		    ev = _mm256_load_pd(&extEV[l * 20 + 8]);
-		    vv[2] = FMAMACC(vv[2],x1px2v, ev);
-#else
-		    vv[2] = _mm256_add_pd(vv[2],_mm256_mul_pd(x1px2v, _mm256_load_pd(&extEV[l * 20 + 8])));
-#endif
-		    _mm256_store_pd(&v[8],vv[2]);
-		    
-#ifdef _FMA
-		    ev = _mm256_load_pd(&extEV[l * 20 + 12]);
-		    vv[3] = FMAMACC(vv[3],x1px2v, ev);
-#else
-		    vv[3] = _mm256_add_pd(vv[3],_mm256_mul_pd(x1px2v, _mm256_load_pd(&extEV[l * 20 + 12])));
-#endif
-		    _mm256_store_pd(&v[12],vv[3]);
-
-
-#ifdef _FMA
-		    ev = _mm256_load_pd(&extEV[l * 20 + 16]);
-		    vv[4] = FMAMACC(vv[4],x1px2v, ev);
-#else
-		    vv[4] = _mm256_add_pd(vv[4],_mm256_mul_pd(x1px2v, _mm256_load_pd(&extEV[l * 20 + 16])));
-#endif
-		    _mm256_store_pd(&v[16],vv[4]);
-
-		  } 
-	      }
-	   
-	    v = x3_gapColumn;
-	    __m256d minlikelihood_avx = _mm256_set1_pd(PLL_MINLIKELIHOOD);
-	    scale = 1;
-	    for(l = 0; scale && (l < 80); l += 4) 
-	      {
-		__m256d vv = _mm256_load_pd(&v[l]);
-		__m256d vv_abs = _mm256_and_pd(vv,absMask_AVX.m);
-		vv_abs = _mm256_cmp_pd(vv_abs,minlikelihood_avx,_CMP_LT_OS);
-		if(_mm256_movemask_pd(vv_abs) != 15)
-		  scale = 0;
-	      }
-	    
-	    if(scale) 
-	      {		
-		__m256d PLL_TWOTOTHE256v = _mm256_set_pd(PLL_TWOTOTHE256,PLL_TWOTOTHE256,PLL_TWOTOTHE256,PLL_TWOTOTHE256);
-		gapScaling = 1;
-
-		for(l = 0; l < 80; l += 4) 
-		  {
-		    __m256d vv = _mm256_load_pd(&v[l]);
-		    _mm256_store_pd(&v[l],_mm256_mul_pd(vv,PLL_TWOTOTHE256v));
-		  }	
-	      } 
-	}       
-	
-	for (i = 0; i < n; i++) 
-	  {	   
-	    if((x3_gap[i / 32] & mask32[i % 32]))
-	      {	       
-		if(gapScaling)
-		  {
-		    if(useFastScaling)
-		      addScale += wgt[i];
-		    else
-		      ex3[i]  += 1;
-		  }
-	      }
-	    else
-	      {		
-		uX1 = &umpX1[80 * tipX1[i]];
-		
-		if(x2_gap[i / 32] & mask32[i % 32])
-		  x2 = x2_gapColumn;
-		else
-		  {
-		    x2 = x2_ptr;
-		    x2_ptr += 80;
-		  }	      
-	    
-		for(k = 0; k < 4; k++) 
-		  {
-		    v = &(x2[k * 20]);
-		    
-		    for(l = 0; l < 20; l++) 
-		      {
-			__m256d ump_x2v = _mm256_setzero_pd();
-		    	
-			__m256d vv = _mm256_load_pd(&v[0]);
-			__m256d rightv = _mm256_load_pd(&right[k*400+l*20+0]);
-#ifdef _FMA
-			ump_x2v = FMAMACC(ump_x2v,vv,rightv);
-#else
-			ump_x2v = _mm256_add_pd(ump_x2v, _mm256_mul_pd(vv, rightv));
-#endif
-			
-			vv = _mm256_load_pd(&v[4]);
-			rightv = _mm256_load_pd(&right[k*400+l*20+4]);
-#ifdef _FMA
-			ump_x2v = FMAMACC(ump_x2v,vv,rightv);
-#else
-			ump_x2v = _mm256_add_pd(ump_x2v, _mm256_mul_pd(vv, rightv));
-#endif
-			
-			vv = _mm256_load_pd(&v[8]);
-			rightv = _mm256_load_pd(&right[k*400+l*20+8]);
-#ifdef _FMA
-			ump_x2v = FMAMACC(ump_x2v,vv,rightv);
-#else
-			ump_x2v = _mm256_add_pd(ump_x2v, _mm256_mul_pd(vv, rightv));
-#endif
-			
-			vv = _mm256_load_pd(&v[12]);
-			rightv = _mm256_load_pd(&right[k*400+l*20+12]);
-#ifdef _FMA
-			ump_x2v = FMAMACC(ump_x2v,vv,rightv);
-#else
-			ump_x2v = _mm256_add_pd(ump_x2v, _mm256_mul_pd(vv, rightv));
-#endif
-			
-			vv = _mm256_load_pd(&v[16]);
-			rightv = _mm256_load_pd(&right[k*400+l*20+16]);
-#ifdef _FMA
-			ump_x2v = FMAMACC(ump_x2v,vv,rightv);
-#else
-			ump_x2v = _mm256_add_pd(ump_x2v, _mm256_mul_pd(vv, rightv));
-#endif
-			
-			ump_x2v = hadd3(ump_x2v);
-			_mm256_maskstore_pd(&ump_x2[l], bitmask, ump_x2v);
-		      }
-		  
-		    
-		    v = &x3_ptr[k * 20];
-		    
-		    __m256d vv[5]; 
-		    
-		    vv[0] = _mm256_setzero_pd();
-		    vv[1] = _mm256_setzero_pd();
-		    vv[2] = _mm256_setzero_pd();
-		    vv[3] = _mm256_setzero_pd();
-		    vv[4] = _mm256_setzero_pd();
-		    
-		    for(l = 0; l < 20; l++) 
-		      {
-			x1px2 = uX1[k * 20 + l]	* ump_x2[l];
-			__m256d x1px2v = _mm256_set1_pd(x1px2);	
-			
-#ifdef _FMA
-			__m256d ev = _mm256_load_pd(&extEV[l * 20 + 0]);
-			vv[0] = FMAMACC(vv[0],x1px2v, ev);
-#else
-			vv[0] = _mm256_add_pd(vv[0],_mm256_mul_pd(x1px2v, _mm256_load_pd(&extEV[l * 20 + 0])));
-#endif
-			_mm256_store_pd(&v[0],vv[0]);
-			
-#ifdef _FMA
-			ev = _mm256_load_pd(&extEV[l * 20 + 4]);
-			vv[1] = FMAMACC(vv[1],x1px2v, ev);
-#else
-			vv[1] = _mm256_add_pd(vv[1],_mm256_mul_pd(x1px2v, _mm256_load_pd(&extEV[l * 20 + 4])));
-#endif
-			_mm256_store_pd(&v[4],vv[1]);
-			
-#ifdef _FMA
-			ev = _mm256_load_pd(&extEV[l * 20 + 8]);
-			vv[2] = FMAMACC(vv[2],x1px2v, ev);
-#else
-			vv[2] = _mm256_add_pd(vv[2],_mm256_mul_pd(x1px2v, _mm256_load_pd(&extEV[l * 20 + 8])));
-#endif
-			_mm256_store_pd(&v[8],vv[2]);
-			
-#ifdef _FMA
-			ev = _mm256_load_pd(&extEV[l * 20 + 12]);
-			vv[3] = FMAMACC(vv[3],x1px2v, ev);
-#else
-			vv[3] = _mm256_add_pd(vv[3],_mm256_mul_pd(x1px2v, _mm256_load_pd(&extEV[l * 20 + 12])));
-#endif
-			_mm256_store_pd(&v[12],vv[3]);
-			
-			
-#ifdef _FMA
-			ev = _mm256_load_pd(&extEV[l * 20 + 16]);
-			vv[4] = FMAMACC(vv[4],x1px2v, ev);
-#else
-			vv[4] = _mm256_add_pd(vv[4],_mm256_mul_pd(x1px2v, _mm256_load_pd(&extEV[l * 20 + 16])));
-#endif
-			_mm256_store_pd(&v[16],vv[4]);
-			
-		      } 
-		  }
-		
-		v = x3_ptr;
-		__m256d minlikelihood_avx = _mm256_set1_pd(PLL_MINLIKELIHOOD);
-		scale = 1;
-		for(l = 0; scale && (l < 80); l += 4) 
-		  {
-		    __m256d vv = _mm256_load_pd(&v[l]);
-		    __m256d vv_abs = _mm256_and_pd(vv,absMask_AVX.m);
-		    vv_abs = _mm256_cmp_pd(vv_abs,minlikelihood_avx,_CMP_LT_OS);
-		    if(_mm256_movemask_pd(vv_abs) != 15)
-		      scale = 0;
-		  }
-	    
-		if(scale) 
-		  {		
-		    __m256d PLL_TWOTOTHE256v = _mm256_set_pd(PLL_TWOTOTHE256,PLL_TWOTOTHE256,PLL_TWOTOTHE256,PLL_TWOTOTHE256);
-		    for(l = 0; l < 80; l += 4) 
-		      {
-			__m256d vv = _mm256_load_pd(&v[l]);
-			_mm256_store_pd(&v[l],_mm256_mul_pd(vv,PLL_TWOTOTHE256v));
-		      }
-		    if(useFastScaling)
-		      addScale += wgt[i];				
-		    else
-		      ex3[i] += 1;
-		  }	      
-		x3_ptr += 80;
-	      }
-	  }
-      }
-      break;
-    case PLL_INNER_INNER:    	  
-      for(k = 0; k < 4; k++) 
-	{
-	  vl = &(x1_gapColumn[20 * k]);
-	  vr = &(x2_gapColumn[20 * k]);
-	  v  = &(x3_gapColumn[20 * k]);	      	   
-
-	  __m256d vv[5]; 
-	  
-	  vv[0] = _mm256_setzero_pd();
-	  vv[1] = _mm256_setzero_pd();
-	  vv[2] = _mm256_setzero_pd();
-	  vv[3] = _mm256_setzero_pd();
-	  vv[4] = _mm256_setzero_pd();
-	  
-	  for(l = 0; l < 20; l++) 
-	    {		  
-	      __m256d al = _mm256_setzero_pd();
-	      __m256d ar = _mm256_setzero_pd();
-	      
-	      __m256d leftv  = _mm256_load_pd(&left[k * 400 + l * 20 + 0]);
-	      __m256d rightv = _mm256_load_pd(&right[k * 400 + l * 20 + 0]);
-	      __m256d vlv = _mm256_load_pd(&vl[0]);
-	      __m256d vrv = _mm256_load_pd(&vr[0]);
-	      
-#ifdef _FMA
-	      
-	      al = FMAMACC(al, vlv, leftv);
-	      ar = FMAMACC(ar, vrv, rightv);
-#else
-	      al = _mm256_add_pd(al,_mm256_mul_pd(vlv,leftv));
-	      ar = _mm256_add_pd(ar,_mm256_mul_pd(vrv,rightv));		  
-#endif
-	      
-	      leftv = _mm256_load_pd(&left[k * 400 + l * 20 + 4]);
-	      rightv = _mm256_load_pd(&right[k * 400 + l * 20 + 4]);
-	      vlv = _mm256_load_pd(&vl[4]);
-	      vrv = _mm256_load_pd(&vr[4]);
-#ifdef _FMA
-	      
-	      al = FMAMACC(al, vlv, leftv);
-	      ar = FMAMACC(ar, vrv, rightv);
-#else
-	      al = _mm256_add_pd(al,_mm256_mul_pd(vlv,leftv));
-	      ar = _mm256_add_pd(ar,_mm256_mul_pd(vrv,rightv));
-#endif
-	      
-	      leftv = _mm256_load_pd(&left[k * 400 + l * 20 + 8]);
-	      rightv = _mm256_load_pd(&right[k * 400 + l * 20 + 8]);
-	      vlv = _mm256_load_pd(&vl[8]);
-	      vrv = _mm256_load_pd(&vr[8]);
-#ifdef _FMA
-	      
-	      al = FMAMACC(al, vlv, leftv);
-	      ar = FMAMACC(ar, vrv, rightv);
-#else
-	      al = _mm256_add_pd(al,_mm256_mul_pd(vlv,leftv));
-	      ar = _mm256_add_pd(ar,_mm256_mul_pd(vrv,rightv));
-#endif
-	      
-	      leftv = _mm256_load_pd(&left[k * 400 + l * 20 + 12]);
-	      rightv = _mm256_load_pd(&right[k * 400 + l * 20 + 12]);
-	      vlv = _mm256_load_pd(&vl[12]);
-	      vrv = _mm256_load_pd(&vr[12]);
-#ifdef _FMA
-	      
-	      al = FMAMACC(al, vlv, leftv);
-	      ar = FMAMACC(ar, vrv, rightv);
-#else
-	      al = _mm256_add_pd(al,_mm256_mul_pd(vlv,leftv));
-	      ar = _mm256_add_pd(ar,_mm256_mul_pd(vrv,rightv));
-#endif
-	      
-	      leftv = _mm256_load_pd(&left[k * 400 + l * 20 + 16]);
-	      rightv = _mm256_load_pd(&right[k * 400 + l * 20 + 16]);
-	      vlv = _mm256_load_pd(&vl[16]);
-	      vrv = _mm256_load_pd(&vr[16]);
-	      
-#ifdef _FMA		    
-	      al = FMAMACC(al, vlv, leftv);
-	      ar = FMAMACC(ar, vrv, rightv);
-#else
-	      al = _mm256_add_pd(al,_mm256_mul_pd(vlv,leftv));
-	      ar = _mm256_add_pd(ar,_mm256_mul_pd(vrv,rightv));
-#endif
-	      
-	      /**************************************************************************************************************/
-	      
-	      al = hadd3(al);
-	      ar = hadd3(ar);
-	      al = _mm256_mul_pd(ar,al);
-	      
-	      /************************************************************************************************************/
-#ifdef _FMA		    
-	      __m256d ev =  _mm256_load_pd(&extEV[20 * l + 0]);
-	      vv[0] = FMAMACC(vv[0], al, ev);		 
-#else
-	      vv[0] = _mm256_add_pd(vv[0],_mm256_mul_pd(al, _mm256_load_pd(&extEV[20 * l + 0])));			  		 		  
-#endif
-	      _mm256_store_pd(&v[0],vv[0]);
-	      
-#ifdef _FMA		    
-	      ev =  _mm256_load_pd(&extEV[20 * l + 4]);
-	      vv[1] = FMAMACC(vv[1], al, ev);		 
-#else
-	      vv[1] = _mm256_add_pd(vv[1],_mm256_mul_pd(al, _mm256_load_pd(&extEV[20 * l + 4])));		  		 
-#endif
-	      _mm256_store_pd(&v[4],vv[1]);
-	      
-#ifdef _FMA		    
-	      ev =  _mm256_load_pd(&extEV[20 * l + 8]);
-	      vv[2] = FMAMACC(vv[2], al, ev);		 
-#else
-	      vv[2] = _mm256_add_pd(vv[2],_mm256_mul_pd(al, _mm256_load_pd(&extEV[20 * l + 8])));		  		 
-#endif
-	      _mm256_store_pd(&v[8],vv[2]);
-	      
-#ifdef _FMA		    
-	      ev =  _mm256_load_pd(&extEV[20 * l + 12]);
-	      vv[3] = FMAMACC(vv[3], al, ev);		 
-#else
-	      vv[3] = _mm256_add_pd(vv[3],_mm256_mul_pd(al, _mm256_load_pd(&extEV[20 * l + 12])));		  		 
-#endif
-	      _mm256_store_pd(&v[12],vv[3]);
-	      
-#ifdef _FMA		    
-	      ev =  _mm256_load_pd(&extEV[20 * l + 16]);
-	      vv[4] = FMAMACC(vv[4], al, ev);		 
-#else
-	      vv[4] = _mm256_add_pd(vv[4],_mm256_mul_pd(al, _mm256_load_pd(&extEV[20 * l + 16])));			 	  
-#endif
-	      _mm256_store_pd(&v[16],vv[4]);		 
-	    } 
-	}
-	
-      v = x3_gapColumn;
-      scale = 1;
-      __m256d minlikelihood_avx = _mm256_set1_pd(PLL_MINLIKELIHOOD);	 
-      
-      for(l = 0; scale && (l < 80); l += 4) 
-	{
-	  __m256d vv = _mm256_load_pd(&v[l]);
-	  __m256d vv_abs = _mm256_and_pd(vv,absMask_AVX.m);
-	  vv_abs = _mm256_cmp_pd(vv_abs,minlikelihood_avx,_CMP_LT_OS);
-	  if(_mm256_movemask_pd(vv_abs) != 15)
-	    scale = 0;	     
-	}
-
-      if(scale) 
-	{		     	      
-	  __m256d PLL_TWOTOTHE256v = _mm256_set_pd(PLL_TWOTOTHE256,PLL_TWOTOTHE256,PLL_TWOTOTHE256,PLL_TWOTOTHE256);
-	  gapScaling = 1;
-
-	  for(l = 0; l < 80; l += 4) 
-	    {
-	      __m256d vv = _mm256_load_pd(&v[l]);
-	      _mm256_store_pd(&v[l],_mm256_mul_pd(vv,PLL_TWOTOTHE256v));
-	    }
-	  
-	} 
-   
-     
-
-      for(i = 0; i < n; i++) 
-	{   
-	  
-	  if(x3_gap[i / 32] & mask32[i % 32])
-	    {	     
-	      if(gapScaling)
-		{
-		  if(useFastScaling)
-		    addScale += wgt[i];
-		  else
-		    ex3[i]  += 1; 	       
-		}
-	    }
-	  else
-	    {
-	      if(x1_gap[i / 32] & mask32[i % 32])
-		x1 = x1_gapColumn;
-	      else
-		{
-		  x1 = x1_ptr;
-		  x1_ptr += 80;
-		}
-
-	      if(x2_gap[i / 32] & mask32[i % 32])
-		x2 = x2_gapColumn;
-	      else
-		{
-		  x2 = x2_ptr;
-		  x2_ptr += 80;
-		}	   
-	  
-	      for(k = 0; k < 4; k++) 
-		{
-		  vl = &(x1[20 * k]);
-		  vr = &(x2[20 * k]);
-		  v  = &(x3_ptr[20 * k]);	      	   
-		  
-		  __m256d vv[5]; 
-		  
-		  vv[0] = _mm256_setzero_pd();
-		  vv[1] = _mm256_setzero_pd();
-		  vv[2] = _mm256_setzero_pd();
-		  vv[3] = _mm256_setzero_pd();
-		  vv[4] = _mm256_setzero_pd();
-		  
-		  for(l = 0; l < 20; l++) 
-		    {		  
-		      __m256d al = _mm256_setzero_pd();
-		      __m256d ar = _mm256_setzero_pd();
-		      
-		      __m256d leftv  = _mm256_load_pd(&left[k * 400 + l * 20 + 0]);
-		      __m256d rightv = _mm256_load_pd(&right[k * 400 + l * 20 + 0]);
-		      __m256d vlv = _mm256_load_pd(&vl[0]);
-		      __m256d vrv = _mm256_load_pd(&vr[0]);
-		      
-#ifdef _FMA
-		      
-		      al = FMAMACC(al, vlv, leftv);
-		      ar = FMAMACC(ar, vrv, rightv);
-#else
-		      al = _mm256_add_pd(al,_mm256_mul_pd(vlv,leftv));
-		      ar = _mm256_add_pd(ar,_mm256_mul_pd(vrv,rightv));		  
-#endif
-		      
-		      leftv = _mm256_load_pd(&left[k * 400 + l * 20 + 4]);
-		      rightv = _mm256_load_pd(&right[k * 400 + l * 20 + 4]);
-		      vlv = _mm256_load_pd(&vl[4]);
-		      vrv = _mm256_load_pd(&vr[4]);
-#ifdef _FMA
-		      
-		      al = FMAMACC(al, vlv, leftv);
-		      ar = FMAMACC(ar, vrv, rightv);
-#else
-		      al = _mm256_add_pd(al,_mm256_mul_pd(vlv,leftv));
-		      ar = _mm256_add_pd(ar,_mm256_mul_pd(vrv,rightv));
-#endif
-		      
-		      leftv = _mm256_load_pd(&left[k * 400 + l * 20 + 8]);
-		      rightv = _mm256_load_pd(&right[k * 400 + l * 20 + 8]);
-		      vlv = _mm256_load_pd(&vl[8]);
-		      vrv = _mm256_load_pd(&vr[8]);
-#ifdef _FMA
-		      
-		      al = FMAMACC(al, vlv, leftv);
-		      ar = FMAMACC(ar, vrv, rightv);
-#else
-		      al = _mm256_add_pd(al,_mm256_mul_pd(vlv,leftv));
-		      ar = _mm256_add_pd(ar,_mm256_mul_pd(vrv,rightv));
-#endif
-		      
-		      leftv = _mm256_load_pd(&left[k * 400 + l * 20 + 12]);
-		      rightv = _mm256_load_pd(&right[k * 400 + l * 20 + 12]);
-		      vlv = _mm256_load_pd(&vl[12]);
-		      vrv = _mm256_load_pd(&vr[12]);
-#ifdef _FMA
-		      
-		      al = FMAMACC(al, vlv, leftv);
-		      ar = FMAMACC(ar, vrv, rightv);
-#else
-		      al = _mm256_add_pd(al,_mm256_mul_pd(vlv,leftv));
-		      ar = _mm256_add_pd(ar,_mm256_mul_pd(vrv,rightv));
-#endif
-		      
-		      leftv = _mm256_load_pd(&left[k * 400 + l * 20 + 16]);
-		      rightv = _mm256_load_pd(&right[k * 400 + l * 20 + 16]);
-		      vlv = _mm256_load_pd(&vl[16]);
-		      vrv = _mm256_load_pd(&vr[16]);
-		      
-#ifdef _FMA		    
-		      al = FMAMACC(al, vlv, leftv);
-		      ar = FMAMACC(ar, vrv, rightv);
-#else
-		      al = _mm256_add_pd(al,_mm256_mul_pd(vlv,leftv));
-		      ar = _mm256_add_pd(ar,_mm256_mul_pd(vrv,rightv));
-#endif
-		      
-		      /**************************************************************************************************************/
-		      
-		      al = hadd3(al);
-		      ar = hadd3(ar);
-		      al = _mm256_mul_pd(ar,al);
-		      
-		      /************************************************************************************************************/
-#ifdef _FMA		    
-		      __m256d ev =  _mm256_load_pd(&extEV[20 * l + 0]);
-		      vv[0] = FMAMACC(vv[0], al, ev);		 
-#else
-		      vv[0] = _mm256_add_pd(vv[0],_mm256_mul_pd(al, _mm256_load_pd(&extEV[20 * l + 0])));			  		 		  
-#endif
-		      _mm256_store_pd(&v[0],vv[0]);
-		      
-#ifdef _FMA		    
-		      ev =  _mm256_load_pd(&extEV[20 * l + 4]);
-		      vv[1] = FMAMACC(vv[1], al, ev);		 
-#else
-		      vv[1] = _mm256_add_pd(vv[1],_mm256_mul_pd(al, _mm256_load_pd(&extEV[20 * l + 4])));		  		 
-#endif
-		      _mm256_store_pd(&v[4],vv[1]);
-		      
-#ifdef _FMA		    
-		      ev =  _mm256_load_pd(&extEV[20 * l + 8]);
-		      vv[2] = FMAMACC(vv[2], al, ev);		 
-#else
-		      vv[2] = _mm256_add_pd(vv[2],_mm256_mul_pd(al, _mm256_load_pd(&extEV[20 * l + 8])));		  		 
-#endif
-		      _mm256_store_pd(&v[8],vv[2]);
-		      
-#ifdef _FMA		    
-		      ev =  _mm256_load_pd(&extEV[20 * l + 12]);
-		      vv[3] = FMAMACC(vv[3], al, ev);		 
-#else
-		      vv[3] = _mm256_add_pd(vv[3],_mm256_mul_pd(al, _mm256_load_pd(&extEV[20 * l + 12])));		  		 
-#endif
-		      _mm256_store_pd(&v[12],vv[3]);
-		      
-#ifdef _FMA		    
-		      ev =  _mm256_load_pd(&extEV[20 * l + 16]);
-		      vv[4] = FMAMACC(vv[4], al, ev);		 
-#else
-		      vv[4] = _mm256_add_pd(vv[4],_mm256_mul_pd(al, _mm256_load_pd(&extEV[20 * l + 16])));			 	  
-#endif
-		      _mm256_store_pd(&v[16],vv[4]);		 
-		    }
-		}
-	      
-	      v = x3_ptr;
-	      scale = 1;
-	      
-	      __m256d minlikelihood_avx = _mm256_set1_pd(PLL_MINLIKELIHOOD);	 
-	      
-	      for(l = 0; scale && (l < 80); l += 4) 
-		{
-		  __m256d vv = _mm256_load_pd(&v[l]);
-		  __m256d vv_abs = _mm256_and_pd(vv,absMask_AVX.m);
-		  vv_abs = _mm256_cmp_pd(vv_abs,minlikelihood_avx,_CMP_LT_OS);
-		  if(_mm256_movemask_pd(vv_abs) != 15)
-		    scale = 0;	     
-		}
-	      
-	      if(scale) 
-		{		     	      
-		  __m256d PLL_TWOTOTHE256v = _mm256_set_pd(PLL_TWOTOTHE256,PLL_TWOTOTHE256,PLL_TWOTOTHE256,PLL_TWOTOTHE256);
-		  for(l = 0; l < 80; l += 4) 
-		    {
-		      __m256d vv = _mm256_load_pd(&v[l]);
-		      _mm256_store_pd(&v[l],_mm256_mul_pd(vv,PLL_TWOTOTHE256v));
-		    }
-		  if(useFastScaling)
-		    addScale += wgt[i];					
-		  else
-		    ex3[i] += 1;
-		}  
-	      x3_ptr += 80;
-	    }
-	}
-      break;
-    default:
-      assert(0);
-    }
- 
-  if(useFastScaling)
-    *scalerIncrement = addScale;
-}
diff --git a/pllrepo/src/bipartitionList.c b/pllrepo/src/bipartitionList.c
deleted file mode 100644
index 44c6888..0000000
--- a/pllrepo/src/bipartitionList.c
+++ /dev/null
@@ -1,434 +0,0 @@
-/** 
- * PLL (version 1.0.0) a software library for phylogenetic inference
- * Copyright (C) 2013 Tomas Flouri and Alexandros Stamatakis
- *
- * Derived from 
- * RAxML-HPC, a program for sequential and parallel estimation of phylogenetic
- * trees by Alexandros Stamatakis
- *
- * This program is free software: you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation, either version 3 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- * 
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * For any other enquiries send an Email to Tomas Flouri
- * Tomas.Flouri at h-its.org
- *
- * When publishing work that uses PLL please cite PLL
- * 
- * @file bipartitionList.c
- */
-#include "mem_alloc.h"
-
-#ifndef WIN32  
-#include <sys/times.h>
-#include <sys/types.h>
-#include <sys/time.h>
-#include <unistd.h>  
-#endif
-
-#include <limits.h>
-#include <math.h>
-#include <time.h> 
-#include <stdlib.h>
-#include <stdio.h>
-#include <ctype.h>
-#include <string.h>
-#include <stdint.h>
-#include <assert.h>
-
-#include "pll.h"
-#include "pllInternal.h"
-
-
-static pllBipartitionEntry *initEntry(void);
-static void getxnodeBips (nodeptr p);
-static void newviewBipartitions(unsigned int **bitVectors, 
-                                nodeptr p, 
-                                int numsp, 
-                                unsigned int vectorLength, 
-                                int processID);
-
-static void insertHashRF(unsigned int *bitVector, 
-                         pllHashTable *h, 
-                         unsigned int vectorLength, 
-                         int treeNumber, 
-                         int treeVectorLength, 
-                         hashNumberType position, 
-                         int support, 
-                         pllBoolean computeWRF);
-
-extern const unsigned int mask32[32];
-
-
-static void getxnodeBips (nodeptr p)
-{
-  nodeptr  s;
-
-  if ((s = p->next)->xBips || (s = s->next)->xBips)
-    {
-      p->xBips = s->xBips;
-      s->xBips = 0;
-    }
-
-  assert(p->xBips);
-}
-
-
-static pllBipartitionEntry *initEntry(void)
-{
-  pllBipartitionEntry * e = (pllBipartitionEntry *)rax_malloc(sizeof(pllBipartitionEntry));
-
-  e->bitVector     = (unsigned int*)NULL;
-  e->treeVector    = (unsigned int*)NULL;
-  e->supportVector = (int*)NULL;
-  e->bipNumber  = 0;
-  e->bipNumber2 = 0;
-  e->supportFromTreeset[0] = 0;
-  e->supportFromTreeset[1] = 0;
-  e->next       = (pllBipartitionEntry *)NULL;
-
-  return e;
-} 
-
-void cleanupHashTable(pllHashTable *h, int state)
-{
-  unsigned int
-    k,
-    entryCount = 0,
-    removeCount = 0;
- 
-  assert(state == 1 || state == 0);
-
-  for(k = 0, entryCount = 0; k < h->size; k++)       
-    { 
-      pllHashItem * start     = NULL;
-      pllHashItem * lastValid = NULL;
-      
-      pllHashItem * hitem = h->Items[k];
-      while (hitem)
-       {                           
-         pllBipartitionEntry *e = (pllBipartitionEntry *)(hitem->data);
-         if(state == 0)
-           {
-             e->treeVector[0] = e->treeVector[0] & 2;      
-             assert(!(e->treeVector[0] & 1));
-           }
-         else
-           {
-             e->treeVector[0] = e->treeVector[0] & 1;
-             assert(!(e->treeVector[0] & 2));
-           }
-         
-         if(e->treeVector[0] != 0)
-           {
-             if(!start)
-               start = hitem;
-             lastValid = hitem;
-             hitem = hitem->next;
-           }         
-         else
-           {
-             pllHashItem *tmp = hitem;
-             pllBipartitionEntry *remove = e;
-             hitem = hitem->next;
-             
-             removeCount++;
-
-             if(lastValid) lastValid->next = hitem;
-
-             if(remove->bitVector)     rax_free(remove->bitVector);
-             if(remove->treeVector)    rax_free(remove->treeVector);
-             if(remove->supportVector) rax_free(remove->supportVector);
-             rax_free(remove);              
-             rax_free(tmp);
-           }
-         entryCount++;
-       }
-
-      if(!start)
-        {
-          assert(!lastValid);
-          h->Items[k] = NULL;
-        }
-      else
-        {
-          h->Items[k] = start;
-        }            
-    }
-
-  assert(entryCount ==  h->entries);
-  h->entries-= removeCount;
-}
-
-
-
-
-
-
-
-
-
-
-
-unsigned int **initBitVector(int mxtips, unsigned int *vectorLength)
-{
-  unsigned int 
-    **bitVectors = (unsigned int **)rax_malloc(sizeof(unsigned int*) * 2 * (size_t)mxtips);
-  
-  int 
-    i;
-
-  if(mxtips % PLL_MASK_LENGTH == 0)
-    *vectorLength = mxtips / PLL_MASK_LENGTH;
-  else
-    *vectorLength = 1 + (mxtips / PLL_MASK_LENGTH); 
-  
-  for(i = 1; i <= mxtips; i++)
-    {
-      bitVectors[i] = (unsigned int *)rax_calloc((size_t)(*vectorLength), sizeof(unsigned int));
-      assert(bitVectors[i]);
-      bitVectors[i][(i - 1) / PLL_MASK_LENGTH] |= mask32[(i - 1) % PLL_MASK_LENGTH];
-    }
-  
-  for(i = mxtips + 1; i < 2 * mxtips; i++) 
-    {
-      bitVectors[i] = (unsigned int *)rax_malloc(sizeof(unsigned int) * (size_t)(*vectorLength));
-      assert(bitVectors[i]);
-    }
-
-  return bitVectors;
-}
-
-void freeBitVectors(unsigned int **v, int n)
-{
-  int i;
-
-  for(i = 1; i < n; i++)
-    rax_free(v[i]);
-}
-
-
-static void newviewBipartitions(unsigned int **bitVectors, 
-                                nodeptr p, 
-                                int numsp, 
-                                unsigned int vectorLength, 
-                                int processID)
-{
-  
-  if(isTip(p->number, numsp))
-    return;
-  {
-    nodeptr 
-      q = p->next->back, 
-      r = p->next->next->back;
-    
-    
-    
-    unsigned int       
-      *vector = bitVectors[p->number],
-      *left  = bitVectors[q->number],
-      *right = bitVectors[r->number];
-    unsigned 
-      int i;      
-    
-    assert(processID == 0);
-    
-
-    while(!p->xBips)
-      { 
-        if(!p->xBips)
-          getxnodeBips(p);
-      }
-
-    p->hash = q->hash ^ r->hash;
-
-    if(isTip(q->number, numsp) && isTip(r->number, numsp))
-      {         
-        for(i = 0; i < vectorLength; i++)
-          vector[i] = left[i] | right[i];               
-      }
-    else
-      { 
-        if(isTip(q->number, numsp) || isTip(r->number, numsp))
-          {
-            if(isTip(r->number, numsp))
-              { 
-                nodeptr tmp = r;
-                r = q;
-                q = tmp;
-              }    
-                    
-            while(!r->xBips)
-              {
-                if(!r->xBips)
-                  newviewBipartitions(bitVectors, r, numsp, vectorLength, processID);
-              }    
-
-            for(i = 0; i < vectorLength; i++)
-              vector[i] = left[i] | right[i];            
-          }
-        else
-          {         
-            while((!r->xBips) || (!q->xBips))
-              {
-                if(!q->xBips)
-                  newviewBipartitions(bitVectors, q, numsp, vectorLength, processID);
-                if(!r->xBips)
-                  newviewBipartitions(bitVectors, r, numsp, vectorLength, processID);
-              }                                    
-
-            for(i = 0; i < vectorLength; i++)
-              vector[i] = left[i] | right[i];    
-          }
-
-      }     
-  }     
-}
-
-
-
-
-static void insertHashRF(unsigned int *bitVector, 
-                         pllHashTable *h, 
-                         unsigned int vectorLength, 
-                         int treeNumber, 
-                         int treeVectorLength, 
-                         hashNumberType position, 
-                         int support, 
-                         pllBoolean computeWRF)
-{
-  pllBipartitionEntry * e;
-  pllHashItem * hitem;
-
-  if(h->Items[position] != NULL)
-    {
-      for (hitem = h->Items[position]; hitem; hitem = hitem->next)
-        { 
-          e = (pllBipartitionEntry *)(hitem->data);
-          
-          if (!memcmp(bitVector, e->bitVector, vectorLength * sizeof(unsigned int)))
-            {
-              e->treeVector[treeNumber / PLL_MASK_LENGTH] |= mask32[treeNumber % PLL_MASK_LENGTH];
-              if(computeWRF)
-                {
-                  e->supportVector[treeNumber] = support;
-                  assert(0 <= treeNumber && treeNumber < treeVectorLength * PLL_MASK_LENGTH);
-                }
-              return;
-            }
-        }
-    }
-  e = initEntry(); 
-       
-  rax_posix_memalign ((void **)&(e->bitVector), PLL_BYTE_ALIGNMENT, (size_t)vectorLength * sizeof(unsigned int));
-  memset(e->bitVector, 0, vectorLength * sizeof(unsigned int));
-
-  e->treeVector = (unsigned int*)rax_calloc((size_t)treeVectorLength, sizeof(unsigned int));
-  if(computeWRF)
-    e->supportVector = (int*)rax_calloc((size_t)treeVectorLength * PLL_MASK_LENGTH, sizeof(int));
-
-  e->treeVector[treeNumber / PLL_MASK_LENGTH] |= mask32[treeNumber % PLL_MASK_LENGTH];
-  if(computeWRF)
-    {
-      e->supportVector[treeNumber] = support;
-     
-      assert(0 <= treeNumber && treeNumber < treeVectorLength * PLL_MASK_LENGTH);
-    }
-
-  memcpy(e->bitVector, bitVector, sizeof(unsigned int) * vectorLength);
-  
-  pllHashAdd (h, position, NULL, (void *)e);
-}
-
-
-
-void bitVectorInitravSpecial(unsigned int **bitVectors, nodeptr p, int numsp, unsigned int vectorLength, pllHashTable *h, int treeNumber, int function, branchInfo *bInf, 
-                             int *countBranches, int treeVectorLength, pllBoolean traverseOnly, pllBoolean computeWRF, int processID)
-{
-  if(isTip(p->number, numsp))
-    return;
-  else
-    {
-      nodeptr 
-        q = p->next;          
-
-      do 
-        {
-          bitVectorInitravSpecial(bitVectors, q->back, numsp, vectorLength, h, treeNumber, function, bInf, countBranches, treeVectorLength, traverseOnly, computeWRF, processID);
-          q = q->next;
-        }
-      while(q != p);
-           
-      newviewBipartitions(bitVectors, p, numsp, vectorLength, processID);
-      
-      assert(p->xBips);
-
-      assert(!traverseOnly);     
-
-      if(!(isTip(p->back->number, numsp)))
-        {
-          unsigned int 
-            *toInsert  = bitVectors[p->number];
-          
-          hashNumberType 
-            position = p->hash % h->size;
-         
-          assert(!(toInsert[0] & 1));
-          assert(!computeWRF);
-          
-          switch(function)
-            {        
-            case PLL_BIPARTITIONS_RF:        
-              insertHashRF(toInsert, h, vectorLength, treeNumber, treeVectorLength, position, 0, computeWRF);
-              *countBranches =  *countBranches + 1;
-              break;
-            default:
-              assert(0);
-            }             
-        }
-      
-    }
-}
-
-double convergenceCriterion(pllHashTable *h, int mxtips)
-{
-  int      
-    rf = 0; 
-
-  unsigned int 
-    k = 0, 
-    entryCount = 0;
-  
-  double    
-    rrf;  
-
-  pllHashItem * hitem;
-
-  for(k = 0, entryCount = 0; k < h->size; k++)          
-    {      
-      for (hitem = h->Items[k]; hitem; hitem = hitem->next)
-       {
-         pllBipartitionEntry *e = hitem->data;
-         unsigned int *vector = e->treeVector;          
-
-         if(((vector[0] & 1) > 0) + ((vector[0] & 2) > 0) == 1)
-           rf++;        
-          
-         entryCount++;
-         e = e->next;
-       }
-    }
-
-  assert(entryCount == h->entries);  
-  rrf = (double)rf/((double)(2 * (mxtips - 3)));  
-  return rrf;
-}
diff --git a/pllrepo/src/cycle.h b/pllrepo/src/cycle.h
deleted file mode 100644
index 889932a..0000000
--- a/pllrepo/src/cycle.h
+++ /dev/null
@@ -1,516 +0,0 @@
-/*
- * Copyright (c) 2003, 2007-8 Matteo Frigo
- * Copyright (c) 2003, 2007-8 Massachusetts Institute of Technology
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-
-/* machine-dependent cycle counters code. Needs to be inlined. */
-
-/***************************************************************************/
-/* To use the cycle counters in your code, simply #include "cycle.h" (this
-   file), and then use the functions/macros:
-
-                 ticks getticks(void);
-
-   ticks is an opaque typedef defined below, representing the current time.
-   You extract the elapsed time between two calls to gettick() via:
-
-                 double elapsed(ticks t1, ticks t0);
-
-   which returns a double-precision variable in arbitrary units.  You
-   are not expected to convert this into human units like seconds; it
-   is intended only for *comparisons* of time intervals.
-
-   (In order to use some of the OS-dependent timer routines like
-   Solaris' gethrtime, you need to paste the autoconf snippet below
-   into your configure.ac file and #include "config.h" before cycle.h,
-   or define the relevant macros manually if you are not using autoconf.)
-*/
-
-/***************************************************************************/
-/* This file uses macros like HAVE_GETHRTIME that are assumed to be
-   defined according to whether the corresponding function/type/header
-   is available on your system.  The necessary macros are most
-   conveniently defined if you are using GNU autoconf, via the tests:
-   
-   dnl ---------------------------------------------------------------------
-
-   AC_C_INLINE
-   AC_HEADER_TIME
-   AC_CHECK_HEADERS([sys/time.h c_asm.h intrinsics.h mach/mach_time.h])
-
-   AC_CHECK_TYPE([hrtime_t],[AC_DEFINE(HAVE_HRTIME_T, 1, [Define to 1 if hrtime_t is defined in <sys/time.h>])],,[#if HAVE_SYS_TIME_H
-#include <sys/time.h>
-#endif])
-
-   AC_CHECK_FUNCS([gethrtime read_real_time time_base_to_time clock_gettime mach_absolute_time])
-
-   dnl Cray UNICOS _rtc() (real-time clock) intrinsic
-   AC_MSG_CHECKING([for _rtc intrinsic])
-   rtc_ok=yes
-   AC_TRY_LINK([#ifdef HAVE_INTRINSICS_H
-#include <intrinsics.h>
-#endif], [_rtc()], [AC_DEFINE(HAVE__RTC,1,[Define if you have the UNICOS _rtc() intrinsic.])], [rtc_ok=no])
-   AC_MSG_RESULT($rtc_ok)
-
-   dnl ---------------------------------------------------------------------
-*/
-
-/***************************************************************************/
-
-#ifdef TIME_WITH_SYS_TIME
-# include <sys/time.h>
-# include <time.h>
-#else
-# ifdef HAVE_SYS_TIME_H
-#  include <sys/time.h>
-# else
-#  include <time.h>
-# endif
-#endif
-
-
-
-
-#define INLINE_ELAPSED(INL) static INL double elapsed(ticks t1, ticks t0) \
-{									  \
-     return (double)t1 - (double)t0;					  \
-}
-
-/*----------------------------------------------------------------*/
-/* Solaris */
-#if defined(HAVE_GETHRTIME) && defined(HAVE_HRTIME_T) && !defined(HAVE_TICK_COUNTER)
-typedef hrtime_t ticks;
-
-#define getticks gethrtime
-
-INLINE_ELAPSED(inline)
-
-#define HAVE_TICK_COUNTER
-#endif
-
-/*----------------------------------------------------------------*/
-/* AIX v. 4+ routines to read the real-time clock or time-base register */
-#if defined(HAVE_READ_REAL_TIME) && defined(HAVE_TIME_BASE_TO_TIME) && !defined(HAVE_TICK_COUNTER)
-typedef timebasestruct_t ticks;
-
-static __inline ticks getticks(void)
-{
-     ticks t;
-     read_real_time(&t, TIMEBASE_SZ);
-     return t;
-}
-
-static __inline double elapsed(ticks t1, ticks t0) /* time in nanoseconds */
-{
-     time_base_to_time(&t1, TIMEBASE_SZ);
-     time_base_to_time(&t0, TIMEBASE_SZ);
-     return (((double)t1.tb_high - (double)t0.tb_high) * 1.0e9 + 
-	     ((double)t1.tb_low - (double)t0.tb_low));
-}
-
-#define HAVE_TICK_COUNTER
-#endif
-
-/*----------------------------------------------------------------*/
-/*
- * PowerPC ``cycle'' counter using the time base register.
- */
-#if ((((defined(__GNUC__) && (defined(__powerpc__) || defined(__ppc__))) || (defined(__MWERKS__) && defined(macintosh)))) || (defined(__IBM_GCC_ASM) && (defined(__powerpc__) || defined(__ppc__))))  && !defined(HAVE_TICK_COUNTER)
-typedef unsigned long long ticks;
-
-static __inline__ ticks getticks(void)
-{
-     unsigned int tbl, tbu0, tbu1;
-
-     do {
-	  __asm__ __volatile__ ("mftbu %0" : "=r"(tbu0));
-	  __asm__ __volatile__ ("mftb %0" : "=r"(tbl));
-	  __asm__ __volatile__ ("mftbu %0" : "=r"(tbu1));
-     } while (tbu0 != tbu1);
-
-     return (((unsigned long long)tbu0) << 32) | tbl;
-}
-
-INLINE_ELAPSED(__inline__)
-
-#define HAVE_TICK_COUNTER
-#endif
-
-/* MacOS/Mach (Darwin) time-base register interface (unlike UpTime,
-   from Carbon, requires no additional libraries to be linked). */
-#if defined(HAVE_MACH_ABSOLUTE_TIME) && defined(HAVE_MACH_MACH_TIME_H) && !defined(HAVE_TICK_COUNTER)
-#include <mach/mach_time.h>
-typedef uint64_t ticks;
-#define getticks mach_absolute_time
-INLINE_ELAPSED(__inline__)
-#define HAVE_TICK_COUNTER
-#endif
-
-/*----------------------------------------------------------------*/
-/*
- * Pentium cycle counter 
- */
-#if (defined(__GNUC__) || defined(__ICC)) && defined(__i386__)  && !defined(HAVE_TICK_COUNTER)
-typedef unsigned long long ticks;
-
-static __inline__ ticks getticks(void)
-{
-     ticks ret;
-
-     __asm__ __volatile__("rdtsc": "=A" (ret));
-     /* no input, nothing else clobbered */
-     return ret;
-}
-
-INLINE_ELAPSED(__inline__)
-
-#define HAVE_TICK_COUNTER
-#define TIME_MIN 5000.0   /* unreliable pentium IV cycle counter */
-#endif
-
-/* Visual C++ -- thanks to Morten Nissov for his help with this */
-#if defined(_MSC_VER) && _MSC_VER >= 1200 && _M_IX86 >= 500 && !defined(HAVE_TICK_COUNTER)
-#include <windows.h>
-typedef LARGE_INTEGER ticks;
-#define RDTSC __asm __emit 0fh __asm __emit 031h /* hack for VC++ 5.0 */
-
-static __inline ticks getticks(void)
-{
-     ticks retval;
-
-     __asm {
-	  RDTSC
-	  mov retval.HighPart, edx
-	  mov retval.LowPart, eax
-     }
-     return retval;
-}
-
-static __inline double elapsed(ticks t1, ticks t0)
-{  
-     return (double)t1.QuadPart - (double)t0.QuadPart;
-}  
-
-#define HAVE_TICK_COUNTER
-#define TIME_MIN 5000.0   /* unreliable pentium IV cycle counter */
-#endif
-
-/*----------------------------------------------------------------*/
-/*
- * X86-64 cycle counter
- */
-#if (defined(__GNUC__) || defined(__ICC) || defined(__SUNPRO_C)) && defined(__x86_64__)  && !defined(HAVE_TICK_COUNTER)
-typedef unsigned long long ticks;
-
-static __inline__ ticks getticks(void)
-{
-     unsigned a, d; 
-     __asm volatile("rdtsc" : "=a" (a), "=d" (d)); 
-     return ((ticks)a) | (((ticks)d) << 32); 
-}
-
-INLINE_ELAPSED(__inline__)
-
-#define HAVE_TICK_COUNTER
-#endif
-
-/* PGI compiler, courtesy Cristiano Calonaci, Andrea Tarsi, & Roberto Gori.
-   NOTE: this code will fail to link unless you use the -Masmkeyword compiler
-   option (grrr). */
-#if defined(__PGI) && defined(__x86_64__) && !defined(HAVE_TICK_COUNTER) 
-typedef unsigned long long ticks;
-static ticks getticks(void)
-{
-    asm(" rdtsc; shl    $0x20,%rdx; mov    %eax,%eax; or     %rdx,%rax;    ");
-}
-INLINE_ELAPSED(__inline__)
-#define HAVE_TICK_COUNTER
-#endif
-
-/* Visual C++, courtesy of Dirk Michaelis */
-#if defined(_MSC_VER) && _MSC_VER >= 1400 && (defined(_M_AMD64) || defined(_M_X64)) && !defined(HAVE_TICK_COUNTER)
-
-#include <intrin.h>
-#pragma intrinsic(__rdtsc)
-typedef unsigned __int64 ticks;
-#define getticks __rdtsc
-INLINE_ELAPSED(__inline)
-
-#define HAVE_TICK_COUNTER
-#endif
-
-/*----------------------------------------------------------------*/
-/*
- * IA64 cycle counter
- */
-
-/* intel's icc/ecc compiler */
-#if (defined(__EDG_VERSION) || defined(__ECC)) && defined(__ia64__) && !defined(HAVE_TICK_COUNTER)
-typedef unsigned long ticks;
-#include <ia64intrin.h>
-
-static __inline__ ticks getticks(void)
-{
-     return __getReg(_IA64_REG_AR_ITC);
-}
- 
-INLINE_ELAPSED(__inline__)
- 
-#define HAVE_TICK_COUNTER
-#endif
-
-/* gcc */
-#if defined(__GNUC__) && defined(__ia64__) && !defined(HAVE_TICK_COUNTER)
-typedef unsigned long ticks;
-
-static __inline__ ticks getticks(void)
-{
-     ticks ret;
-
-     __asm__ __volatile__ ("mov %0=ar.itc" : "=r"(ret));
-     return ret;
-}
-
-INLINE_ELAPSED(__inline__)
-
-#define HAVE_TICK_COUNTER
-#endif
-
-/* HP/UX IA64 compiler, courtesy Teresa L. Johnson: */
-#if defined(__hpux) && defined(__ia64) && !defined(HAVE_TICK_COUNTER)
-#include <machine/sys/inline.h>
-typedef unsigned long ticks;
-
-static __inline ticks getticks(void)
-{
-     ticks ret;
-
-     ret = _Asm_mov_from_ar (_AREG_ITC);
-     return ret;
-}
-
-INLINE_ELAPSED(inline)
-
-#define HAVE_TICK_COUNTER
-#endif
-
-/* Microsoft Visual C++ */
-#if defined(_MSC_VER) && defined(_M_IA64) && !defined(HAVE_TICK_COUNTER)
-typedef unsigned __int64 ticks;
-
-#  ifdef __cplusplus
-extern "C"
-#  endif
-ticks __getReg(int whichReg);
-#pragma intrinsic(__getReg)
-
-static __inline ticks getticks(void)
-{
-     volatile ticks temp;
-     temp = __getReg(3116);
-     return temp;
-}
-
-INLINE_ELAPSED(inline)
-
-#define HAVE_TICK_COUNTER
-#endif
-
-/*----------------------------------------------------------------*/
-/*
- * PA-RISC cycle counter 
- */
-#if defined(__hppa__) || defined(__hppa) && !defined(HAVE_TICK_COUNTER)
-typedef unsigned long ticks;
-
-#  ifdef __GNUC__
-static __inline__ ticks getticks(void)
-{
-     ticks ret;
-
-     __asm__ __volatile__("mfctl 16, %0": "=r" (ret));
-     /* no input, nothing else clobbered */
-     return ret;
-}
-#  else
-#  include <machine/inline.h>
-static __inline unsigned long getticks(void)
-{
-     register ticks ret;
-     _MFCTL(16, ret);
-     return ret;
-}
-#  endif
-
-INLINE_ELAPSED(inline)
-
-#define HAVE_TICK_COUNTER
-#endif
-
-/*----------------------------------------------------------------*/
-/* S390, courtesy of James Treacy */
-#if defined(__GNUC__) && defined(__s390__) && !defined(HAVE_TICK_COUNTER)
-typedef unsigned long long ticks;
-
-static __inline__ ticks getticks(void)
-{
-     ticks cycles;
-     __asm__("stck 0(%0)" : : "a" (&(cycles)) : "memory", "cc");
-     return cycles;
-}
-
-INLINE_ELAPSED(__inline__)
-
-#define HAVE_TICK_COUNTER
-#endif
-/*----------------------------------------------------------------*/
-#if defined(__GNUC__) && defined(__alpha__) && !defined(HAVE_TICK_COUNTER)
-/*
- * The 32-bit cycle counter on alpha overflows pretty quickly, 
- * unfortunately.  A 1GHz machine overflows in 4 seconds.
- */
-typedef unsigned int ticks;
-
-static __inline__ ticks getticks(void)
-{
-     unsigned long cc;
-     __asm__ __volatile__ ("rpcc %0" : "=r"(cc));
-     return (cc & 0xFFFFFFFF);
-}
-
-INLINE_ELAPSED(__inline__)
-
-#define HAVE_TICK_COUNTER
-#endif
-
-/*----------------------------------------------------------------*/
-#if defined(__GNUC__) && defined(__sparc_v9__) && !defined(HAVE_TICK_COUNTER)
-typedef unsigned long ticks;
-
-static __inline__ ticks getticks(void)
-{
-     ticks ret;
-     __asm__ __volatile__("rd %%tick, %0" : "=r" (ret));
-     return ret;
-}
-
-INLINE_ELAPSED(__inline__)
-
-#define HAVE_TICK_COUNTER
-#endif
-
-/*----------------------------------------------------------------*/
-#if (defined(__DECC) || defined(__DECCXX)) && defined(__alpha) && defined(HAVE_C_ASM_H) && !defined(HAVE_TICK_COUNTER)
-#  include <c_asm.h>
-typedef unsigned int ticks;
-
-static __inline ticks getticks(void)
-{
-     unsigned long cc;
-     cc = asm("rpcc %v0");
-     return (cc & 0xFFFFFFFF);
-}
-
-INLINE_ELAPSED(__inline)
-
-#define HAVE_TICK_COUNTER
-#endif
-/*----------------------------------------------------------------*/
-/* SGI/Irix */
-#if defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_SGI_CYCLE) && !defined(HAVE_TICK_COUNTER)
-typedef struct timespec ticks;
-
-static __inline ticks getticks(void)
-{
-     struct timespec t;
-     clock_gettime(CLOCK_SGI_CYCLE, &t);
-     return t;
-}
-
-static __inline double elapsed(ticks t1, ticks t0)
-{
-     return ((double)t1.tv_sec - (double)t0.tv_sec) * 1.0E9 +
-	  ((double)t1.tv_nsec - (double)t0.tv_nsec);
-}
-#define HAVE_TICK_COUNTER
-#endif
-
-/*----------------------------------------------------------------*/
-/* Cray UNICOS _rtc() intrinsic function */
-#if defined(HAVE__RTC) && !defined(HAVE_TICK_COUNTER)
-#ifdef HAVE_INTRINSICS_H
-#  include <intrinsics.h>
-#endif
-
-typedef long long ticks;
-
-#define getticks _rtc
-
-INLINE_ELAPSED(inline)
-
-#define HAVE_TICK_COUNTER
-#endif
-
-/*----------------------------------------------------------------*/
-/* MIPS ZBus */
-#ifdef HAVE_MIPS_ZBUS_TIMER
-#if defined(__mips__) && !defined(HAVE_TICK_COUNTER)
-#include <sys/mman.h>
-#include <unistd.h>
-#include <fcntl.h>
-
-typedef uint64_t ticks;
-
-static __inline ticks getticks(void)
-{
-  static uint64_t* addr = 0;
-
-  if (addr == 0)
-  {
-    uint32_t rq_addr = 0x10030000;
-    int fd;
-    int pgsize;
-
-    pgsize = getpagesize();
-    fd = open ("/dev/mem", O_RDONLY | O_SYNC, 0);
-    if (fd < 0) {
-      perror("open");
-      return NULL;
-    }
-    addr = mmap(0, pgsize, PROT_READ, MAP_SHARED, fd, rq_addr);
-    close(fd);
-    if (addr == (uint64_t *)-1) {
-      perror("mmap");
-      return NULL;
-    }
-  }
-
-  return *addr;
-}
-
-INLINE_ELAPSED(inline)
-
-#define HAVE_TICK_COUNTER
-#endif
-#endif /* HAVE_MIPS_ZBUS_TIMER */
diff --git a/pllrepo/src/errcodes.h b/pllrepo/src/errcodes.h
deleted file mode 100644
index ce81e68..0000000
--- a/pllrepo/src/errcodes.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/** 
- * PLL (version 1.0.0) a software library for phylogenetic inference
- * Copyright (C) 2013 Tomas Flouri and Alexandros Stamatakis
- *
- * Derived from 
- * RAxML-HPC, a program for sequential and parallel estimation of phylogenetic
- * trees by Alexandros Stamatakis
- *
- * This program is free software: you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation, either version 3 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- * 
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * For any other enquiries send an Email to Tomas Flouri
- * Tomas.Flouri at h-its.org
- *
- * When publishing work that uses PLL please cite PLL
- * 
- * @file errcodes.h
- */
-#ifndef ERRCODES_H
-#define ERRCODES_H
-
-#define PLL_ERROR_FILE_OPEN             1               /**< Error while opening file */
-#define PLL_ERROR_INVALID_FILETYPE      2               /**< Invalid fileType given at pllParseAlignmeFile */
-
-#define  PLL_NNI_P_TIP                  1 << 0          /**< Node p is a tip */
-#define  PLL_NNI_Q_TIP                  1 << 1          /**< Node p->back is a tip */
-
-#define  PLL_PARTITION_OUT_OF_BOUNDS    1 << 0      /**< Trying to access a partition index that is out of bounds */
-#define  PLL_BASE_FREQUENCIES_DO_NOT_SUM_TO_1 1 << 1      /**< base frequencies don't sum to 1.0 */
-
-#define PLL_LINKAGE_LIST_OUT_OF_BOUNDS 1 << 0      /**< trying to link a partition index that is out of bounds */
-
-#define PLL_SUBSTITUTION_RATE_OUT_OF_BOUNDS 1 << 0 /**< trying  to set a substitution rate to a value that is out of bounds */
-#define PLL_INVALID_Q_MATRIX_SYMMETRY       1 << 1 /**< specifyng an invalid parameter symmetry in the Q matrix */
-#define PLL_Q_MATRIX_SYMMETRY_OUT_OF_BOUNDS 1 << 2 /**<specifying a Q matrix symmetry that is out of bounds */
-
-#define PLL_UNKNOWN_MOLECULAR_DATA_TYPE 1 << 0 /**<PLL is trying to do something for an unknown data type */
-
-#define PLL_INCONSISTENT_SUBST_RATE_OPTIMIZATION_SETTING 1 << 0 /**<PLL detected an inconsistent setting for the Q matrix rate optimization */
-#define PLL_INCONSISTENT_Q_MATRIX_SYMMETRIES_ACROSS_LINKED_PARTITIONS 1 << 1 /**<Q matrix symmetry vector is not identical for linked partitions */
-#define PLL_INCONSISTENT_Q_MATRIX_ENTRIES_ACROSS_LINKED_PARTITIONS 1 << 2 /**<Q matrix entries are not identical for linked partitions */
-#define PLL_INCONSISTENT_ALPHA_STATES_ACROSS_LINKED_PARTITIONS 1 << 3 /**<alpha states are not identical across linked partitions */
-#define PLL_INCONSISTENT_ALPHA_VALUES_ACROSS_LINKED_PARTITIONS 1 << 4 /**<alpha values are not identical across linked partitions */
-#define PLL_INCONSISTENT_FREQUENCY_STATES_ACROSS_LINKED_PARTITIONS 1 << 5 /**<frequency states are not identical across linked partitions */
-#define PLL_INCONSISTENT_FREQUENCY_VALUES_ACROSS_LINKED_PARTITIONS 1 << 6 /**<frequency values are not identical across linked partitions */
-
-#define PLL_NEWICK_ROOTED_TREE          1 << 0          /**< @brief Binary root detected */
-#define PLL_NEWICK_BAD_STRUCTURE        1 << 1          /**< @brief Errornous tree detected */
-
-
-
-#define PLL_ERROR_PHYLIP_HEADER_SYNTAX         5
-#define PLL_ERROR_PHYLIP_BODY_SYNTAX           6
-#define PLL_ERROR_FASTA_SYNTAX                 7
-
-
-
-
-#endif
diff --git a/pllrepo/src/evaluateGenericSpecial.c b/pllrepo/src/evaluateGenericSpecial.c
deleted file mode 100644
index 9a0dfc8..0000000
--- a/pllrepo/src/evaluateGenericSpecial.c
+++ /dev/null
@@ -1,3321 +0,0 @@
-/** 
- * PLL (version 1.0.0) a software library for phylogenetic inference
- * Copyright (C) 2013 Tomas Flouri and Alexandros Stamatakis
- *
- * Derived from 
- * RAxML-HPC, a program for sequential and parallel estimation of phylogenetic
- * trees by Alexandros Stamatakis
- *
- * This program is free software: you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation, either version 3 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- * 
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * For any other enquiries send an Email to Tomas Flouri
- * Tomas.Flouri at h-its.org
- *
- * When publishing work that uses PLL please cite PLL
- * 
- * @file evaluateGenericSpecial.c
- *   
- * @brief Functions for computing the log likelihood at a given branch of the tree (i.e. a virtual root that is placed at this branch)
- */
-#include "mem_alloc.h"
-
-#ifndef WIN32 
-#include <unistd.h>
-#endif
-
-#include <math.h>
-#include <time.h> 
-#include <stdlib.h>
-#include <stdio.h>
-#include <ctype.h>
-#include <string.h>
-#include <assert.h>
-
-#include "pll.h"
-#include "pllInternal.h"
-
-#ifdef __MIC_NATIVE
-#include "mic_native.h"
-#endif
-
-/* the set of functions in here computes the log likelihood at a given branch (the virtual root of a tree) */
-
-/* includes for using SSE3 intrinsics */
-
-#ifdef __SSE3
-#include <xmmintrin.h>
-#include <pmmintrin.h>
-/*#include <tmmintrin.h>*/
-#endif
-
-
-/** @defgroup evaluateLikelihoodGroup Likelihood evaluation
-    
-    This set of functions deals with the evaluation of likelihood for the current topology
-*/
-
-
-
-
-
-
-
-/* below are the function headers for unreadable, highly optimized versions of the above functions 
-   for DNA and protein data that also use SSE3 intrinsics and implement some memory saving tricks.
-   The actual functions can be found at the end of this source file. 
-   All other likelihood function implementation files:
-
-   newviewGenericSpecial.c
-   makenewzSpecial.c
-   evaluatePartialGenericSpecial.c
-
-   are also structured like this 
-
-   To decide which set of function implementations to use you will have to undefine or define _OPTIMIZED_FUNCTIONS 
-   in the Makefile 
-   */
-#if (defined(__SSE3) || defined(__AVX))
-
-static double evaluateGTRGAMMAPROT_LG4(int *ex1, int *ex2, int *wptr,
-                                       double *x1, double *x2,  
-                                       double *tipVector[4], 
-                                       unsigned char *tipX1, int n, double *diagptable, const pllBoolean fastScaling,
-                                       double * lg4_weights);
-
-/* GAMMA for proteins with memory saving */
-
-static double evaluateGTRGAMMAPROT_GAPPED_SAVE (const pllBoolean fastScaling, int *ex1, int *ex2, int *wptr,
-                                                double *x1, double *x2,  
-                                                double *tipVector, 
-                                                unsigned char *tipX1, int n, double *diagptable, 
-                                                double *x1_gapColumn, double *x2_gapColumn, unsigned int *x1_gap, unsigned int *x2_gap);
-
-
-/* GAMMA for proteins */
-
-static double evaluateGTRGAMMAPROT (const pllBoolean fastScaling, int *ex1, int *ex2, int *wptr,
-                                    double *x1, double *x2,  
-                                    double *tipVector, 
-                                    unsigned char *tipX1, int n, double *diagptable);
-
-/* CAT for proteins */
-
-static double evaluateGTRCATPROT (const pllBoolean fastScaling, int *ex1, int *ex2, int *cptr, int *wptr,
-                                  double *x1, double *x2, double *tipVector,
-                                  unsigned char *tipX1, int n, double *diagptable_start);
-
-
-/* CAT for proteins with memory saving */
-
-static double evaluateGTRCATPROT_SAVE (const pllBoolean fastScaling, int *ex1, int *ex2, int *cptr, int *wptr,
-                                       double *x1, double *x2, double *tipVector,
-                                       unsigned char *tipX1, int n, double *diagptable_start, 
-                                       double *x1_gapColumn, double *x2_gapColumn, unsigned int *x1_gap, unsigned int *x2_gap);
-
-/* analogous DNA functions */
-
-static double evaluateGTRCAT_SAVE (const pllBoolean fastScaling, int *ex1, int *ex2, int *cptr, int *wptr,
-                                   double *x1_start, double *x2_start, double *tipVector,                     
-                                   unsigned char *tipX1, int n, double *diagptable_start,
-                                   double *x1_gapColumn, double *x2_gapColumn, unsigned int *x1_gap, unsigned int *x2_gap);
-
-static double evaluateGTRGAMMA_GAPPED_SAVE(const pllBoolean fastScaling, int *ex1, int *ex2, int *wptr,
-                                           double *x1_start, double *x2_start, 
-                                           double *tipVector, 
-                                           unsigned char *tipX1, const int n, double *diagptable,
-                                           double *x1_gapColumn, double *x2_gapColumn, unsigned int *x1_gap, unsigned int *x2_gap);
-
-static double evaluateGTRGAMMA(const pllBoolean fastScaling, int *ex1, int *ex2, int *wptr,
-                               double *x1_start, double *x2_start, 
-                               double *tipVector, 
-                               unsigned char *tipX1, const int n, double *diagptable);
-
-
-static double evaluateGTRCAT (const pllBoolean fastScaling, int *ex1, int *ex2, int *cptr, int *wptr,
-                              double *x1_start, double *x2_start, double *tipVector,                  
-                              unsigned char *tipX1, int n, double *diagptable_start);
-
-
-#endif
-
-#if (defined(__AVX) || defined(__SSE3))
-static double evaluateGTRGAMMA_BINARY(int *ex1, int *ex2, int *wptr,
-                                      double *x1_start, double *x2_start, 
-                                      double *tipVector, 
-                                      unsigned char *tipX1, const int n, double *diagptable, const pllBoolean fastScaling);
-
-static double evaluateGTRCAT_BINARY (int *ex1, int *ex2, int *cptr, int *wptr,
-                                     double *x1_start, double *x2_start, double *tipVector,                   
-                                     unsigned char *tipX1, int n, double *diagptable_start, const pllBoolean fastScaling);
-#endif
-
-
-/* 
-   global variables of pthreads version, reductionBuffer is the global array 
-   that is used for implementing deterministic reduction operations, that is,
-   the total log likelihood over the partial log likelihoods for the sites that each thread has computed 
-
-   NumberOfThreads is just the number of threads.
-
-   Note the volatile modifier here, that guarantees that the compiler will not do weird optimizations 
-   or rearrangements of the code accessing those variables, because it does not know that several concurrent threads 
-   will access those variables simultaneously 
-
-   UPDATE: reductionBuffer is now merged with globalResult
-   */
-
-
-/* a pre-computed 32-bit integer mask */
-
-extern const unsigned int mask32[32];
-
-/* the function below computes the P matrix from the decomposition of the Q matrix and the respective rate categories for a single partition */
-
-/** @brief Compute the diagonal of P matrix for a specific edge
-
-    This function computes the diagonal of P matrix for a branch of length \a z
-    from the decomposition of the Q matrix specified in \a EIGN and the respective
-    rate categories \a rptr for a single partition. The diagonal is then stored in
-    \a diagptable. 
-
-    @param z                  Length of edge
-    @param states             Number of states
-    @param numberOfCategories Number of categories in the rate heterogeneity rate arrays
-    @param rptr               Rate heterogeneity rate arrays
-    @param EIGN               Eigenvalues
-    @param diagptable         Where to store the resulting P matrix
-*/
-static void calcDiagptable(const double z, const int states, const int numberOfCategories, const double *rptr, const double *EIGN, double *diagptable)
-{
-  int 
-    i, 
-    l;
-
-  double 
-    lz,
-    *lza = (double *)rax_malloc(sizeof(double) * states);
-
-  /* transform the root branch length to the log and check if it is not too small */
-
-  if (z < PLL_ZMIN) 
-    lz = log(PLL_ZMIN);
-  else
-    lz = log(z);
-
-  /* do some pre-computations to avoid redundant computations further below */
-
-  for(i = 1; i < states; i++)      
-    lza[i] = EIGN[i] * lz; 
-
-  /* loop over the number of per-site or discrete gamma rate categories */
-
-  for(i = 0; i < numberOfCategories; i++)
-  {                    
-    /* 
-       diagptable is a pre-allocated array of doubles that stores the P-Matrix 
-       the first entry is always 1.0 
-       */
-    diagptable[i * states] = 1.0;
-
-    /* compute the P matrix for all remaining states of the model */
-
-    for(l = 1; l < states; l++)
-      diagptable[i * states + l] = exp(rptr[i] * lza[l]);
-  }
-
-  rax_free(lza);
-}
-
-/** @brief Compute the diagonal of P matrix for a specific edge for the LG4 model
-
-    This function computes the diagonal of P matrix for a branch of length \a z
-    from the decomposition of the 4 LG4 Q matrices specified in \a EIGN and the respective
-    rate categories \a rptr for a single partition. The diagonal is then stored in
-    \a diagptable. 
-
-    @param z
-      Length of edge
-
-    @param states
-      Number of states
-
-    @param numberOfCategories
-      Number of categories in the rate heterogeneity rate arrays
-
-    @param rptr
-      Rate heterogeneity rate arrays
-
-    @param EIGN
-      Eigenvalues of the 4 Q matrices
-
-    @param diagptable
-      Where to store the resulting P matrix
-
-    @param numStates
-      Number of states
-*/
-static void calcDiagptableFlex_LG4(double z, int numberOfCategories, double *rptr, double *EIGN[4], double *diagptable, const int numStates)
-{
-  int 
-    i, 
-    l;
-  
-  double 
-    lz;
-  
-  assert(numStates <= 64);
-  
-  if (z < PLL_ZMIN) 
-    lz = log(PLL_ZMIN);
-  else
-    lz = log(z);
-
-  for(i = 0; i <  numberOfCategories; i++)
-    {                  
-      diagptable[i * numStates + 0] = 1.0;
-
-      for(l = 1; l < numStates; l++)
-        diagptable[i * numStates + l] = exp(rptr[i] * EIGN[i][l] * lz);                   
-    }        
-}
-
-static void ascertainmentBiasSequence(unsigned char tip[32], int numStates)
-{ 
-  assert(numStates <= 32 && numStates > 1);
-
-  switch(numStates)
-    {
-    case 2:     
-      tip[0] = 1;
-      tip[1] = 2;
-      break;
-    case 4:
-      tip[0] = 1;
-      tip[1] = 2;
-      tip[2] = 4;
-      tip[3] = 8;
-      break;
-    default:
-      {
-	int 
-	  i;
-	for(i = 0; i < numStates; i++)
-	  {
-	    tip[i] = i;
-	    //printf("%c ", inverseMeaningPROT[i]);
-	  }
-	//printf("\n");
-      }
-      break;
-    }
-}
-
-static double evaluateCatAsc(int *ex1, int *ex2,
-			     double *x1, double *x2,  
-			     double *tipVector, 
-			     unsigned char *tipX1, int n, double *diagptable, const int numStates)
-{
-  double
-    exponent,
-    sum = 0.0, 
-    unobserved,
-    term,
-    *left, 
-    *right;
-  
-  int     
-    i,    
-    l;   
-         
-  unsigned char 
-    tip[32];
-
-  ascertainmentBiasSequence(tip, numStates);
-   
-  if(tipX1)
-    {               
-      for (i = 0; i < n; i++) 
-	{
-	  left = &(tipVector[numStates * tip[i]]);	  	  
-	  right = &(x2[i * numStates]);
-
-	  term = 0.0;
-	         	      
-	  for(l = 0; l < numStates; l++)
-	    term += left[l] * right[l] * diagptable[l];	      	 	 	  	 
-
-	  /* assumes that pow behaves as expected/specified for underflows
-	     from the man page:
-	       If result underflows, and is not representable,
-	       a range error occurs and 0.0 is returned.
-	 */
-
-	  exponent = pow(PLL_MINLIKELIHOOD, (double)ex2[i]);
-
-	  unobserved = fabs(term) * exponent;
-
-#ifdef _DEBUG_ASC
-	  if(ex2[i] > 0)
-	    {
-	      printf("s %d\n", ex2[i]);
-	      assert(0);
-	    }
-#endif	  
-	    
-	  sum += unobserved;
-	}              
-    }              
-  else
-    {           
-      for (i = 0; i < n; i++) 
-	{	  	 
-	  term = 0.0;
-	  	 
-	  left  = &(x1[i * numStates]);
-	  right = &(x2[i * numStates]);	    
-	      
-	  for(l = 0; l < numStates; l++)
-	    term += left[l] * right[l] * diagptable[l];		  
-	  
-	  /* assumes that pow behaves as expected/specified for underflows
-	     from the man page:
-	       If result underflows, and is not representable,
-	       a range error occurs and 0.0 is returned.
-	  */
-
-	  exponent = pow(PLL_MINLIKELIHOOD, (double)(ex1[i] + ex2[i]));
-
-	  unobserved = fabs(term) * exponent;
-	  
-#ifdef _DEBUG_ASC
-	  if(ex2[i] > 0 || ex1[i] > 0)
-	    {
-	      printf("s %d %d\n", ex1[i], ex2[i]);
-	      assert(0);
-	    }
-#endif
-
-	  sum += unobserved;
-	}             
-    }        
-
-  return  sum;
-}
-
-
-static double evaluateGammaAsc(int *ex1, int *ex2,
-				double *x1, double *x2,  
-				double *tipVector, 
-				unsigned char *tipX1, int n, double *diagptable, const int numStates)
-{
-  double
-    exponent,
-    sum = 0.0, 
-    unobserved,
-    term,
-    *left, 
-    *right;
-  
-  int     
-    i, 
-    j, 
-    l;   
-  
-  const int 
-    gammaStates = numStates * 4;
-         
-  unsigned char 
-    tip[32];
-
-  ascertainmentBiasSequence(tip, numStates);
-   
-  if(tipX1)
-    {               
-      for (i = 0; i < n; i++) 
-	{
-	  left = &(tipVector[numStates * tip[i]]);	  	  
-	  
-	  for(j = 0, term = 0.0; j < 4; j++)
-	    {
-	      right = &(x2[gammaStates * i + numStates * j]);
-	      
-	      for(l = 0; l < numStates; l++)
-		term += left[l] * right[l] * diagptable[j * numStates + l];	      
-	    }	 	  	 
-
-      /* assumes that pow behaves as expected/specified for underflows
-         from the man page:
-           If result underflows, and is not representable,
-           a range error occurs and 0.0 is returned.
-      */
-
-      exponent = pow(PLL_MINLIKELIHOOD, (double)ex2[i]);
-
-      unobserved = fabs(term) * exponent;
-	  
-#ifdef _DEBUG_ASC
-	  if(ex2[i] > 0)
-	    {
-	      printf("s %d\n", ex2[i]);
-	      assert(0);
-	    }
-#endif	  
-	    
-	  sum += unobserved;
-	}              
-    }              
-  else
-    {           
-      for (i = 0; i < n; i++) 
-	{	  	 	             
-	  
-	  for(j = 0, term = 0.0; j < 4; j++)
-	    {
-	      left  = &(x1[gammaStates * i + numStates * j]);
-	      right = &(x2[gammaStates * i + numStates * j]);	    
-	      
-	      for(l = 0; l < numStates; l++)
-		term += left[l] * right[l] * diagptable[j * numStates + l];	
-	    }
-	  
-	  /* assumes that pow behaves as expected/specified for underflows
-	     from the man page:
-	       If result underflows, and is not representable,
-	       a range error occurs and 0.0 is returned.
-	  */
-
-	  exponent = pow(PLL_MINLIKELIHOOD, (double)(ex1[i] + ex2[i]));
-
-	  unobserved = fabs(term) * exponent;
-	  
-#ifdef _DEBUG_ASC
-	  if(ex2[i] > 0 || ex1[i] > 0)
-	    {
-	      printf("s %d %d\n", ex1[i], ex2[i]);
-	      assert(0);
-	    }
-#endif
-
-	  sum += unobserved;
-	}             
-    }        
-
-  return  sum;
-}
-
-
-/** @ingroup evaluateLikelihoodGroup
-    @brief A generic (and slow) implementation of log likelihood evaluation of a tree using the GAMMA model of rate heterogeneity
-    
-    Computes the log likelihood of the topology for a specific partition, assuming
-    that the GAMMA model of rate heterogeneity is used. The likelihood is computed at
-    a virtual root placed at an edge whose two end-points (nodes) have the conditional
-    likelihood vectors \a x1 and \a x2. 
-    Furthermore, if \a getPerSiteLikelihoods is set to \b PLL_TRUE, then the log
-    likelihood for each site is also computed and stored at the corresponding position
-    in the array \a perSiteLikelihoods.
-
-    @param fastScaling
-      If set to \b PLL_FALSE, then the log likelihood of each site is also incremented by \a log(PLL_MINLIKELIHOOD) times the number
-      of times it has been scaled down
-
-    @param ex1
-      An array that holds how many times a site has been scaled and points at the entries for node \a p. This
-      parameter is used if \a fastScaling is set to \b PLL_FALSE.
-
-    @param ex2
-      An array that holds how many times a site has been scaled and points at the entries for node \a q. This
-      parameter is used if \a fastScaling is set to \b PLL_FALSE.
-
-    @param wptr
-      Array holding the weight for each site in the compressed partition alignment
-
-    @param x1_start
-      Conditional likelihood vectors for one of the two end-points of the specific edge for which we are evaluating the likelihood
-
-    @param x2_start
-      Conditional likelihood vectors for the other end-point of the specific edge for which we are evaluating the likelihood
-
-    @param tipVector
-      Precomputed table where the number of rows is equal to the number of possible basepair characters for the current data 
-      type, i.e.16 for DNA and 23 for AA, and each rows contains \a states elements each of which contains transition
-      probabilities computed from the eigenvectors of the decomposed Q matrix.
-
-    @param tipX1
-      If one of the two end-points (nodes) of the specific edge (for which we are evaluating the likelihood) is a tip, then
-      this holds a pointer to the sequence data (basepairs) already converted in the internal integer representation, and \a x2
-      holds the conditional likelihood vectors for the internal node.
-
-    @param n
-      Number of sites for which we are doing the evaluation. For the single-thread version this is the 
-      number of sites in the current partition, for multi-threads this is the number of sites assigned
-      to the running thread from the current partition.
-
-    @param diagptable
-      Start of the array that contains the P-Matrix diagonal of the specific edge for which we are
-      evaluating the likelihood, and for each category of the GAMMA model
-
-    @param states
-      Number of states (4 for DNA, 20 for AA)
-
-    @param perSiteLikelihoods
-      Array to store per-site log likelihoods if \a getPerSiteLikelihoods is set to \b PLL_TRUE
-
-    @param getPerSiteLikelihoods
-      If set to \b PLL_TRUE then per-site log likelihoods are also computed and stored in \a perSiteLikelihoods
-
-    @return
-      The evaluated log likelihood of the tree topology
-*/
-static double evaluateGAMMA_FLEX(const pllBoolean fastScaling, int *ex1, int *ex2, int *wptr,
-                                 double *x1_start, double *x2_start, 
-                                 double *tipVector, 
-                                 unsigned char *tipX1, const int n, double *diagptable, const int states, double *perSiteLikelihoods, pllBoolean getPerSiteLikelihoods)
-{
-  double   
-    sum = 0.0, 
-    term,
-    *x1,
-    *x2;
-
-  int     
-    i, 
-    j,
-    k;
-
-  /* span is the offset within the likelihood array at an inner node that gets us from the values 
-     of site i to the values of site i + 1 */
-
-  const int 
-    span = states * 4;
-
-
-  /* we distinguish between two cases here: one node of the two nodes defining the branch at which we put the virtual root is 
-     a tip. Both nodes can not be tips because we do not allow for two-taxon trees ;-) 
-     Note that, if a node is a tip, this will always be tipX1. This is done for code simplicity and the flipping of the nodes
-     is done before when we compute the traversal descriptor.     
-     */
-
-  /* the left node is a tip */
-  if(tipX1)
-  {             
-    /* loop over the sites of this partition */
-    for (i = 0; i < n; i++)
-    {
-      /* access pre-computed tip vector values via a lookup table */
-      x1 = &(tipVector[states * tipX1[i]]);      
-      /* access the other(inner) node at the other end of the branch */
-      x2 = &(x2_start[span * i]);        
-
-      /* loop over GAMMA rate categories, hard-coded as 4 in RAxML */
-      for(j = 0, term = 0.0; j < 4; j++)
-        /* loop over states and multiply them with the P matrix */
-        for(k = 0; k < states; k++)
-          term += x1[k] * x2[j * states + k] * diagptable[j * states + k];                                                        
-
-      /* take the log of the likelihood and multiply the per-gamma rate likelihood by 1/4.
-         Under the GAMMA model the 4 discrete GAMMA rates all have the same probability 
-         of 0.25 */
-
-      if(!fastScaling)
-        term = log(0.25 * fabs(term)) + (ex2[i] * log(PLL_MINLIKELIHOOD));
-      else
-        term = log(0.25 * fabs(term));
-
-      /* if required get the per-site log likelihoods.
-         note that these are the plain per site log-likes, not 
-         multiplied with the pattern weight value */
-      
-      if(getPerSiteLikelihoods)
-        perSiteLikelihoods[i] = term;
-
-      sum += wptr[i] * term;
-    }     
-  }
-  else
-  {        
-    for (i = 0; i < n; i++) 
-    {
-      /* same as before, only that now we access two inner likelihood vectors x1 and x2 */
-
-      x1 = &(x1_start[span * i]);
-      x2 = &(x2_start[span * i]);                 
-
-      for(j = 0, term = 0.0; j < 4; j++)
-        for(k = 0; k < states; k++)
-          term += x1[j * states + k] * x2[j * states + k] * diagptable[j * states + k];
-
-      if(!fastScaling)
-        term = log(0.25 * fabs(term)) + ((ex1[i] + ex2[i])*log(PLL_MINLIKELIHOOD));
-      else
-        term = log(0.25 * fabs(term));
-      
-      if(getPerSiteLikelihoods)
-        perSiteLikelihoods[i] = term;
-
-      sum += wptr[i] * term;
-    }                           
-  }
-
-  return sum;
-} 
-
-#if (defined(__SSE3) || defined(__AVX))
-/** @ingroup evaluateLikelihoodGroup
-    @brief Memory saving version of the generic (and slow) implementation of log likelihood evaluation of a tree using the GAMMA model of rate heterogeneity
-
-    Computes the log likelihood of the topology for a specific partition, assuming
-    that the GAMMA model of rate heterogeneity is used and memory saving technique
-    is enabled. The likelihood is computed at a virtual root placed at an edge whose
-    two end-points (nodes) have the conditional likelihood vectors \a x1 and \a x2. 
-    Furthermore, if \a getPerSiteLikelihoods is set to \b PLL_TRUE, then the log
-    likelihood for each site is also computed and stored at the corresponding position
-    in the array \a perSiteLikelihoods.
-
-    @param fastScaling
-      If set to \b PLL_FALSE, then the log likelihood of each site is also incremented by \a log(PLL_MINLIKELIHOOD) times the number
-      of times it has been scaled down
-
-    @param ex1
-      An array that holds how many times a site has been scaled and points at the entries for node \a p. This
-      parameter is used if \a fastScaling is set to \b PLL_FALSE.
-
-    @param ex2
-      An array that holds how many times a site has been scaled and points at the entries for node \a q. This
-      parameter is used if \a fastScaling is set to \b PLL_FALSE.
-
-    @param wptr
-      Array holding the weight for each site in the compressed partition alignment
-
-    @param x1_start
-      Conditional likelihood vectors for one of the two end-points of the specific edge for which we are evaluating the likelihood
-
-    @param x2_start
-      Conditional likelihood vectors for the other end-point of the specific edge for which we are evaluating the likelihood
-
-    @param tipVector
-      Precomputed table where the number of rows is equal to the number of possible basepair characters for the current data 
-      type, i.e.16 for DNA and 23 for AA, and each rows contains \a states elements each of which contains transition
-      probabilities computed from the eigenvectors of the decomposed Q matrix.
-
-    @param tipX1
-      If one of the two end-points (nodes) of the specific edge (for which we are evaluating the likelihood) is a tip, then
-      this holds a pointer to the sequence data (basepairs) already converted in the internal integer representation, and \a x2
-      holds the conditional likelihood vectors for the internal node.
-
-    @param n
-      Number of sites for which we are doing the evaluation. For the single-thread version this is the 
-      number of sites in the current partition, for multi-threads this is the number of sites assigned
-      to the running thread from the current partition.
-
-    @param diagptable
-      Start of the array that contains the P-Matrix diagonal of the specific edge for which we are
-      evaluating the likelihood, and for each category of the GAMMA model
-
-    @param states
-      Number of states (4 for DNA, 20 for AA)
-
-    @param perSiteLikelihoods
-      Array to store per-site log likelihoods if \a getPerSiteLikelihoods is set to \b PLL_TRUE
-
-    @param getPerSiteLikelihoods
-      If set to \b PLL_TRUE then per-site log likelihoods are also computed and stored in \a perSiteLikelihoods
-
-    @param x1_gapColumn
-
-    @param x2_gapColumn
-
-    @param x1_gap
-      Gap bitvector for the left child node
-
-    @param x2_gap
-      Gap bitvector for the right child node
-
-    @return
-      The evaluated log likelihood of the tree topology
-
-    @todo
-      Document x1_gapColumn, x2_gapColumn, x1_gap, x2_gap and add a brief description of how this technique works
-*/
-static double evaluateGAMMA_FLEX_SAVE(const pllBoolean fastScaling, int *ex1, int *ex2, int *wptr,
-                                      double *x1_start, double *x2_start, 
-                                      double *tipVector, 
-                                      unsigned char *tipX1, const int n, double *diagptable, const int states, double *perSiteLikelihoods, pllBoolean getPerSiteLikelihoods,
-                                      double *x1_gapColumn, double *x2_gapColumn, unsigned int *x1_gap, unsigned int *x2_gap)
-{
-  double   
-    sum = 0.0, 
-    term,
-    *x1,
-    *x2,
-    *x1_ptr = x1_start,
-    *x2_ptr = x2_start;
-    
-  int     
-    i, 
-    j,
-    k;
-
-  /* span is the offset within the likelihood array at an inner node that gets us from the values 
-     of site i to the values of site i + 1 */
-
-  const int 
-    span = states * 4;
-
-
-  /* we distinguish between two cases here: one node of the two nodes defining the branch at which we put the virtual root is 
-     a tip. Both nodes can not be tips because we do not allow for two-taxon trees ;-) 
-     Note that, if a node is a tip, this will always be tipX1. This is done for code simplicity and the flipping of the nodes
-     is done before when we compute the traversal descriptor.     
-     */
-
-  /* the left node is a tip */
-  if(tipX1)
-  {             
-    /* loop over the sites of this partition */
-    for (i = 0; i < n; i++)
-    {
-      /* access pre-computed tip vector values via a lookup table */
-      x1 = &(tipVector[states * tipX1[i]]);      
-      /* access the other(inner) node at the other end of the branch */
-
-      if(x2_gap[i / 32] & mask32[i % 32])
-        x2 = x2_gapColumn;
-      else
-        {
-          x2 = x2_ptr;
-          x2_ptr += span;
-        }
-
-      /* loop over GAMMA rate categories, hard-coded as 4 in RAxML */
-      for(j = 0, term = 0.0; j < 4; j++)
-        /* loop over states and multiply them with the P matrix */
-        for(k = 0; k < states; k++)
-          term += x1[k] * x2[j * states + k] * diagptable[j * states + k];                                                        
-
-      /* take the log of the likelihood and multiply the per-gamma rate likelihood by 1/4.
-         Under the GAMMA model the 4 discrete GAMMA rates all have the same probability 
-         of 0.25 */
-
-      if(!fastScaling)
-        term = log(0.25 * fabs(term)) + (ex2[i] * log(PLL_MINLIKELIHOOD));
-      else
-        term = log(0.25 * fabs(term));
-
-      /* if required get the per-site log likelihoods.
-         note that these are the plain per site log-likes, not 
-         multiplied with the pattern weight value */
-      
-      if(getPerSiteLikelihoods)
-        perSiteLikelihoods[i] = term;
-
-      sum += wptr[i] * term;
-    }     
-  }
-  else
-  {        
-    for (i = 0; i < n; i++) 
-    {
-      /* same as before, only that now we access two inner likelihood vectors x1 and x2 */
-      
-      if(x1_gap[i / 32] & mask32[i % 32])
-        x1 = x1_gapColumn;
-      else
-        {
-          x1 = x1_ptr;
-          x1_ptr += span;
-        }    
-
-      if(x2_gap[i / 32] & mask32[i % 32])
-        x2 = x2_gapColumn;
-      else
-        {
-          x2 = x2_ptr;
-          x2_ptr += span;
-        }                 
-
-      for(j = 0, term = 0.0; j < 4; j++)
-        for(k = 0; k < states; k++)
-          term += x1[j * states + k] * x2[j * states + k] * diagptable[j * states + k];
-
-      if(!fastScaling)
-        term = log(0.25 * fabs(term)) + ((ex1[i] + ex2[i])*log(PLL_MINLIKELIHOOD));
-      else
-        term = log(0.25 * fabs(term));
-      
-      if(getPerSiteLikelihoods)
-        perSiteLikelihoods[i] = term;
-
-      sum += wptr[i] * term;
-    }                           
-  }
-
-  return sum;
-} 
-#endif
-
-/** @ingroup evaluateLikelihoodGroup
-    @brief A generic (and slow) implementation of log likelihood evaluation of a tree using the CAT model of rate heterogeneity
-    
-    Computes the log likelihood of the topology for a specific partition, assuming
-    that the CAT model of rate heterogeneity is used. The likelihood is computed at
-    a virtual root placed at an edge whose two end-points (nodes) have the conditional
-    likelihood vectors \a x1 and \a x2. 
-    Furthermore, if \a getPerSiteLikelihoods is set to \b PLL_TRUE, then the log
-    likelihood for each site is also computed and stored at the corresponding position
-    in the array \a perSiteLikelihoods.
-
-    @param fastScaling
-      If set to \b PLL_FALSE, then the likelihood of each site is also multiplied by \a log(PLL_MINLIKELIHOOD) times the number
-      of times it has been scaled down
-
-    @param ex1
-      An array that holds how many times a site has been scaled and points at the entries for node \a p. This
-      parameter is used if \a fastScaling is set to \b PLL_FALSE.
-
-    @param ex2
-      An array that holds how many times a site has been scaled and points at the entries for node \a q. This
-      parameter is used if \a fastScaling is set to \b PLL_TRUE.
-
-    @param cptr
-      Array holding the rate for each site in the compressed partition alignment
-
-    @param wptr
-      Array holding the weight for each site in the compressed partition alignment
-
-    @param x1
-      Conditional likelihood vectors for one of the two end-points of the specific edge for which we are evaluating the likelihood
-
-    @param x2
-      Conditional likelihood vectors for the other end-point of the specific edge for which we are evaluating the likelihood
-
-    @param tipVector
-      Precomputed table where the number of rows is equal to the number of possible basepair characters for the current data type, 
-      i.e. 16 for DNA and 23 for AA, and each row contains \a states elements, each of which contains transition probabilities 
-      computed from the eigenvectors of the decomposed Q matrix.
-
-    @param tipX1
-      If one of the two end-points (nodes) of the specific edge (for which we are evaluating the likelihood) is a tip, then
-      this holds a pointer to the sequence data (basepairs) already converted in the internal integer representation, and \a x2
-      holds the conditional likelihood vectors for the internal node.
-
-    @param n
-      Number of sites for which we are doing the evaluation. For the single-thread version this is the number of sites in the
-      current partition, for multi-threads this is the number of sites assigned to the running thread from the current partition.
-
-    @param diagptable_start
-      Start of the array that contains the P-Matrix diagonal of the specific edge for which we are evaluating the likelihood,
-      and for each category of the CAT model
-
-    @param states
-      Number of states (4 for DNA, 20 for AA)
-
-    @param perSiteLikelihoods
-      Array to store per-site log likelihoods if \a getPerSiteLikelihoods is set to \b PLL_TRUE
-
-    @param getPerSiteLikelihoods
-      If set to \b PLL_TRUE then per-site log likelihoods are also computed and stored in \a perSiteLikelihoods
-
-    @return
-      The evaluated log likelihood of the tree topology
-*/
-static double evaluateCAT_FLEX (const pllBoolean fastScaling, int *ex1, int *ex2, int *cptr, int *wptr,
-                                double *x1, double *x2, double *tipVector,
-                                unsigned char *tipX1, int n, double *diagptable_start, const int states, double *perSiteLikelihoods, pllBoolean getPerSiteLikelihoods)
-{
-  double
-    logLikelihood = 0.0;
-
-  int
-    site;
-
-  /* One loop serves both the tip/inner and the inner/inner case: the two cases only differ in
-     where the first vector is read from and in which scaling counters are applied. */
-
-  for (site = 0; site < n; site++)
-    {
-      double
-        *lhs,
-        *rhs,
-        *pdiag,
-        siteLh = 0.0;
-
-      int
-        k;
-
-      /* first operand: the tipVector row selected by the encoded character of this site if
-         tipX1 is given, otherwise this site's slice of x1 */
-      if (tipX1)
-        lhs = &tipVector[states * tipX1[site]];
-      else
-        lhs = &x1[states * site];
-
-      /* second operand: always this site's slice of x2 */
-      rhs = &x2[states * site];
-
-      /* CAT: a single P-matrix diagonal per site, selected through the category index cptr[site] */
-      pdiag = &diagptable_start[states * cptr[site]];
-
-      /* no summation over several rates here, just one product sum over the states */
-      for (k = 0; k < states; k++)
-        siteLh += lhs[k] * rhs[k] * pdiag[k];
-
-      /* take the log; without fast scaling the per-site scaling counters are folded back in
-         (only ex2 when the first operand is a tip, ex1 + ex2 otherwise) */
-      if (fastScaling)
-        siteLh = log(fabs(siteLh));
-      else
-        {
-          const int
-            scaleCount = tipX1 ? ex2[site] : (ex1[site] + ex2[site]);
-
-          siteLh = log(fabs(siteLh)) + (scaleCount * log(PLL_MINLIKELIHOOD));
-        }
-
-      /* the stored per-site value is the plain log likelihood, not yet multiplied by the weight */
-      if (getPerSiteLikelihoods)
-        perSiteLikelihoods[site] = siteLh;
-
-      /* weight the site by wptr[site] before accumulating */
-      logLikelihood += wptr[site] * siteLh;
-    }
-
-  return logLikelihood;
-}
-
-#if (defined(__SSE3) || defined(__AVX))
-/** @ingroup evaluateLikelihoodGroup
-    @brief Generic (and slow) log likelihood evaluation under the CAT model for the memory saving layout
-
-    Performs the same per-site computation as ::evaluateCAT_FLEX, but \a x1 and \a x2 are not
-    indexed by the site number. A site for which \a isGap reports a gap in \a x1_gap
-    (respectively \a x2_gap) reads the shared vector \a x1_gapColumn (respectively
-    \a x2_gapColumn) instead, and every other site consumes the next \a states entries of
-    \a x1 (respectively \a x2). See ::evaluateCAT_FLEX for the parameters both functions share.
-
-    @param x1_gapColumn
-      Vector of \a states entries used as first operand at the sites flagged in \a x1_gap.
-      Only read if \a tipX1 is \b NULL.
-
-    @param x2_gapColumn
-      Vector of \a states entries used as second operand at the sites flagged in \a x2_gap
-
-    @param x1_gap
-      Gap bitvector queried with \a isGap for the first operand. Only read if \a tipX1 is \b NULL.
-
-    @param x2_gap
-      Gap bitvector queried with \a isGap for the second operand
-
-    @return
-      The sum over all \a n sites of the site log likelihood multiplied by the site weight
-*/
-static double evaluateCAT_FLEX_SAVE (const pllBoolean fastScaling, int *ex1, int *ex2, int *cptr, int *wptr,
-                                     double *x1, double *x2, double *tipVector,
-                                     unsigned char *tipX1, int n, double *diagptable_start, const int states, double *perSiteLikelihoods, pllBoolean getPerSiteLikelihoods,
-                                     double *x1_gapColumn, double *x2_gapColumn, unsigned int *x1_gap, unsigned int *x2_gap)
-{
-  double
-    logLikelihood = 0.0,
-    *nextLeft  = x1,
-    *nextRight = x2;
-
-  int
-    site;
-
-  /* One loop serves both the tip/inner and the inner/inner case: the two cases only differ in
-     where the first vector is read from and in which scaling counters are applied. */
-
-  for (site = 0; site < n; site++)
-    {
-      double
-        *lhs,
-        *rhs,
-        *pdiag,
-        siteLh = 0.0;
-
-      int
-        k;
-
-      /* first operand: tipVector row if tipX1 is given; otherwise the shared gap column for
-         gap sites, or the next not yet consumed chunk of x1 */
-      if (tipX1)
-        lhs = &tipVector[states * tipX1[site]];
-      else if (isGap(x1_gap, site))
-        lhs = x1_gapColumn;
-      else
-        {
-          lhs = nextLeft;
-          nextLeft += states;
-        }
-
-      /* second operand: shared gap column for gap sites, or the next chunk of x2 */
-      if (isGap(x2_gap, site))
-        rhs = x2_gapColumn;
-      else
-        {
-          rhs = nextRight;
-          nextRight += states;
-        }
-
-      /* CAT: a single P-matrix diagonal per site, selected through the category index cptr[site] */
-      pdiag = &diagptable_start[states * cptr[site]];
-
-      /* no summation over several rates here, just one product sum over the states */
-      for (k = 0; k < states; k++)
-        siteLh += lhs[k] * rhs[k] * pdiag[k];
-
-      /* take the log; without fast scaling the per-site scaling counters are folded back in
-         (only ex2 when the first operand is a tip, ex1 + ex2 otherwise) */
-      if (fastScaling)
-        siteLh = log(fabs(siteLh));
-      else
-        {
-          const int
-            scaleCount = tipX1 ? ex2[site] : (ex1[site] + ex2[site]);
-
-          siteLh = log(fabs(siteLh)) + (scaleCount * log(PLL_MINLIKELIHOOD));
-        }
-
-      /* the stored per-site value is the plain log likelihood, not yet multiplied by the weight */
-      if (getPerSiteLikelihoods)
-        perSiteLikelihoods[site] = siteLh;
-
-      /* weight the site by wptr[site] before accumulating */
-      logLikelihood += wptr[site] * siteLh;
-    }
-
-  return logLikelihood;
-}
-#endif
-
-
-/* This is the core function for computing the log likelihood at a branch */
-/** @ingroup evaluateLikelihoodGroup
-    @brief Evaluate the log likelihood of a specific branch of the topology
-    
-    Evaluates the likelihood of the tree topology assuming a virtual root is
-    placed at the edge whose end-points are node with number \a pNumber and \a
-    qNumber in the first slot of the traversal descriptor. The function first
-    computes the conditional likelihoods for all necessary nodes (the ones in
-    the traversal descriptor list) by calling the function \a pllNewviewIterative
-    and then evaluates the likelihood at the root. In addition, if \a
-    getPerSiteLikelihoods is set to \b PLL_TRUE, the per-site likelihoods are
-    stored in \a tr->lhs.
-
-    @param tr
-      PLL instance
-
-    @param pr
-      List of partitions
-
-    @param getPerSiteLikelihoods
-      If set to \b PLL_TRUE, compute the log likelihood for each site. 
-
-    @note
-      This is an internal function and should not be called by the user. It assumes
-      that a valid traversal descriptor has already been computed. The edge we are
-      referring to may either lead to a tip, i.e. p or q of the first entry of the
-      traversal descriptor is a tip, or connect two inner nodes; both cases are handled.
-*/
-void pllEvaluateIterative(pllInstance *tr, partitionList *pr, pllBoolean getPerSiteLikelihoods)
-{
-  /* the branch lengths and node indices of the virtual root branch are always the first one that 
-     are stored in the very important traversal array data structure that describes a partial or full tree traversal */
-
-  /* get the branch length at the root */
-  double 
-    *pz = tr->td[0].ti[0].qz;   
-
-  /* get the node number of the node to the left and right of the branch that defines the virtual rooting */
-
-  int    
-    pNumber = tr->td[0].ti[0].pNumber, 
-    qNumber = tr->td[0].ti[0].qNumber, 
-    p_slot,
-    q_slot,
-    model;
-  
-  pllBoolean
-    fastScaling = tr->fastScaling;
-
-  /* the slots are the entries in xVector where the LH vector is available */
-  if(tr->useRecom)
-    {
-      p_slot = tr->td[0].ti[0].slot_p;
-      q_slot = tr->td[0].ti[0].slot_q;
-    }
-  else
-    {
-      p_slot = pNumber - tr->mxtips - 1;
-      q_slot = qNumber - tr->mxtips - 1;
-    }
-  
-  /* before we can compute the likelihood at the virtual root, we need to do a partial or full tree traversal to compute 
-     the conditional likelihoods of the vectors as specified in the traversal descriptor. Maintaining this tarversal descriptor consistent 
-     will unfortunately be the responsibility of users. This is tricky, if as planned for here, we use a rooted view (described somewhere in Felsenstein's book)
-     for the conditional vectors with respect to the tree
-     */
-
-  /* iterate over all valid entries in the traversal descriptor */
-
-  pllNewviewIterative(tr, pr, 1);
-
-  /* after the above call we are sure that we have properly and consistently computed the 
-     conditionals to the right and left of the virtual root and we can now invoke the 
-     the log likelihood computation */
-
-  /* we need to loop over all partitions. Note that we may have a mix of DNA, protein binary data etc partitions */
-
-  for(model = 0; model < pr->numberOfPartitions; model++)
-    {    
-      /* whats' the number of sites of this partition (at the current thread) */
-      int           
-        width = pr->partitionData[model]->width;
-      
-      /* 
-         Important part of the tarversal descriptor: 
-         figure out if we need to recalculate the likelihood of this 
-         partition: 
-         
-         The reasons why this is important in terms of performance are given in this paper 
-         here which you should actually read:
-         
-         A. Stamatakis, M. Ott: "Load Balance in the Phylogenetic Likelihood Kernel". Proceedings of ICPP 2009, accepted for publication, Vienna, Austria, September 2009
-         
-         The width > 0 check is for checking if under the cyclic data distribution of per-partition sites to threads this thread does indeed have a site 
-         of the current partition.
-         
-      */
-
-      if(tr->td[0].executeModel[model] && width > 0)
-        {       
-          int 
-#if (defined(__SSE3) || defined(__AVX))
-            rateHet = (int)discreteRateCategories(tr->rateHetModel),
-#endif
-            categories,
-            ascWidth = pr->partitionData[model]->states,
-            
-            /* get the number of states in the partition, e.g.: 4 = DNA, 20 = Protein */
-            
-            states = pr->partitionData[model]->states,
-            *ex1 = NULL,
-            *ex2 = NULL,
-            *ex1_asc = NULL,
-            *ex2_asc = NULL;
-          
-          double 
-            *rateCategories = (double*)NULL,
-            z, 
-            partitionLikelihood = 0.0,
-            *x1_start           = NULL,
-            *x2_start           = NULL,
-            *diagptable         = NULL,
-            *x1_start_asc       = NULL,
-            *x2_start_asc       = NULL;
-
-#if (defined(__SSE3) || defined(__AVX))
-          double
-            *x1_gapColumn = (double*)NULL,
-            *x2_gapColumn = (double*)NULL;
-#endif
-          
-#if (defined(__SSE3) || defined(__AVX))
-          unsigned int
-            *x1_gap = (unsigned int*)NULL,
-            *x2_gap = (unsigned int*)NULL;       
-#endif
-          
-          unsigned char 
-            *tip = (unsigned char*)NULL;          
-          
-          /* 
-             figure out if we are using the CAT or GAMMA model of rate heterogeneity 
-             and set pointers to the rate heterogeneity rate arrays and also set the 
-             number of distinct rate categories appropriately.
-             
-             Under GAMMA this is constant and hard-coded as 4, weheras under CAT 
-             the number of site-wise rate categories can vary in the course of computations 
-             up to a user defined maximum value of site categories (default: 25)
-          */
-
-          if(tr->rateHetModel == PLL_CAT)
-            {        
-              rateCategories = pr->partitionData[model]->perSiteRates;
-              categories = pr->partitionData[model]->numberOfCategories;
-            }
-          else  /* GAMMA */
-            {        
-              rateCategories = pr->partitionData[model]->gammaRates;
-              categories = 4;
-            }
-          
-          /* set this pointer to the memory area where space has been reserved a priori for storing the 
-             P matrix at the root */
-          
-          diagptable = pr->partitionData[model]->left;
-          
-          /* figure out if we need to address tip vectors (a char array that indexes into a precomputed tip likelihood 
-             value array) or if we need to address inner vectors */
-          
-          /* either node p or node q is a tip */
-          
-          if(isTip(pNumber, tr->mxtips) || isTip(qNumber, tr->mxtips))
-            {                       
-              /* q is a tip */
-              
-              if(isTip(qNumber, tr->mxtips))
-                {       
-                  /* get the start address of the inner likelihood vector x2 for partition model,
-                     note that inner nodes are enumerated/indexed starting at 0 to save allocating some 
-                     space for additional pointers */
-
-                  x2_start = pr->partitionData[model]->xVector[p_slot];
-                  
-                  /* get the corresponding tip vector */
-                  
-                  tip      = pr->partitionData[model]->yVector[qNumber];
-
-#if (defined(_FINE_GRAIN_MPI) || defined(_USE_PTHREADS))
-                  if (tr->threadID == 0 && pr->partitionData[model]->ascBias)
-#else
-                  if (pr->partitionData[model]->ascBias)
-#endif
-                   {
-                     x2_start_asc  = &pr->partitionData[model]->ascVector[(pNumber - tr->mxtips - 1) * pr->partitionData[model]->ascOffset];
-                     ex2_asc       = &pr->partitionData[model]->ascExpVector[(pNumber - tr->mxtips - 1) * ascWidth];
-                   }
-
-                  
-                  /* memory saving stuff, let's deal with this later or ask Fernando ;-) */
-                  
-#if (defined(__SSE3) || defined(__AVX))
-                  if(tr->saveMemory)
-                    {
-                      x2_gap         = &(pr->partitionData[model]->gapVector[pNumber * pr->partitionData[model]->gapVectorLength]);
-                      x2_gapColumn   = &(pr->partitionData[model]->gapColumn[(pNumber - tr->mxtips - 1) * states * rateHet]);
-                    }
-#endif
-                  /* per site likelihood scaling */
-
-                  if(!fastScaling)                  
-                    ex2 = pr->partitionData[model]->expVector[p_slot];              
-                }           
-              else
-                {       
-                  /* p is a tip, same as above */
-                  
-                  x2_start = pr->partitionData[model]->xVector[q_slot];
-                  tip = pr->partitionData[model]->yVector[pNumber];
-
-
-#if (defined(_FINE_GRAIN_MPI) || defined(_USE_PTHREADS))
-                  if (tr->threadID == 0 && pr->partitionData[model]->ascBias)
-#else
-                  if (pr->partitionData[model]->ascBias)
-#endif
-                   {
-                     x2_start_asc  = &pr->partitionData[model]->ascVector[(qNumber - tr->mxtips - 1) * pr->partitionData[model]->ascOffset];
-                     ex2_asc       = &pr->partitionData[model]->ascExpVector[(qNumber - tr->mxtips - 1) * ascWidth];
-                   }
-                  
-#if (defined(__SSE3) || defined(__AVX))
-                  if(tr->saveMemory)
-                    {
-                      x2_gap         = &(pr->partitionData[model]->gapVector[qNumber * pr->partitionData[model]->gapVectorLength]);
-                      x2_gapColumn   = &(pr->partitionData[model]->gapColumn[(qNumber - tr->mxtips - 1) * states * rateHet]);
-                    }
-#endif
-
-                  /* per site likelihood scaling */
-
-                  if(!fastScaling)                  
-                    ex2 = pr->partitionData[model]->expVector[q_slot];             
-                }
-            }
-          else
-            {  
-              
-              assert(p_slot != q_slot);
-              /* neither p nor q are tips, hence we need to get the addresses of two inner vectors */
-              
-              x1_start = pr->partitionData[model]->xVector[p_slot];
-              x2_start = pr->partitionData[model]->xVector[q_slot];
-              
-              /* memory saving option */
-              
-#if (defined(__SSE3) || defined(__AVX))
-              if(tr->saveMemory)
-                {
-                  x1_gap = &(pr->partitionData[model]->gapVector[pNumber * pr->partitionData[model]->gapVectorLength]);
-                  x2_gap = &(pr->partitionData[model]->gapVector[qNumber * pr->partitionData[model]->gapVectorLength]);
-                  x1_gapColumn   = &pr->partitionData[model]->gapColumn[(pNumber - tr->mxtips - 1) * states * rateHet];
-                  x2_gapColumn   = &pr->partitionData[model]->gapColumn[(qNumber - tr->mxtips - 1) * states * rateHet];
-                }
-#endif
-                      
-              /* per site likelihood scaling */
-
-              if(!fastScaling)
-                {
-                  ex1      = pr->partitionData[model]->expVector[p_slot];
-                  ex2      = pr->partitionData[model]->expVector[q_slot];     
-                }
-              
-#if (defined(_FINE_GRAIN_MPI) || defined(_USE_PTHREADS))
-              if (tr->threadID == 0 && pr->partitionData[model]->ascBias)
-#else
-              if (pr->partitionData[model]->ascBias)
-#endif
-               {
-                 x1_start_asc  = &pr->partitionData[model]->ascVector[(pNumber - tr->mxtips - 1) * pr->partitionData[model]->ascOffset];
-                 x2_start_asc  = &pr->partitionData[model]->ascVector[(qNumber - tr->mxtips - 1) * pr->partitionData[model]->ascOffset];
-
-                 ex1_asc       = &pr->partitionData[model]->ascExpVector[(pNumber - tr->mxtips - 1) * ascWidth];
-                 ex2_asc       = &pr->partitionData[model]->ascExpVector[(qNumber - tr->mxtips - 1) * ascWidth];
-               }
-
-
-
-            }
-          
-          
-          /* if we are using a per-partition branch length estimate, the branch has an index, otherwise, for a joint branch length
-             estimate over all partitions we just use the branch length value with index 0 */
-          
-          if(pr->perGeneBranchLengths)
-            z = pz[model];
-          else
-            z = pz[0];
-          
-          /* calc P-Matrix at root for branch z connecting nodes p and q */
-          
-          if(pr->partitionData[model]->dataType == PLL_AA_DATA && (pr->partitionData[model]->protModels == PLL_LG4M || pr->partitionData[model]->protModels == PLL_LG4X))
-            calcDiagptableFlex_LG4(z, 4, pr->partitionData[model]->gammaRates, pr->partitionData[model]->EIGN_LG4, diagptable, 20);
-          else
-            calcDiagptable(z, states, categories, rateCategories, pr->partitionData[model]->EIGN, diagptable);
-          
-#if (!defined(__SSE3) && !defined(__AVX) && !defined(__MIC_NATIVE))
-          
-          /* generic slow functions, memory saving option is not implemented for these */
-          
-          assert(!tr->saveMemory);
-          
-          /* decide wheter CAT or GAMMA is used and compute log like */
-          if(tr->rateHetModel == PLL_CAT)
-            partitionLikelihood = evaluateCAT_FLEX(fastScaling, ex1, ex2, pr->partitionData[model]->rateCategory, pr->partitionData[model]->wgt, 
-                                                x1_start, x2_start, pr->partitionData[model]->tipVector,
-                                                tip, width, diagptable, states, pr->partitionData[model]->perSiteLikelihoods, getPerSiteLikelihoods);
-          else
-            partitionLikelihood = evaluateGAMMA_FLEX(fastScaling, ex1, ex2, pr->partitionData[model]->wgt,
-                                                x1_start, x2_start, pr->partitionData[model]->tipVector,
-                                                tip, width, diagptable, states, pr->partitionData[model]->perSiteLikelihoods, getPerSiteLikelihoods);
-#else
-   
-          /* if we want to compute the per-site likelihoods, we use the generic evaluate function implementations 
-             for this, because the slowdown is not that dramatic */
-
-          if(getPerSiteLikelihoods)
-            {         
-#ifdef __MIC_NATIVE
-                          // not supported on MIC!
-                          assert(0 && "Per-site LH calculations is not implemented on Intel MIC");
-#else
-               if(tr->rateHetModel == PLL_CAT)
-                {
-                   if(tr->saveMemory)
-                     partitionLikelihood = evaluateCAT_FLEX_SAVE(fastScaling, ex1, ex2, pr->partitionData[model]->rateCategory, pr->partitionData[model]->wgt,
-                                                                 x1_start, x2_start, pr->partitionData[model]->tipVector,
-                                                                 tip, width, diagptable, states, pr->partitionData[model]->perSiteLikelihoods, PLL_TRUE,
-                                                                 x1_gapColumn, x2_gapColumn, x1_gap, x2_gap);
-                   else
-                     partitionLikelihood = evaluateCAT_FLEX(fastScaling, ex1, ex2, pr->partitionData[model]->rateCategory, pr->partitionData[model]->wgt,
-                                                            x1_start, x2_start, pr->partitionData[model]->tipVector,
-                                                            tip, width, diagptable, states, pr->partitionData[model]->perSiteLikelihoods, PLL_TRUE);
-                }
-              else
-                {
-                  if(tr->saveMemory)
-                    partitionLikelihood = evaluateGAMMA_FLEX_SAVE(fastScaling, ex1, ex2, pr->partitionData[model]->wgt,
-                                                                  x1_start, x2_start, pr->partitionData[model]->tipVector,
-                                                                  tip, width, diagptable, states, pr->partitionData[model]->perSiteLikelihoods, PLL_TRUE, 
-                                                                  x1_gapColumn, x2_gapColumn, x1_gap, x2_gap);              
-                  else
-                    partitionLikelihood = evaluateGAMMA_FLEX(fastScaling, ex1, ex2, pr->partitionData[model]->wgt,
-                                                             x1_start, x2_start, pr->partitionData[model]->tipVector,
-                                                             tip, width, diagptable, states, pr->partitionData[model]->perSiteLikelihoods, PLL_TRUE);
-                }
-#endif
-            }
-          else
-            {
-              /* for the optimized functions we have a dedicated, optimized function implementation 
-                 for each rate heterogeneity and data type combination, we switch over the number of states 
-                 and the rate heterogeneity model */
-              
-              switch(states)
-                {         
-                case 2: /* binary */
-                  assert (!tr->saveMemory);
-                  if (tr->rateHetModel == PLL_CAT)
-                   {
-                     partitionLikelihood =  evaluateGTRCAT_BINARY(ex1, ex2, pr->partitionData[model]->rateCategory, pr->partitionData[model]->wgt,
-                                                                  x1_start, x2_start, pr->partitionData[model]->tipVector, 
-                                                                  tip, width, diagptable, fastScaling);
-                   }
-                  else
-                   {
-                     partitionLikelihood = evaluateGTRGAMMA_BINARY(ex1, ex2, pr->partitionData[model]->wgt,
-                                                                   x1_start, x2_start, pr->partitionData[model]->tipVector,
-                                                                   tip, width, diagptable, fastScaling);                 
-                   }
-                  break;
-                case 4: /* DNA */
-                  {
-
-#ifdef __MIC_NATIVE
-
-                  /* CAT & memory saving are not supported on MIC */
-
-                  assert(!tr->saveMemory);
-                  assert(tr->rateHetModel == PLL_GAMMA);
-
-                  partitionLikelihood =  evaluateGTRGAMMA_MIC(ex1, ex2, pr->partitionData[model]->wgt,
-                                              x1_start, x2_start, pr->partitionData[model]->tipVector,
-                                              tip, width, diagptable, fastScaling);
-#else
-                    if(tr->rateHetModel == PLL_CAT)
-                      {                           
-                        if(tr->saveMemory)
-                          partitionLikelihood =  evaluateGTRCAT_SAVE(fastScaling, ex1, ex2, pr->partitionData[model]->rateCategory, pr->partitionData[model]->wgt,
-                                                                     x1_start, x2_start, pr->partitionData[model]->tipVector,
-                                                                     tip, width, diagptable, x1_gapColumn, x2_gapColumn, x1_gap, x2_gap);
-                        else
-                          partitionLikelihood =  evaluateGTRCAT(fastScaling, ex1, ex2, pr->partitionData[model]->rateCategory, pr->partitionData[model]->wgt,
-                                                                x1_start, x2_start, pr->partitionData[model]->tipVector,
-                                                                tip, width, diagptable);
-                      }
-                    else
-                      {         
-                        if(tr->saveMemory)                 
-                          partitionLikelihood =  evaluateGTRGAMMA_GAPPED_SAVE(fastScaling, ex1, ex2, pr->partitionData[model]->wgt,
-                                                                              x1_start, x2_start, pr->partitionData[model]->tipVector,
-                                                                              tip, width, diagptable,
-                                                                              x1_gapColumn, x2_gapColumn, x1_gap, x2_gap);                  
-                        else
-                          partitionLikelihood =  evaluateGTRGAMMA(fastScaling, ex1, ex2, pr->partitionData[model]->wgt,
-                                                                  x1_start, x2_start, pr->partitionData[model]->tipVector,
-                                                                  tip, width, diagptable);                                
-                      }
-#endif
-                  }
-                  break;                                   
-                case 20: /* proteins */
-                  {
-
-#ifdef __MIC_NATIVE
-
-                  /* CAT & memory saving are not supported on MIC */
-
-                  assert(!tr->saveMemory);
-                  assert(tr->rateHetModel == PLL_GAMMA);
-
-                  if(pr->partitionData[model]->protModels == PLL_LG4M || pr->partitionData[model]->protModels == PLL_LG4X)
-                    partitionLikelihood =  evaluateGTRGAMMAPROT_LG4_MIC(pr->partitionData[model]->wgt,
-                                                                    x1_start, x2_start, pr->partitionData[model]->tipVector_LG4,
-                                                                    tip, width, diagptable, pr->partitionData[model]->lg4x_weights);
-                  else
-                        partitionLikelihood =  evaluateGTRGAMMAPROT_MIC(ex1, ex2, pr->partitionData[model]->wgt,
-                                              x1_start, x2_start, pr->partitionData[model]->tipVector,
-                                              tip, width, diagptable, fastScaling);
-
-//                  printf("tip: %p, width: %d,  lh: %f\n", tip, width, partitionLikelihood);
-//                  int g;
-//                  if (x1_start)
-//                                        for (g = 0; g < 20; ++g)
-//                                                printf("%f \t", x1_start[g]);
-//                  printf("\n");
-//                  if (x2_start)
-//                                        for (g = 0; g < 20; ++g)
-//                                                printf("%f \t", x2_start[g]);
-#else
-
-                      if(tr->rateHetModel == PLL_CAT)
-                      {                           
-                        if(tr->saveMemory)
-                          partitionLikelihood = evaluateGTRCATPROT_SAVE(fastScaling, ex1, ex2, pr->partitionData[model]->rateCategory, pr->partitionData[model]->wgt,
-                                                                        x1_start, x2_start, pr->partitionData[model]->tipVector,
-                                                                        tip, width, diagptable,  x1_gapColumn, x2_gapColumn, x1_gap, x2_gap);
-                        else
-                          partitionLikelihood = evaluateGTRCATPROT(fastScaling, ex1, ex2, pr->partitionData[model]->rateCategory, pr->partitionData[model]->wgt,
-                                                                   x1_start, x2_start, pr->partitionData[model]->tipVector,
-                                                                   tip, width, diagptable);               
-                      }
-                    else
-                      {                                               
-                        if(tr->saveMemory)
-                          partitionLikelihood = evaluateGTRGAMMAPROT_GAPPED_SAVE(fastScaling, ex1, ex2, pr->partitionData[model]->wgt,
-                                                                                 x1_start, x2_start, pr->partitionData[model]->tipVector,
-                                                                                 tip, width, diagptable,
-                                                                                 x1_gapColumn, x2_gapColumn, x1_gap, x2_gap);
-                        else
-                      {
-                        if(pr->partitionData[model]->protModels == PLL_LG4M || pr->partitionData[model]->protModels == PLL_LG4X)
-                          partitionLikelihood =  evaluateGTRGAMMAPROT_LG4((int *)NULL, (int *)NULL, pr->partitionData[model]->wgt,
-                                                                          x1_start, x2_start, pr->partitionData[model]->tipVector_LG4,
-                                                                          tip, width, diagptable, PLL_TRUE, pr->partitionData[model]->lg4x_weights);
-                        else
-                          partitionLikelihood = evaluateGTRGAMMAPROT(fastScaling, ex1, ex2, pr->partitionData[model]->wgt,
-                                                                     x1_start, x2_start, pr->partitionData[model]->tipVector,
-                                                                     tip, width, diagptable);           
-                      }
-                      }
-#endif
-                  }
-                  break;                            
-                default:
-                  assert(0);        
-                }
-            }
-#endif
-              
-          /* check that there was no major numerical screw-up, the log likelihood should be < 0.0 always */
-          
-          assert(partitionLikelihood < 0.0);
-          
-          /* now here is a nasty part, for each partition and each node we maintain an integer counter to count how often 
-             how many entries per node were scaled by a constant factor. Here we use this information generated during Felsenstein's 
-             pruning algorithm by the newview() functions to undo the preceding scaling multiplications at the root, for mathematical details 
-             you should actually read:
-             
-             A. Stamatakis: "Orchestrating the Phylogenetic Likelihood Function on Emerging Parallel Architectures". 
-             In B. Schmidt, editor, Bioinformatics: High Performance Parallel Computer Architectures, 85-115, CRC Press, Taylor & Francis, 2010.
-             
-             There's a copy of this book in my office 
-          */
-          
-          if(fastScaling)
-            partitionLikelihood += (pr->partitionData[model]->globalScaler[pNumber] + pr->partitionData[model]->globalScaler[qNumber]) * log(PLL_MINLIKELIHOOD);
-          
-          /* now we have the correct log likelihood for the current partition after undoing scaling multiplications */           
-          
-          /* finally, we also store the per partition log likelihood which is important for optimizing the alpha parameter 
-             of this partition for example */
-
-          /* asc bias stuff */
-
-#if (defined(_FINE_GRAIN_MPI) || defined(_USE_PTHREADS))
-          if (tr->threadID == 0 && pr->partitionData[model]->ascBias)
-#else
-          if (pr->partitionData[model]->ascBias)
-#endif
-           {
-             size_t
-               i;
-             
-             int        
-               w = 0;
-             
-             double                                
-               correction;
-
-             switch(tr->rateHetModel)
-               {
-               case PLL_CAT:
-                 {
-                   double 
-                     rates = 1.0;
-                   
-                   //need to re-calculate P-matrix for the correction here assuming a rate of 1.0 
-                   calcDiagptable(z, states, 1, &rates, pr->partitionData[model]->EIGN, diagptable);
-                   
-                   
-                   correction = evaluateCatAsc(ex1_asc, ex2_asc, x1_start_asc, x2_start_asc, pr->partitionData[model]->tipVector,
-                                               tip, ascWidth, diagptable, ascWidth);
-                 }
-                 break;
-               case PLL_GAMMA:                       
-                 correction = evaluateGammaAsc(ex1_asc, ex2_asc, x1_start_asc, x2_start_asc, pr->partitionData[model]->tipVector,
-                                               tip, ascWidth, diagptable, ascWidth);
-                 break;
-               default:
-                 assert(0);
-               }
-             
-             
-             
-             for(i = (size_t)pr->partitionData[model]->lower; i < (size_t)pr->partitionData[model]->upper; i++)
-               w += tr->aliaswgt[i];
-
-             partitionLikelihood = partitionLikelihood - (double)w * log(1.0 - correction);                  
-              
-           }
-#if (defined(_FINE_GRAIN_MPI) || defined(_USE_PTHREADS))
-          if(!(pr->partitionData[model]->ascBias && tr->threadID == 0))
-           {
-#endif
-             if(partitionLikelihood >= 0.0)
-               {
-                 printf("positive log like: %f for partition %d\n", partitionLikelihood, model);
-                 assert(0);
-               }
-#if (defined(_FINE_GRAIN_MPI) || defined(_USE_PTHREADS))
-           }
-#endif
-
-          
-          pr->partitionData[model]->partitionLH = partitionLikelihood;
-        }
-      else
-        {
-          /* if the current thread does not have a single site of this partition
-             it is important to set the per partition log like to 0.0 because 
-             of the reduction operation that will take place later-on.
-             That is, the values of tr->perPartitionLH across all threads 
-             need to be in a consistent state, always !
-          */
-          
-          if(width == 0)            
-            pr->partitionData[model]->partitionLH = 0.0;
-        }
-    }
-
-
-#ifdef DEBUG_PERSITE_LNL
-  /* per persite-stuff */
-  {
-    int model = 0; 
-    for(model = 0; model < pr->numberOfPartitions ; ++model)
-      {
-        int j= 0; 
-        pInfo *partition  =  pr->partitionData[model]; 
-        for(j = 0;  j < partition->width; ++j)
-          printf("[%d] lnl[%d]=%f\n", tr->threadID, j, partition->perSiteLikelihoods[j]); 
-
-      }
-  }
-
-#endif
-}
-
-
-
-/** @ingroup evaluateLikelihoodGroup
-    @brief Evaluate the log likelihood of the tree topology
-
-    Evaluate the log likelihood of the tree topology of instance \a tr by
-    assuming a virtual root between nodes \a p and \a p->back. If
-    \a fullTraversal is set to \b PLL_TRUE then the log likelihood vectors for
-    each node are recomputed from scratch.
-
-    The function returns nothing: after the evaluation the values found in
-    \a pr->partitionData[model]->partitionLH are summed over all partitions
-    and the total is written to \a tr->likelihood.
-
-    @param tr
-      PLL instance
-
-    @param pr
-      List of partitions
-
-    @param p
-      Specifies the virtual root, which is assumed to be a (virtual node) connecting \a p and \a p->back
-
-    @param fullTraversal
-      If set to \b PLL_TRUE, then the likelihood vectors at all nodes are recomputed, otherwise only the
-      necessary vectors (those that are not oriented in the right direction) are recomputed.
-
-    @param getPerSiteLikelihoods
-      Also compute and store (in \a tr->lhs) the log likelihood of each site of the (compressed) alignment
-
-    @note
-      If \a getPerSiteLikelihoods is set to \b PLL_TRUE, then make sure that \a tr->fastScaling is set to
-      \b PLL_FALSE, otherwise an assertion will fail.
-
-    @note
-      If \a fullTraversal is set to \b PLL_TRUE, the function asserts that \a q->back
-      (with \a q = \a p->back) is a tip.
-
-    @note
-      If \a getPerSiteLikelihoods is set, the sum of \a tr->lhs[i] * \a tr->aliaswgt[i] over all
-      \a tr->originalCrunchedLength entries must agree with \a tr->likelihood to within 0.00001,
-      otherwise an assertion will fail.
-*/
-void pllEvaluateLikelihood (pllInstance *tr, partitionList *pr, nodeptr p, pllBoolean fullTraversal, pllBoolean getPerSiteLikelihoods)
-{
-  /* now this may be the entry point of the library to compute 
-     the log like at a branch defined by p and p->back == q */
-
-  volatile double 
-    result = 0.0;
-
-  nodeptr 
-    q = p->back; 
-  
-
-  pllBoolean
-        p_recom = PLL_FALSE, /* if one of them was missing, we will need to force recomputation */
-        q_recom = PLL_FALSE;
-
-  int
-    i,
-    model,
-    numBranches = pr->perGeneBranchLengths?pr->numberOfPartitions : 1;
-
-  /* if evaluate shall return the per-site log likelihoods 
-     fastScaling needs to be disabled, otherwise this will 
-     not work */
-
-  if(getPerSiteLikelihoods)          
-    assert(!(tr->fastScaling)); 
-
-  /* set the first entry of the traversal descriptor to contain the indices
-     of nodes p and q */
-
-  tr->td[0].ti[0].pNumber = p->number;
-  tr->td[0].ti[0].qNumber = q->number;          
-
-  /* copy the branch lengths of the tree into the first entry of the traversal descriptor.
-     if -M is not used tr->numBranches must be 1 */
-
-  for(i = 0; i < numBranches; i++)
-    tr->td[0].ti[0].qz[i] =  q->z[i];
-
-  /* recom part: for every inner end point of the branch ask getxVector() for a slot
-     and remember it in the traversal descriptor; the return value of getxVector()
-     is kept in p_recom / q_recom and later forces a call to computeTraversal() */
-  if(tr->useRecom)
-  {
-    int slot = -1;
-    if(!isTip(q->number, tr->mxtips))
-    {
-      q_recom = getxVector(tr->rvec, q->number, &slot, tr->mxtips);
-      tr->td[0].ti[0].slot_q = slot;
-    }
-    if(!isTip(p->number, tr->mxtips))
-    {
-      p_recom = getxVector(tr->rvec, p->number, &slot, tr->mxtips);
-      tr->td[0].ti[0].slot_p = slot;
-    }
-    if(!isTip(p->number, tr->mxtips) &&  !isTip(q->number, tr->mxtips))
-      assert(tr->td[0].ti[0].slot_q != tr->td[0].ti[0].slot_p);
-  }
-
-
-  /* now compute how many conditionals must be re-computed/re-oriented by newview
-     to be able to calculate the likelihood at the root defined by p and q.
-     */
-
-  /* one entry in the traversal descriptor is already used, hence set the traversal length counter to 1 */
-  tr->td[0].count = 1;
-
-  if(fullTraversal)
-  {
-    assert(isTip(q->back->number, tr->mxtips));
-    computeTraversal(tr, q, PLL_FALSE, numBranches);
-  }
-  else
-  {
-    if(p_recom || needsRecomp(tr->useRecom, tr->rvec, p, tr->mxtips))
-      computeTraversal(tr, p, PLL_TRUE, numBranches);
-
-    if(q_recom || needsRecomp(tr->useRecom, tr->rvec, q, tr->mxtips))
-      computeTraversal(tr, q, PLL_TRUE, numBranches);
-  }
-
-
-  /* now we copy this partition execute mask into the traversal descriptor which must come from the 
-     calling program, the logic of this should not form part of the library */
-
-  storeExecuteMaskInTraversalDescriptor(tr, pr);
-
-  /* also store in the traversal descriptor that something has changed i.e., in the parallel case that the 
-     traversal descriptor list of nodes needs to be broadcast once again */
-
-  tr->td[0].traversalHasChanged = PLL_TRUE;
-#if (defined(_FINE_GRAIN_MPI) || defined(_USE_PTHREADS))
-
-  /* now here we enter the fork-join region for Pthreads */
-
-
-  /* start the parallel region and tell all threads to compute the log likelihood for 
-     their fraction of the data. This call is implemented in the case switch of execFunction in axml.c
-     */
-  if(getPerSiteLikelihoods)
-    {
-      memset(tr->lhs, 0, sizeof(double) * tr->originalCrunchedLength); 
-      pllMasterBarrier(tr, pr, PLL_THREAD_EVALUATE_PER_SITE_LIKES);
-    }
-  else
-    pllMasterBarrier (tr, pr, PLL_THREAD_EVALUATE);
-
-  /* and now here we explicitly do the reduction operation , that is add over the 
-     per-thread and per-partition log likelihoods to obtain the overall log like 
-     over all sites and partitions */
-
- 
-  /* 
-     for unpartitioned data that's easy, we just sum over the log likes computed 
-     by each thread, thread 0 stores his results in reductionBuffer[0] thread 1 in 
-     reductionBuffer[1] and so on 
-     */
-
-  /* This reduction for the partitioned case is more complicated because each thread 
-     needs to store the partial log like of each partition and we then need to collect 
-     and add everything */
-
-#else
-  /* and here is just the sequential case, we directly call pllEvaluateIterative() above 
-     without having to tell the threads/processes that they need to compute this function now */
-
-  pllEvaluateIterative(tr, pr, getPerSiteLikelihoods); //PLL_TRUE
-
-  /*
-    if we want to obtain per-site log likelihoods they have initially been stored 
-     in arrays that are associated to the partition, now we 
-     copy them into the vector tr->lhs[], starting at the partition's lower offset.
-     We may also choose that the user needs to provide an array, but this can be decided later-on.
-  */
-
-  if(getPerSiteLikelihoods) //PLL_TRUE
-    {
-      for(model = 0; model < pr->numberOfPartitions; model++)
-        memcpy(&(tr->lhs[pr->partitionData[model]->lower]), pr->partitionData[model]->perSiteLikelihoods, pr->partitionData[model]->width  * sizeof(double));
-    }
-
-#endif
-
-  for(model = 0; model < pr->numberOfPartitions; model++)
-    result += pr->partitionData[model]->partitionLH;
-
-  /* set the tree data structure likelihood value to the total likelihood */
-
-  tr->likelihood = result;    
-
-  /* the code below is mainly for testing if the per-site log 
-     likelihoods we have stored in tr->lhs yield the same 
-     likelihood as the likelihood we computed. 
-     For numerical reasons we need to make a dirty PLL_ABS(difference) < epsilon
-     comparison */
-     
-  if(getPerSiteLikelihoods) //PLL_TRUE
-    {
-      double 
-        likelihood = 0;
-      int i; /* shadows the function-scope i declared above */
-
-      /* note that in tr->lhs, we just store the likelihood of 
-         one representative of a potentially compressed pattern,
-         hence, we need to multiply the elements with the pattern 
-         weight vector */
-
-
-      for(i = 0; i < tr->originalCrunchedLength; i++)
-        {
-//          printf("lhs[%d]=%f * %d\n", i, tr->lhs[i], tr->aliaswgt[i]); 
-          likelihood += (tr->lhs[i]   * tr->aliaswgt[i] );
-        }
-         
-      if( PLL_ABS(tr->likelihood - likelihood) > 0.00001) /* body is empty: the diagnostic output below is commented out */
-        {
-  //        printf("likelihood was %f\t summed/weighted per-site-lnl was %f\n", tr->likelihood, likelihood); 
-        }
-
-        assert(PLL_ABS(tr->likelihood - likelihood) < 0.00001);
-    }
-
-
-  if(tr->useRecom)
-  {
-    unpinNode(tr->rvec, p->number, tr->mxtips);
-    unpinNode(tr->rvec, q->number, tr->mxtips);
-  }
-
-  /* do some bookkeeping to have traversalHasChanged in a consistent state */
-
-  tr->td[0].traversalHasChanged = PLL_FALSE;
-}
-
-
-void perSiteLogLikelihoods(pllInstance *tr, partitionList *pr, double *logLikelihoods)
-{ /* Fill logLikelihoods[] with one log likelihood value per site.
-
-     The tree is first evaluated with pllEvaluateLikelihood() at tr->start using
-     a full traversal, which also sets tr->likelihood.
-
-     Threaded/MPI builds: a PLL_THREAD_PER_SITE_LIKELIHOODS barrier is issued and
-     tr->originalCrunchedLength doubles are then copied from tr->lhs, so
-     logLikelihoods must provide room for at least that many entries.
-
-     Sequential builds: for every partition the entries logLikelihoods[lower..upper-1]
-     are written with the value returned by evaluatePartialGeneric(), and the sum of
-     all values is asserted to match tr->likelihood to within 0.00001.
-
-     NOTE(review): the sum is accumulated without multiplying by tr->aliaswgt[];
-     presumably evaluatePartialGeneric() already accounts for the pattern weight -
-     confirm against its definition.
-  */
-#if (!defined(_USE_PTHREADS) && !defined(_FINE_GRAIN_MPI))
-  double 
-    //likelihood,
-    accumulatedPerSiteLikelihood = 0.0;
-
-  size_t
-    localCount,
-    i,
-    //globalCounter,
-    lower,
-    upper;
-  int model;
-#endif
-  /* compute the likelihood of the tree with the standard function to:
-     1. obtain the current score for error checking
-     2. store a full tree traversal in the traversal descriptor that 
-     will then be used for calculating per-site log likelihoods 
-     for each site individually and independently */
-
-  pllEvaluateLikelihood (tr, pr, tr->start, PLL_TRUE, PLL_FALSE);
-
-  //likelihood = tr->likelihood;
-
-  /* now compute per-site log likelihoods using the respective functions */
-
-#if (defined( _USE_PTHREADS ) || defined(_FINE_GRAIN_MPI))
-  /* here we need a barrier to invoke a parallel region that calls 
-     function 
-     perSiteLogLikelihoodsPthreads(tree *tr, partitionList *pr, double *lhs, int n, int tid)
-     defined above and subsequently collects the per-site log likelihoods 
-     computed by the threads and stored in local per-thread memory 
-     and stores them in buffer tr->lhs.
-     This corresponds to a gather operation in MPI.
-     */
-
-  pllMasterBarrier (tr, pr, PLL_THREAD_PER_SITE_LIKELIHOODS);
-
-  /* 
-     when the parallel region has terminated, the per-site log likelihoods 
-     are stored in array tr->lhs of the master thread which we copy to the result buffer
-  */
-  
-  memcpy(logLikelihoods, tr->lhs, sizeof(double) * tr->originalCrunchedLength);
-
-
-#else
-
-  /* sequential case: just loop over all partitions and compute per site log likelihoods */
-
-  for(model = 0; model < pr->numberOfPartitions; model++)
-  {
-    lower = pr->partitionData[model]->lower;
-    upper = pr->partitionData[model]->upper;
-
-    /* i is the site index used for logLikelihoods[], localCount is the index
-       relative to the partition start and is used for rateCategory[] */
-    for(i = lower, localCount = 0; i < upper; i++, localCount++)
-    {
-      double 
-        l;
-
-      /* 
-         we need to switch on the rate heterogeneity implementation here.
-         when we have PSR we actually need to provide the per-site rate 
-         to the function evaluatePartialGeneric() that computes the 
-         per-site log likelihood.
-         Under GAMMA, the rate will just be ignored, here we just set it to 1.0
-         */
-
-      switch(tr->rateHetModel)
-      {
-        case PLL_CAT:
-          l = evaluatePartialGeneric (tr, pr, i, pr->partitionData[model]->perSiteRates[pr->partitionData[model]->rateCategory[localCount]], model);
-          break;
-        case PLL_GAMMA:
-          l = evaluatePartialGeneric (tr, pr, i, 1.0, model);
-          break;
-        default:
-          assert(0); /* NOTE(review): if assertions are compiled out, l is read below without having been set */
-      }
-
-      /* store value in result array and add the likelihood of this site to the overall likelihood */
-
-      logLikelihoods[i] = l;
-      accumulatedPerSiteLikelihood += l;
-    } 
-  }
-
-
-  /* error checking. We need a dirty PLL_ABS() < epsilon here, because the implementations 
-     (standard versus per-site) are pretty different and hence slight numerical 
-     deviations are expected */
-
-  assert(PLL_ABS(tr->likelihood - accumulatedPerSiteLikelihood) < 0.00001);
-  
-#endif
-  
-
-
-}
-
-#if (defined(__SSE3) || defined(__AVX))
-static double evaluateGTRCAT_BINARY (int *ex1, int *ex2, int *cptr, int *wptr,
-                                     double *x1_start, double *x2_start, double *tipVector,                   
-                                     unsigned char *tipX1, int n, double *diagptable_start, const pllBoolean fastScaling)
-{ /* Return the weighted sum of the per-site log likelihoods for two-state data
-     with one rate category per site.
-
-     For each of the n sites the term is
-       log(fabs(x1[0] * x2[0] * d[0] + x1[1] * x2[1] * d[1]))
-     with d = &diagptable_start[2 * cptr[i]] and x2 = &x2_start[2 * i]; the term
-     is multiplied by wptr[i] and added to the result.
-
-     tipX1 != NULL: x1 is looked up as &tipVector[2 * tipX1[i]]; x1_start and ex1
-                    are not read.
-     tipX1 == NULL: x1 is &x1_start[2 * i].
-
-     When fastScaling is false, ex2[i] (tip case) or ex1[i] + ex2[i] (otherwise)
-     times log(PLL_MINLIKELIHOOD) is added to the term; when fastScaling is true
-     ex1 and ex2 are not read.
-
-     The function sits inside an "#if (defined(__SSE3) || defined(__AVX))" guard,
-     so the scalar #else branches (and the declaration of j) are never compiled.
-     _mm_load_pd needs 16-byte aligned addresses; this assumes that tipVector,
-     x1_start, x2_start and diagptable_start are 16-byte aligned - TODO confirm
-     at the allocation sites.
-  */
-  double  sum = 0.0, term;       
-  int     i;
-#if (!defined(__SSE3) && !defined(__AVX))
-  int j;  
-#endif
-  double  *diagptable, *x1, *x2;                            
- 
-  if(tipX1)
-    {          
-      for (i = 0; i < n; i++) 
-        {
-#if (defined(__SSE3) || defined(__AVX))
-          PLL_ALIGN_BEGIN double t[2] PLL_ALIGN_END;
-#endif
-          x1 = &(tipVector[2 * tipX1[i]]);
-          x2 = &(x2_start[2 * i]);
-          
-          diagptable = &(diagptable_start[2 * cptr[i]]);                          
-        
-#if (defined(__SSE3) || defined(__AVX))
-          _mm_store_pd(t, _mm_mul_pd(_mm_load_pd(x1), _mm_mul_pd(_mm_load_pd(x2), _mm_load_pd(diagptable))));
-          
-          if(fastScaling)
-            term = log(fabs(t[0] + t[1]));
-          else
-            term = log(fabs(t[0] + t[1])) + (ex2[i] * log(PLL_MINLIKELIHOOD));                           
-#else               
-          for(j = 0, term = 0.0; j < 2; j++)                         
-            term += x1[j] * x2[j] * diagptable[j];            
-                 
-          if(fastScaling)
-            term = log(fabs(term));
-          else
-            term = log(fabs(term)) + (ex2[i] * log(PLL_MINLIKELIHOOD));                                                      
-#endif    
-
-          sum += wptr[i] * term;
-        }       
-    }               
-  else
-    {
-      for (i = 0; i < n; i++) 
-        {       
-#if (defined(__SSE3) || defined(__AVX))
-		  PLL_ALIGN_BEGIN double t[2] PLL_ALIGN_END;
-#endif                  
-          x1 = &x1_start[2 * i];
-          x2 = &x2_start[2 * i];
-          
-          diagptable = &diagptable_start[2 * cptr[i]];            
-#if (defined(__SSE3) || defined(__AVX))
-          _mm_store_pd(t, _mm_mul_pd(_mm_load_pd(x1), _mm_mul_pd(_mm_load_pd(x2), _mm_load_pd(diagptable))));
-          
-          if(fastScaling)
-            term = log(fabs(t[0] + t[1]));
-          else
-            term = log(fabs(t[0] + t[1])) + ((ex1[i] + ex2[i]) * log(PLL_MINLIKELIHOOD));                        
-#else     
-          for(j = 0, term = 0.0; j < 2; j++)
-            term += x1[j] * x2[j] * diagptable[j];   
-          
-          if(fastScaling)
-            term = log(fabs(term));
-          else
-            term = log(fabs(term)) + ((ex1[i] + ex2[i]) * log(PLL_MINLIKELIHOOD));
-#endif
-          
-          sum += wptr[i] * term;
-        }          
-    }
-       
-  return  sum;         
-} 
-
-
-static double evaluateGTRGAMMA_BINARY(int *ex1, int *ex2, int *wptr,
-                                      double *x1_start, double *x2_start, 
-                                      double *tipVector, 
-                                      unsigned char *tipX1, const int n, double *diagptable, const pllBoolean fastScaling)
-{ /* Return the weighted sum of the per-site log likelihoods for two-state data
-     where every site carries four blocks of two values (8 doubles per site in
-     x1_start / x2_start, 8 doubles in diagptable).
-
-     For each of the n sites the products x1[k] * x2[j * 2 + k] * diagptable[j * 2 + k]
-     are summed over the four blocks j and the two states k; the term is
-     log(0.25 * fabs(sum)), i.e. the log of the mean over the four blocks. It is
-     multiplied by wptr[i] and added to the result.
-
-     tipX1 != NULL: x1 is &tipVector[2 * tipX1[i]] and the same two tip values are
-                    reused for every block j; x1_start and ex1 are not read.
-     tipX1 == NULL: x1 is &x1_start[8 * i] and is indexed per block like x2.
-
-     When fastScaling is false, ex2[i] (tip case) or ex1[i] + ex2[i] (otherwise)
-     times log(PLL_MINLIKELIHOOD) is added to the term.
-
-     The function sits inside an "#if (defined(__SSE3) || defined(__AVX))" guard,
-     so the scalar #else branches (and the declaration of k) are never compiled.
-     _mm_load_pd needs 16-byte aligned addresses; this assumes that the input
-     arrays are 16-byte aligned - TODO confirm at the allocation sites.
-  */
-  double   sum = 0.0, term;    
-  int     i, j;
-#if (!defined(__SSE3) && !defined(__AVX))
-  int k;
-#endif 
-  double  *x1, *x2;             
-
-  if(tipX1)
-    {          
-      for (i = 0; i < n; i++)
-        {
-#if (defined(__SSE3) || defined(__AVX))
-		  PLL_ALIGN_BEGIN double t[2] PLL_ALIGN_END;
-          __m128d termv, x1v, x2v, dv;
-#endif
-          x1 = &(tipVector[2 * tipX1[i]]);       
-          x2 = &x2_start[8 * i];                                
-#if (defined(__SSE3) || defined(__AVX))
-          termv = _mm_set1_pd(0.0);                
-          
-          for(j = 0; j < 4; j++)
-            {
-              x1v = _mm_load_pd(&x1[0]);
-              x2v = _mm_load_pd(&x2[j * 2]);
-              dv   = _mm_load_pd(&diagptable[j * 2]);
-              
-              x1v = _mm_mul_pd(x1v, x2v);
-              x1v = _mm_mul_pd(x1v, dv);
-              
-              termv = _mm_add_pd(termv, x1v);                 
-            }
-          
-          _mm_store_pd(t, termv);               
-          
-          if(fastScaling)
-            term = log(0.25 * (fabs(t[0] + t[1])));
-          else
-            term = log(0.25 * (fabs(t[0] + t[1]))) + (ex2[i] * log(PLL_MINLIKELIHOOD));       
-#else
-          for(j = 0, term = 0.0; j < 4; j++)
-            for(k = 0; k < 2; k++)
-              term += x1[k] * x2[j * 2 + k] * diagptable[j * 2 + k];                                                
-          
-          if(fastScaling)
-            term = log(0.25 * fabs(term));
-          else
-            term = log(0.25 * fabs(term)) + ex2[i] * log(PLL_MINLIKELIHOOD);
-#endif   
-          
-          sum += wptr[i] * term;
-        }         
-    }
-  else
-    {         
-      for (i = 0; i < n; i++) 
-        {
-#if (defined(__SSE3) || defined(__AVX))
-		  PLL_ALIGN_BEGIN double t[2] PLL_ALIGN_END;
-          __m128d termv, x1v, x2v, dv;
-#endif                            
-          x1 = &x1_start[8 * i];
-          x2 = &x2_start[8 * i];
-                  
-#if (defined(__SSE3) || defined(__AVX))
-          termv = _mm_set1_pd(0.0);                
-          
-          for(j = 0; j < 4; j++)
-            {
-              x1v = _mm_load_pd(&x1[j * 2]);
-              x2v = _mm_load_pd(&x2[j * 2]);
-              dv   = _mm_load_pd(&diagptable[j * 2]);
-              
-              x1v = _mm_mul_pd(x1v, x2v);
-              x1v = _mm_mul_pd(x1v, dv);
-              
-              termv = _mm_add_pd(termv, x1v);                 
-            }
-          
-          _mm_store_pd(t, termv);
-          
-          
-          if(fastScaling)
-            term = log(0.25 * (fabs(t[0] + t[1])));
-          else
-            term = log(0.25 * (fabs(t[0] + t[1]))) + ((ex1[i] +ex2[i]) * log(PLL_MINLIKELIHOOD));     
-#else     
-          for(j = 0, term = 0.0; j < 4; j++)
-            for(k = 0; k < 2; k++)
-              term += x1[j * 2 + k] * x2[j * 2 + k] * diagptable[j * 2 + k];                                          
-
-          if(fastScaling)
-            term = log(0.25 * fabs(term));
-          else
-            term = log(0.25 * fabs(term)) + (ex1[i] + ex2[i]) * log(PLL_MINLIKELIHOOD);
-#endif
-
-          sum += wptr[i] * term;
-        }                       
-    }
-
-  return sum;
-} 
-#endif
-
-
-
-/* below are the optimized function versions with geeky intrinsics */
-
-/** @ingroup evaluateLikelihoodGroup
-    @brief Evaluation of log likelihood of a tree under the GAMMA model of rate heterogeneity and LG4 model of evolution
-    
-    This is the same as ::evaluateGAMMA_FLEX but for the LG4 model. It contains two implementations,
-    one which is the generic, and one that is optimized with SSE3 instructions. The two implementations
-    are separated by preprocessor macros.
-    The difference from ::evaluateGAMMA_FLEX is that we have 4 different tipVectors computed from the 4 different
-    Q matrix decompositions.
-    Please check ::evaluateGAMMA_FLEX for more information and a description of the common
-    input parameters.
-
-    Data layout as used by the code below: every site occupies 80 doubles in \a x1 and \a x2
-    (4 blocks of 20 states), \a diagptable holds 4 blocks of 20 doubles, and \a tipVector[j]
-    holds 20 doubles per tip code for block \a j.
-
-    For every site the 20-state products of block \a j are summed, multiplied by
-    \a lg4_weights[j] and added up over the four blocks; the site term is the log of the
-    absolute value of that weighted sum, multiplied by \a wptr[i].
-
-    @param tipVector
-      Four tip vectors, one per block; only read when \a tipX1 is not \b NULL
-
-    @param lg4_weights
-      Four weights, one per block
-
-    @note
-      \a ex1 and \a ex2 are only dereferenced when \a fastScaling is false; in the tip case
-      (\a tipX1 not \b NULL) only \a ex2 is used and \a x1 is not read.
-
-    @return
-      The sum of the weighted per-site log likelihoods
-*/
-static double evaluateGTRGAMMAPROT_LG4(int *ex1, int *ex2, int *wptr,
-                                       double *x1, double *x2,  
-                                       double *tipVector[4], 
-                                       unsigned char *tipX1, int n, double *diagptable, const pllBoolean fastScaling,
-                                       double * lg4_weights)
-{
-  double   sum = 0.0, term;        
-  int     i, j, l;   
-  double  *left, *right;              
-  
-  if(tipX1)
-    {               
-      for (i = 0; i < n; i++) 
-        {
-#if (defined(__SSE3) || defined(__AVX))
-          __m128d tv = _mm_setzero_pd();
-                                  
-          for(j = 0, term = 0.0; j < 4; j++)
-            {
-              double *d = &diagptable[j * 20];
-
-              __m128d
-              	  t = _mm_setzero_pd(),
-              	  w = _mm_set1_pd(lg4_weights[j]);
-
-              left = &(tipVector[j][20 * tipX1[i]]);
-              right = &(x2[80 * i + 20 * j]);
-              for(l = 0; l < 20; l+=2)
-                {
-                  __m128d mul = _mm_mul_pd(_mm_load_pd(&left[l]), _mm_load_pd(&right[l]));
-                  t = _mm_add_pd(t, _mm_mul_pd(mul, _mm_load_pd(&d[l])));
-                }
-              tv = _mm_add_pd(tv, _mm_mul_pd(t, w));
-            }
-
-          /* add the two lanes of tv and store the result in term */
-          tv = _mm_hadd_pd(tv, tv);
-          _mm_storel_pd(&term, tv);
-          
-
-#else                             
-          for(j = 0, term = 0.0; j < 4; j++)
-            {
-        	  double t = 0.0;
-
-              left = &(tipVector[j][20 * tipX1[i]]);
-              right = &(x2[80 * i + 20 * j]);
-
-              for(l = 0; l < 20; l++)
-                t += left[l] * right[l] * diagptable[j * 20 + l];
-
-              term += lg4_weights[j] * t;
-            }     
-#endif
-          
-          if(fastScaling)
-            term = log(fabs(term));
-          else
-            term = log(fabs(term)) + (ex2[i] * log(PLL_MINLIKELIHOOD));
-
-          sum += wptr[i] * term;
-
-        }               
-    }              
-  else
-    {
-      for (i = 0; i < n; i++) 
-        {                                    
-#if (defined(__SSE3) || defined(__AVX))
-          __m128d tv = _mm_setzero_pd();                          
-              
-          for(j = 0, term = 0.0; j < 4; j++)
-            {
-              double *d = &diagptable[j * 20];
-
-              __m128d
-              t = _mm_setzero_pd(),
-              w = _mm_set1_pd(lg4_weights[j]);
-
-              left  = &(x1[80 * i + 20 * j]);
-              right = &(x2[80 * i + 20 * j]);
-              
-              for(l = 0; l < 20; l+=2)
-                {
-                  __m128d mul = _mm_mul_pd(_mm_load_pd(&left[l]), _mm_load_pd(&right[l]));
-                  t = _mm_add_pd(t, _mm_mul_pd(mul, _mm_load_pd(&d[l])));
-                }
-              tv = _mm_add_pd(tv, _mm_mul_pd(t, w));
-            }
-          /* add the two lanes of tv and store the result in term */
-          tv = _mm_hadd_pd(tv, tv);
-          _mm_storel_pd(&term, tv);       
-#else
-          for(j = 0, term = 0.0; j < 4; j++)
-            {
-        	  double t = 0.0;
-
-              left  = &(x1[80 * i + 20 * j]);
-              right = &(x2[80 * i + 20 * j]);       
-              
-              for(l = 0; l < 20; l++)
-                t += left[l] * right[l] * diagptable[j * 20 + l];
-
-              term += lg4_weights[j] * t;
-            }
-#endif
-          
-          if(fastScaling)
-            term = log(fabs(term));
-          else
-            term = log(fabs(term)) + ((ex1[i] + ex2[i])*log(PLL_MINLIKELIHOOD));
-          
-          sum += wptr[i] * term;
-        }         
-    }
-
-  return  sum;
-}
-
-#if (defined(__SSE3) || defined(__AVX))
-/** @ingroup evaluateLikelihoodGroup
-    @brief Evaluation of log likelihood of a tree using the \b GAMMA model of rate heterogeneity 
-    and the memory saving technique (Optimized SSE3 version for AA data)
- 
-    This is the SSE3 optimized version of ::evaluateGAMMA_FLEX_SAVE for evaluating the log
-    likelihood at some edge whose two end-points (nodes) have the conditional likelihood
-    vectors \a x1 and \a x2. Please check ::evaluateGAMMA_FLEX_SAVE for more information and
-    a description of the input parameters
-
-    Handling of gaps as implemented below: for site \a i the bit
-    \a x1_gap[i / 32] & \a mask32[i % 32] (likewise for \a x2_gap) decides where the 80 doubles
-    (4 blocks of 20 states) of that site are read from. If the bit is set, the shared vector
-    \a x1_gapColumn / \a x2_gapColumn is used; otherwise the next 80 doubles of \a x1 / \a x2 are
-    consumed, i.e. \a x1 and \a x2 only store the sites whose bit is not set, back to back.
-
-    The products of all four blocks are accumulated into one sum and the site term is
-    log(0.25 * fabs(sum)), multiplied by \a wptr[i].
-
-    @note
-      In the tip case (\a tipX1 not \b NULL) \a x1, \a x1_gap, \a x1_gapColumn and \a ex1 are not read.
-      \a ex1 and \a ex2 are only dereferenced when \a fastScaling is false.
-
-    @note
-      The body uses SSE intrinsics unconditionally; there is no scalar fallback in this function.
-
-    @return
-      The sum of the weighted per-site log likelihoods
-*/
-static double evaluateGTRGAMMAPROT_GAPPED_SAVE (const pllBoolean fastScaling, int *ex1, int *ex2, int *wptr,
-                                                double *x1, double *x2,  
-                                                double *tipVector, 
-                                                unsigned char *tipX1, int n, double *diagptable, 
-                                                double *x1_gapColumn, double *x2_gapColumn, unsigned int *x1_gap, unsigned int *x2_gap)                                    
-{
-  double   sum = 0.0, term;        
-  int     i, j, l;   
-  double  
-    *left, 
-    *right,
-    *x1_ptr = x1,
-    *x2_ptr = x2,
-    *x1v,
-    *x2v;              
-  __m128d tv;
-
-  if(tipX1)
-  {               
-    for (i = 0; i < n; i++) 
-    {
-      if(x2_gap[i / 32] & mask32[i % 32])
-        x2v = x2_gapColumn;
-      else
-      {
-        x2v = x2_ptr;
-        x2_ptr += 80;
-      }
-
-	  //TUNG: Standard C does not allow declaration after executable statement
-	  tv = _mm_setzero_pd();
-      //__m128d tv = _mm_setzero_pd();
-      left = &(tipVector[20 * tipX1[i]]);                 
-
-      for(j = 0, term = 0.0; j < 4; j++)
-      {
-        double *d = &diagptable[j * 20];
-        right = &(x2v[20 * j]);
-        for(l = 0; l < 20; l+=2)
-        {
-          __m128d mul = _mm_mul_pd(_mm_load_pd(&left[l]), _mm_load_pd(&right[l]));
-          tv = _mm_add_pd(tv, _mm_mul_pd(mul, _mm_load_pd(&d[l])));                
-        }                               
-      }
-
-      /* add the two lanes of tv and store the result in term */
-      tv = _mm_hadd_pd(tv, tv);
-      _mm_storel_pd(&term, tv);
-
-
-      if(!fastScaling)
-        term = log(0.25 * fabs(term)) + (ex2[i] * log(PLL_MINLIKELIHOOD));
-      else
-        term = log(0.25 * fabs(term));    
-
-      sum += wptr[i] * term;
-    }                   
-  }              
-  else
-  {
-    for (i = 0; i < n; i++) 
-    {
-      if(x1_gap[i / 32] & mask32[i % 32])
-        x1v = x1_gapColumn;
-      else
-      {
-        x1v = x1_ptr;
-        x1_ptr += 80;
-      }
-
-      if(x2_gap[i / 32] & mask32[i % 32])
-        x2v = x2_gapColumn;
-      else
-      {
-        x2v = x2_ptr;
-        x2_ptr += 80;
-      }
-
-      //__m128d tv = _mm_setzero_pd(); 
-	  tv = _mm_setzero_pd();
-
-      for(j = 0, term = 0.0; j < 4; j++)
-      {
-        double *d = &diagptable[j * 20];
-        left  = &(x1v[20 * j]);
-        right = &(x2v[20 * j]);
-
-        for(l = 0; l < 20; l+=2)
-        {
-          __m128d mul = _mm_mul_pd(_mm_load_pd(&left[l]), _mm_load_pd(&right[l]));
-          tv = _mm_add_pd(tv, _mm_mul_pd(mul, _mm_load_pd(&d[l])));                
-        }                               
-      }
-      /* add the two lanes of tv and store the result in term */
-      tv = _mm_hadd_pd(tv, tv);
-      _mm_storel_pd(&term, tv);   
-
-
-       if(!fastScaling)
-        term = log(0.25 * fabs(term)) + ((ex1[i] + ex2[i]) * log(PLL_MINLIKELIHOOD));
-      else
-        term = log(0.25 * fabs(term));
-
-
-      sum += wptr[i] * term;
-    }         
-  }
-
-  return  sum;
-}
-
-
-
-/** @ingroup evaluateLikelihoodGroup
-    @brief Evaluation of log likelihood of a tree using the \b GAMMA model of rate heterogeneity 
-    (Optimized SSE3 version for AA data)
- 
-    This is the SSE3 optimized version of ::evaluateGAMMA_FLEX for evaluating the log
-    likelihood at some edge whose two end-points (nodes) have the conditional likelihood
-    vectors \a x1 and \a x2. Please check ::evaluateGAMMA_FLEX for more information and
-    a description of the common input parameters
-*/
-static double evaluateGTRGAMMAPROT (const pllBoolean fastScaling, int *ex1, int *ex2, int *wptr,
-                                    double *x1, double *x2,  
-                                    double *tipVector, 
-                                    unsigned char *tipX1, int n, double *diagptable)
-{
-  double   sum = 0.0, term;        
-  int     i, j, l;   
-  double  *left, *right;              
-
-  if(tipX1)
-  {               
-    for (i = 0; i < n; i++) 
-    {
-
-      __m128d tv = _mm_setzero_pd();
-      left = &(tipVector[20 * tipX1[i]]);                 
-
-      for(j = 0, term = 0.0; j < 4; j++)
-      {
-        double *d = &diagptable[j * 20];
-        right = &(x2[80 * i + 20 * j]);
-        for(l = 0; l < 20; l+=2)
-        {
-          __m128d mul = _mm_mul_pd(_mm_load_pd(&left[l]), _mm_load_pd(&right[l]));
-          tv = _mm_add_pd(tv, _mm_mul_pd(mul, _mm_load_pd(&d[l])));                
-        }                               
-      }
-      tv = _mm_hadd_pd(tv, tv);
-      _mm_storel_pd(&term, tv);
-
-
-      if(!fastScaling)
-        term = log(0.25 * fabs(term)) + (ex2[i] * log(PLL_MINLIKELIHOOD));
-      else
-        term = log(0.25 * fabs(term));
-
-
-      sum += wptr[i] * term;
-    }                   
-  }              
-  else
-  {
-    for (i = 0; i < n; i++) 
-    {                                
-      __m128d tv = _mm_setzero_pd();                      
-
-      for(j = 0, term = 0.0; j < 4; j++)
-      {
-        double *d = &diagptable[j * 20];
-        left  = &(x1[80 * i + 20 * j]);
-        right = &(x2[80 * i + 20 * j]);
-
-        for(l = 0; l < 20; l+=2)
-        {
-          __m128d mul = _mm_mul_pd(_mm_load_pd(&left[l]), _mm_load_pd(&right[l]));
-          tv = _mm_add_pd(tv, _mm_mul_pd(mul, _mm_load_pd(&d[l])));                
-        }                               
-      }
-      tv = _mm_hadd_pd(tv, tv);
-      _mm_storel_pd(&term, tv);   
-
-
-       if(!fastScaling)
-        term = log(0.25 * fabs(term)) + ((ex1[i] + ex2[i]) * log(PLL_MINLIKELIHOOD));
-      else
-        term = log(0.25 * fabs(term));
-
-
-      sum += wptr[i] * term;
-    }
-  }
-
-  return  sum;
-}
-
-
-/** @ingroup evaluateLikelihoodGroup
-    @brief Evaluation of log likelihood of a tree using the \b CAT model of rate heterogeneity 
-    (Optimized SSE3 version for AA data)
- 
-    This is the SSE3 optimized version of ::evaluateCAT_FLEX for evaluating the log
-    likelihood at some edge whose two end-points (nodes) have the conditional likelihood
-    vectors \a x1 and \a x2. Please check ::evaluateCAT_FLEX for more information and
-    a description of the common input parameters
-*/
-static double evaluateGTRCATPROT (const pllBoolean fastScaling, int *ex1, int *ex2, int *cptr, int *wptr,
-                                  double *x1, double *x2, double *tipVector,
-                                  unsigned char *tipX1, int n, double *diagptable_start)
-{
-  double   sum = 0.0, term;
-  double  *diagptable,  *left, *right;
-  int     i, l;                           
-  __m128d tv;
-
-  if(tipX1)
-  {                 
-    for (i = 0; i < n; i++) 
-    {           
-      left = &(tipVector[20 * tipX1[i]]);
-      right = &(x2[20 * i]);
-
-      diagptable = &diagptable_start[20 * cptr[i]];                      
-
-	  //TUNG: Standard C does not allow declaration after executable statement
-	  tv = _mm_setzero_pd();
-      //__m128d tv = _mm_setzero_pd();        
-
-      for(l = 0; l < 20; l+=2)
-      {
-        __m128d lv = _mm_load_pd(&left[l]);
-        __m128d rv = _mm_load_pd(&right[l]);
-        __m128d mul = _mm_mul_pd(lv, rv);
-        __m128d dv = _mm_load_pd(&diagptable[l]);
-
-        tv = _mm_add_pd(tv, _mm_mul_pd(mul, dv));                  
-      }                         
-
-      tv = _mm_hadd_pd(tv, tv);
-      _mm_storel_pd(&term, tv);
-
-      if(!fastScaling)
-        term = log(fabs(term)) + (ex2[i] * log(PLL_MINLIKELIHOOD));
-      else
-        term = log(fabs(term));
-
-      sum += wptr[i] * term;
-    }      
-  }    
-  else
-  {
-
-    for (i = 0; i < n; i++) 
-    {                                 
-      left  = &x1[20 * i];
-      right = &x2[20 * i];
-
-      diagptable = &diagptable_start[20 * cptr[i]];             
-
-      __m128d tv = _mm_setzero_pd();        
-
-      for(l = 0; l < 20; l+=2)
-      {
-        __m128d lv = _mm_load_pd(&left[l]);
-        __m128d rv = _mm_load_pd(&right[l]);
-        __m128d mul = _mm_mul_pd(lv, rv);
-        __m128d dv = _mm_load_pd(&diagptable[l]);
-
-        tv = _mm_add_pd(tv, _mm_mul_pd(mul, dv));                  
-      }                         
-
-      tv = _mm_hadd_pd(tv, tv);
-      _mm_storel_pd(&term, tv);
-
-      if(!fastScaling)
-        term = log(fabs(term)) + ((ex1[i] + ex2[i]) * log(PLL_MINLIKELIHOOD));
-      else
-        term = log(fabs(term));  
-
-      sum += wptr[i] * term;      
-    }
-  }
-
-  return  sum;         
-} 
-
-
-/** @ingroup evaluateLikelihoodGroup
-    @brief Evaluation of log likelihood of a tree using the \b CAT model of rate heterogeneity with memory saving 
-    (Optimized SSE3 version for AA data)
- 
-    This is the SSE3 optimized version of ::evaluateCAT_FLEX_SAVE for evaluating the log
-    likelihood at some edge whose two end-points (nodes) have the conditional likelihood
-    vectors \a x1 and \a x2. Please check ::evaluateCAT_FLEX_SAVE for more information and
-    a description of the common input parameters
-*/
-static double evaluateGTRCATPROT_SAVE (const pllBoolean fastScaling, int *ex1, int *ex2, int *cptr, int *wptr,
-                                       double *x1, double *x2, double *tipVector,
-                                       unsigned char *tipX1, int n, double *diagptable_start, 
-                                       double *x1_gapColumn, double *x2_gapColumn, unsigned int *x1_gap, unsigned int *x2_gap)
-{
-  double   
-    sum = 0.0, 
-        term,
-        *diagptable,  
-        *left, 
-        *right,
-        *left_ptr = x1,
-        *right_ptr = x2;
-
-  int     
-    i, 
-    l;                           
-
-  if(tipX1)
-  {                 
-    for (i = 0; i < n; i++) 
-    {           
-      left = &(tipVector[20 * tipX1[i]]);
-
-      if(isGap(x2_gap, i))
-        right = x2_gapColumn;
-      else
-      {
-        right = right_ptr;
-        right_ptr += 20;
-      }          
-
-      diagptable = &diagptable_start[20 * cptr[i]];                      
-
-      __m128d tv = _mm_setzero_pd();        
-
-      for(l = 0; l < 20; l+=2)
-      {
-        __m128d lv = _mm_load_pd(&left[l]);
-        __m128d rv = _mm_load_pd(&right[l]);
-        __m128d mul = _mm_mul_pd(lv, rv);
-        __m128d dv = _mm_load_pd(&diagptable[l]);
-
-        tv = _mm_add_pd(tv, _mm_mul_pd(mul, dv));                  
-      }                         
-
-      tv = _mm_hadd_pd(tv, tv);
-      _mm_storel_pd(&term, tv);
-
-      if(!fastScaling)
-        term = log(fabs(term)) + (ex2[i] * log(PLL_MINLIKELIHOOD));
-      else
-        term = log(fabs(term));
-
-      sum += wptr[i] * term;
-    }      
-  }    
-  else
-  {
-
-    for (i = 0; i < n; i++) 
-    {                                     
-      if(isGap(x1_gap, i))
-        left = x1_gapColumn;
-      else
-      {
-        left = left_ptr;
-        left_ptr += 20;
-      }
-
-      if(isGap(x2_gap, i))
-        right = x2_gapColumn;
-      else
-      {
-        right = right_ptr;
-        right_ptr += 20;
-      }
-
-      diagptable = &diagptable_start[20 * cptr[i]];             
-
-      __m128d tv = _mm_setzero_pd();        
-
-      for(l = 0; l < 20; l+=2)
-      {
-        __m128d lv = _mm_load_pd(&left[l]);
-        __m128d rv = _mm_load_pd(&right[l]);
-        __m128d mul = _mm_mul_pd(lv, rv);
-        __m128d dv = _mm_load_pd(&diagptable[l]);
-
-        tv = _mm_add_pd(tv, _mm_mul_pd(mul, dv));                  
-      }                         
-
-      tv = _mm_hadd_pd(tv, tv);
-      _mm_storel_pd(&term, tv);
-
-      if(!fastScaling)
-        term = log(fabs(term)) + ((ex1[i] + ex2[i]) * log(PLL_MINLIKELIHOOD));
-      else
-        term = log(fabs(term));  
-
-      sum += wptr[i] * term;      
-    }
-  }
-
-  return  sum;         
-} 
-
-
-/** @ingroup evaluateLikelihoodGroup
-    @brief Evaluation of log likelihood of a tree using the \b CAT model of rate heterogeneity with memory saving 
-    (Optimized SSE3 version for DNA data)
- 
-    This is the SSE3 optimized version of ::evaluateCAT_FLEX_SAVE for evaluating the log
-    likelihood at some edge whose two end-points (nodes) have the conditional likelihood
-    vectors \a x1 and \a x2. Please check ::evaluateCAT_FLEX_SAVE for more information and
-    a description of the common input parameters
-*/
-static double evaluateGTRCAT_SAVE (const pllBoolean fastScaling, int *ex1, int *ex2, int *cptr, int *wptr,
-                                   double *x1_start, double *x2_start, double *tipVector,                     
-                                   unsigned char *tipX1, int n, double *diagptable_start,
-                                   double *x1_gapColumn, double *x2_gapColumn, unsigned int *x1_gap, unsigned int *x2_gap)
-{
-  double  sum = 0.0, term;       
-  int     i;
-
-  double  *diagptable, 
-          *x1, 
-          *x2,
-          *x1_ptr = x1_start,
-          *x2_ptr = x2_start;
-
-  if(tipX1)
-  {           
-    for (i = 0; i < n; i++) 
-    {   
-    	PLL_ALIGN_BEGIN double t[2] PLL_ALIGN_END;
-      __m128d x1v1, x1v2, x2v1, x2v2, dv1, dv2;
-
-      x1 = &(tipVector[4 * tipX1[i]]);
-
-      if(isGap(x2_gap, i))
-        x2 = x2_gapColumn;
-      else
-      {
-        x2 = x2_ptr;
-        x2_ptr += 4;
-      }
-
-      diagptable = &diagptable_start[4 * cptr[i]];
-
-      x1v1 =  _mm_load_pd(&x1[0]);
-      x1v2 =  _mm_load_pd(&x1[2]);
-      x2v1 =  _mm_load_pd(&x2[0]);
-      x2v2 =  _mm_load_pd(&x2[2]);
-      dv1  =  _mm_load_pd(&diagptable[0]);
-      dv2  =  _mm_load_pd(&diagptable[2]);
-
-      x1v1 = _mm_mul_pd(x1v1, x2v1);
-      x1v1 = _mm_mul_pd(x1v1, dv1);
-
-      x1v2 = _mm_mul_pd(x1v2, x2v2);
-      x1v2 = _mm_mul_pd(x1v2, dv2);
-
-      x1v1 = _mm_add_pd(x1v1, x1v2);
-
-      _mm_store_pd(t, x1v1);
-
-      if(!fastScaling)
-        term = log(fabs(t[0] + t[1])) + (ex2[i] * log(PLL_MINLIKELIHOOD));
-      else
-        term = log(fabs(t[0] + t[1]));
-
-
-
-      sum += wptr[i] * term;
-    }   
-  }               
-  else
-  {
-    for (i = 0; i < n; i++) 
-    { 
-    	PLL_ALIGN_BEGIN double t[2] PLL_ALIGN_END;
-      __m128d x1v1, x1v2, x2v1, x2v2, dv1, dv2;
-
-      if(isGap(x1_gap, i))
-        x1 = x1_gapColumn;
-      else
-      {
-        x1 = x1_ptr;
-        x1_ptr += 4;
-      }
-
-      if(isGap(x2_gap, i))
-        x2 = x2_gapColumn;
-      else
-      {
-        x2 = x2_ptr;
-        x2_ptr += 4;
-      }
-
-      diagptable = &diagptable_start[4 * cptr[i]];      
-
-      x1v1 =  _mm_load_pd(&x1[0]);
-      x1v2 =  _mm_load_pd(&x1[2]);
-      x2v1 =  _mm_load_pd(&x2[0]);
-      x2v2 =  _mm_load_pd(&x2[2]);
-      dv1  =  _mm_load_pd(&diagptable[0]);
-      dv2  =  _mm_load_pd(&diagptable[2]);
-
-      x1v1 = _mm_mul_pd(x1v1, x2v1);
-      x1v1 = _mm_mul_pd(x1v1, dv1);
-
-      x1v2 = _mm_mul_pd(x1v2, x2v2);
-      x1v2 = _mm_mul_pd(x1v2, dv2);
-
-      x1v1 = _mm_add_pd(x1v1, x1v2);
-
-      _mm_store_pd(t, x1v1);
-
-
-       if(!fastScaling)
-        term = log(fabs(t[0] + t[1])) + ((ex1[i] + ex2[i]) * log(PLL_MINLIKELIHOOD));
-      else
-        term = log(fabs(t[0] + t[1]));
-
-      sum += wptr[i] * term;
-    }    
-  }
-
-  return  sum;         
-} 
-
-
-/** @ingroup evaluateLikelihoodGroup
-    @brief Evaluation of log likelihood of a tree using the \b GAMMA model of rate heterogeneity with memory saving 
-    (Optimized SSE3 version for DNA data)
- 
-    This is the SSE3 optimized version of ::evaluateGAMMA_FLEX_SAVE for evaluating the log
-    likelihood at some edge whose two end-points (nodes) have the conditional likelihood
-    vectors \a x1 and \a x2. Please check ::evaluateGAMMA_FLEX_SAVE for more information and
-    a description of the common input parameters
-*/
-static double evaluateGTRGAMMA_GAPPED_SAVE(const pllBoolean fastScaling, int *ex1, int *ex2, int *wptr,
-                                           double *x1_start, double *x2_start, 
-                                           double *tipVector, 
-                                           unsigned char *tipX1, const int n, double *diagptable,
-                                           double *x1_gapColumn, double *x2_gapColumn, unsigned int *x1_gap, unsigned int *x2_gap)
-{
-  double   sum = 0.0, term;    
-  int     i, j;
-  double  
-    *x1, 
-    *x2,
-    *x1_ptr = x1_start,
-    *x2_ptr = x2_start;
-
-
-
-  if(tipX1)
-  {        
-
-
-    for (i = 0; i < n; i++)
-    {
-    	PLL_ALIGN_BEGIN double t[2] PLL_ALIGN_END;
-      __m128d termv, x1v, x2v, dv;
-
-      x1 = &(tipVector[4 * tipX1[i]]);   
-      if(x2_gap[i / 32] & mask32[i % 32])
-        x2 = x2_gapColumn;
-      else
-      {
-        x2 = x2_ptr;     
-        x2_ptr += 16;
-      }
-
-
-      termv = _mm_set1_pd(0.0);            
-
-      for(j = 0; j < 4; j++)
-      {
-        x1v = _mm_load_pd(&x1[0]);
-        x2v = _mm_load_pd(&x2[j * 4]);
-        dv   = _mm_load_pd(&diagptable[j * 4]);
-
-        x1v = _mm_mul_pd(x1v, x2v);
-        x1v = _mm_mul_pd(x1v, dv);
-
-        termv = _mm_add_pd(termv, x1v);
-
-        x1v = _mm_load_pd(&x1[2]);
-        x2v = _mm_load_pd(&x2[j * 4 + 2]);
-        dv   = _mm_load_pd(&diagptable[j * 4 + 2]);
-
-        x1v = _mm_mul_pd(x1v, x2v);
-        x1v = _mm_mul_pd(x1v, dv);
-
-        termv = _mm_add_pd(termv, x1v);
-      }
-
-      _mm_store_pd(t, termv);            
-
-       if(!fastScaling)
-        term = log(0.25 * fabs(t[0] + t[1])) + (ex2[i] * log(PLL_MINLIKELIHOOD));
-      else
-        term = log(0.25 * fabs(t[0] + t[1]));
-
-
-      sum += wptr[i] * term;
-    }     
-  }
-  else
-  {        
-
-    for (i = 0; i < n; i++) 
-    {
-    	PLL_ALIGN_BEGIN double t[2] PLL_ALIGN_END;
-      __m128d termv, x1v, x2v, dv;
-
-      if(x1_gap[i / 32] & mask32[i % 32])
-        x1 = x1_gapColumn;
-      else
-      {
-        x1 = x1_ptr;              
-        x1_ptr += 16;
-      }
-
-      if(x2_gap[i / 32] & mask32[i % 32])
-        x2 = x2_gapColumn;
-      else
-      {
-        x2 = x2_ptr;
-        x2_ptr += 16;
-      }
-
-      termv = _mm_set1_pd(0.0);          
-
-      for(j = 0; j < 4; j++)
-      {
-        x1v = _mm_load_pd(&x1[j * 4]);
-        x2v = _mm_load_pd(&x2[j * 4]);
-        dv   = _mm_load_pd(&diagptable[j * 4]);
-
-        x1v = _mm_mul_pd(x1v, x2v);
-        x1v = _mm_mul_pd(x1v, dv);
-
-        termv = _mm_add_pd(termv, x1v);
-
-        x1v = _mm_load_pd(&x1[j * 4 + 2]);
-        x2v = _mm_load_pd(&x2[j * 4 + 2]);
-        dv   = _mm_load_pd(&diagptable[j * 4 + 2]);
-
-        x1v = _mm_mul_pd(x1v, x2v);
-        x1v = _mm_mul_pd(x1v, dv);
-
-        termv = _mm_add_pd(termv, x1v);
-      }
-
-      _mm_store_pd(t, termv);
-
-      if(!fastScaling)
-        term = log(0.25 * fabs(t[0] + t[1])) + ((ex1[i] + ex2[i]) * log(PLL_MINLIKELIHOOD));
-      else
-        term = log(0.25 * fabs(t[0] + t[1]));
-
-
-      sum += wptr[i] * term;
-    }                           
-  }
-
-  return sum;
-} 
-
-
-/** @ingroup evaluateLikelihoodGroup
-    @brief Evaluation of log likelihood of a tree using the \b GAMMA model of rate heterogeneity (Optimized SSE3 version for DNA data)
- 
-    This is the SSE3 optimized version of ::evaluateGAMMA_FLEX for evaluating the log
-    likelihood at some edge whose two end-points (nodes) have the conditional likelihood
-    vectors \a x1 and \a x2. Please check ::evaluateGAMMA_FLEX for more information and
-    a description of the common input parameters
-*/
-static double evaluateGTRGAMMA(const pllBoolean fastScaling, int *ex1, int *ex2, int *wptr,
-                               double *x1_start, double *x2_start, 
-                               double *tipVector, 
-                               unsigned char *tipX1, const int n, double *diagptable)
-{
-  double   sum = 0.0, term;    
-  int     i, j;
-
-  double  *x1, *x2;             
-
-
-
-  if(tipX1)
-  {             
-    for (i = 0; i < n; i++)
-    {
-    	PLL_ALIGN_BEGIN double t[2] PLL_ALIGN_END;
-      __m128d termv, x1v, x2v, dv;
-
-      x1 = &(tipVector[4 * tipX1[i]]);   
-      x2 = &x2_start[16 * i];    
-
-
-      termv = _mm_set1_pd(0.0);            
-
-      for(j = 0; j < 4; j++)
-      {
-        x1v = _mm_load_pd(&x1[0]);
-        x2v = _mm_load_pd(&x2[j * 4]);
-        dv   = _mm_load_pd(&diagptable[j * 4]);
-
-        x1v = _mm_mul_pd(x1v, x2v);
-        x1v = _mm_mul_pd(x1v, dv);
-
-        termv = _mm_add_pd(termv, x1v);
-
-        x1v = _mm_load_pd(&x1[2]);
-        x2v = _mm_load_pd(&x2[j * 4 + 2]);
-        dv   = _mm_load_pd(&diagptable[j * 4 + 2]);
-
-        x1v = _mm_mul_pd(x1v, x2v);
-        x1v = _mm_mul_pd(x1v, dv);
-
-        termv = _mm_add_pd(termv, x1v);
-      }
-
-      _mm_store_pd(t, termv);
-
-
-       if(!fastScaling)
-        term = log(0.25 * fabs(t[0] + t[1])) + (ex2[i] * log(PLL_MINLIKELIHOOD));
-      else
-        term = log(0.25 * fabs(t[0] + t[1]));
-
-
-
-      sum += wptr[i] * term;
-    }     
-  }
-  else
-  {        
-    for (i = 0; i < n; i++) 
-    {
-    	PLL_ALIGN_BEGIN double t[2] PLL_ALIGN_END;
-      __m128d termv, x1v, x2v, dv;
-
-
-      x1 = &x1_start[16 * i];
-      x2 = &x2_start[16 * i];             
-
-
-      termv = _mm_set1_pd(0.0);          
-
-      for(j = 0; j < 4; j++)
-      {
-        x1v = _mm_load_pd(&x1[j * 4]);
-        x2v = _mm_load_pd(&x2[j * 4]);
-        dv   = _mm_load_pd(&diagptable[j * 4]);
-
-        x1v = _mm_mul_pd(x1v, x2v);
-        x1v = _mm_mul_pd(x1v, dv);
-
-        termv = _mm_add_pd(termv, x1v);
-
-        x1v = _mm_load_pd(&x1[j * 4 + 2]);
-        x2v = _mm_load_pd(&x2[j * 4 + 2]);
-        dv   = _mm_load_pd(&diagptable[j * 4 + 2]);
-
-        x1v = _mm_mul_pd(x1v, x2v);
-        x1v = _mm_mul_pd(x1v, dv);
-
-        termv = _mm_add_pd(termv, x1v);
-      }
-
-      _mm_store_pd(t, termv);
-
-      if(!fastScaling)
-        term = log(0.25 * fabs(t[0] + t[1])) + ((ex1[i] + ex2[i]) * log(PLL_MINLIKELIHOOD));
-      else
-        term = log(0.25 * fabs(t[0] + t[1]));
-
-
-
-      sum += wptr[i] * term;
-    }                           
-  }
-
-  return sum;
-} 
-
-
-/** @ingroup evaluateLikelihoodGroup
-    @brief Evaluation of log likelihood of a tree using the \b CAT model of rate heterogeneity (Optimized SSE3 version for DNA data)
- 
-    This is the SSE3 optimized version of ::evaluateCAT_FLEX for evaluating the log
-    likelihood at some edge whose two end-points (nodes) have the conditional likelihood
-    vectors \a x1 and \a x2. Please check ::evaluateCAT_FLEX for more information and
-    a description of the common input parameters
-*/
-static double evaluateGTRCAT (const pllBoolean fastScaling, int *ex1, int *ex2, int *cptr, int *wptr,
-                              double *x1_start, double *x2_start, double *tipVector,                  
-                              unsigned char *tipX1, int n, double *diagptable_start)
-{
-  double  sum = 0.0, term;       
-  int     i;
-
-  double  *diagptable, *x1, *x2;                            
-
-  if(tipX1)
-  {           
-    for (i = 0; i < n; i++) 
-    {   
-    	PLL_ALIGN_BEGIN	double t[2] PLL_ALIGN_END;
-      __m128d x1v1, x1v2, x2v1, x2v2, dv1, dv2;
-
-      x1 = &(tipVector[4 * tipX1[i]]);
-      x2 = &x2_start[4 * i];
-
-      diagptable = &diagptable_start[4 * cptr[i]];
-
-
-      x1v1 =  _mm_load_pd(&x1[0]);
-      x1v2 =  _mm_load_pd(&x1[2]);
-      x2v1 =  _mm_load_pd(&x2[0]);
-      x2v2 =  _mm_load_pd(&x2[2]);
-      dv1  =  _mm_load_pd(&diagptable[0]);
-      dv2  =  _mm_load_pd(&diagptable[2]);
-
-      x1v1 = _mm_mul_pd(x1v1, x2v1);
-      x1v1 = _mm_mul_pd(x1v1, dv1);
-
-      x1v2 = _mm_mul_pd(x1v2, x2v2);
-      x1v2 = _mm_mul_pd(x1v2, dv2);
-
-      x1v1 = _mm_add_pd(x1v1, x1v2);
-
-      _mm_store_pd(t, x1v1);
-
-       if(!fastScaling)
-        term = log(fabs(t[0] + t[1])) + (ex2[i] * log(PLL_MINLIKELIHOOD));
-      else
-        term = log(fabs(t[0] + t[1]));
-
-
-      sum += wptr[i] * term;
-    }   
-  }               
-  else
-  {
-    for (i = 0; i < n; i++) 
-    { 
-    	PLL_ALIGN_BEGIN double t[2] PLL_ALIGN_END;
-      __m128d x1v1, x1v2, x2v1, x2v2, dv1, dv2;
-
-      x1 = &x1_start[4 * i];
-      x2 = &x2_start[4 * i];
-
-      diagptable = &diagptable_start[4 * cptr[i]];      
-
-
-      x1v1 =  _mm_load_pd(&x1[0]);
-      x1v2 =  _mm_load_pd(&x1[2]);
-      x2v1 =  _mm_load_pd(&x2[0]);
-      x2v2 =  _mm_load_pd(&x2[2]);
-      dv1  =  _mm_load_pd(&diagptable[0]);
-      dv2  =  _mm_load_pd(&diagptable[2]);
-
-      x1v1 = _mm_mul_pd(x1v1, x2v1);
-      x1v1 = _mm_mul_pd(x1v1, dv1);
-
-      x1v2 = _mm_mul_pd(x1v2, x2v2);
-      x1v2 = _mm_mul_pd(x1v2, dv2);
-
-      x1v1 = _mm_add_pd(x1v1, x1v2);
-
-      _mm_store_pd(t, x1v1);
-
-      if(!fastScaling)
-        term = log(fabs(t[0] + t[1])) + ((ex1[i] + ex2[i]) * log(PLL_MINLIKELIHOOD));
-      else
-        term = log(fabs(t[0] + t[1]));
-
-
-      sum += wptr[i] * term;
-    }    
-  }
-
-  return  sum;         
-} 
-
-
-
-
-
-#endif
diff --git a/pllrepo/src/evaluatePartialGenericSpecial.c b/pllrepo/src/evaluatePartialGenericSpecial.c
deleted file mode 100644
index 4d461a5..0000000
--- a/pllrepo/src/evaluatePartialGenericSpecial.c
+++ /dev/null
@@ -1,1378 +0,0 @@
-/*  RAxML-VI-HPC (version 2.2) a program for sequential and parallel estimation of phylogenetic trees 
- *  Copyright August 2006 by Alexandros Stamatakis
- *
- *  Partially derived from
- *  fastDNAml, a program for estimation of phylogenetic trees from sequences by Gary J. Olsen
- *  
- *  and 
- *
- *  Programs of the PHYLIP package by Joe Felsenstein.
- 
- *  This program is free software; you may redistribute it and/or modify its
- *  under the terms of the GNU General Public License as published by the Free
- *  Software Foundation; either version 2 of the License, or (at your option)
- *  any later version.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- * 
- *
- *  For any other enquiries send an Email to Alexandros Stamatakis
- *  Alexandros.Stamatakis at epfl.ch
- *
- *  When publishing work that is based on the results from RAxML-VI-HPC please cite:
- *
- *  Alexandros Stamatakis:"RAxML-VI-HPC: maximum likelihood-based phylogenetic analyses with thousands of taxa and mixed models". 
- *  Bioinformatics 2006; doi: 10.1093/bioinformatics/btl446
- */
-
-#include "mem_alloc.h"
-
-#ifndef WIN32 
-#include <unistd.h>
-#endif
-
-#include <math.h>
-#include <time.h> 
-#include <stdlib.h>
-#include <stdio.h>
-#include <ctype.h>
-#include <string.h>
-#include <assert.h>
-#include "pll.h"
-#include "pllInternal.h"
-
-#ifdef __SSE3
-#include <xmmintrin.h>
-#include <pmmintrin.h>
-#endif
-
-
-/* optimized implementation for computing per-site log likelihoods under CAT and GAMMA for DNA and protein data */
-
-#if (defined(__SSE3) || defined(__AVX))
-static __inline void computeVectorGTRCATPROT(double *lVector, int *eVector, double ki, int i, double qz, double rz,
-					   traversalInfo *ti, double *EIGN, double *EI, double *EV, double *tipVector, 
-					   unsigned  char **yVector, int mxtips);
-
-static double evaluatePartialGTRCATPROT(int i, double ki, int counter,  traversalInfo *ti, double qz,
-					int w, double *EIGN, double *EI, double *EV,
-					double *tipVector, unsigned char **yVector, 
-					int branchReference, int mxtips);
-
-static __inline void computeVectorGTRGAMMAPROT(double *lVector, int *eVector, double *gammaRates, int i, double qz, double rz,
-					     traversalInfo *ti, double *EIGN, double *EI, double *EV, double *tipVector, 
-					     unsigned  char **yVector, int mxtips);
-
-static double evaluatePartialGTRGAMMAPROT(int i, int counter,  traversalInfo *ti, double qz,
-					  int w, double *EIGN, double *EI, double *EV,
-					  double *tipVector, unsigned char **yVector, 
-					  double *gammaRates,
-					  int branchReference, int mxtips);
-
-static __inline void computeVectorGTRCAT(double *lVector, int *eVector, double ki, int i, double qz, double rz,
-				       traversalInfo *ti, double *EIGN, double *EI, double *EV, double *tipVector, 
-				       unsigned char **yVector, int mxtips);
-
-static double evaluatePartialGTRCAT(int i, double ki, int counter,  traversalInfo *ti, double qz,
-				    int w, double *EIGN, double *EI, double *EV,
-				    double *tipVector, unsigned  char **yVector, 
-				    int branchReference, int mxtips);
-
-static __inline void computeVectorGTRCAT_BINARY(double *lVector, int *eVector, double ki, int i, double qz, double rz,
-					      traversalInfo *ti, double *EIGN, double *EI, double *EV, double *tipVector, 
-					      unsigned char **yVector, int mxtips);
-
-static double evaluatePartialGTRCAT_BINARY(int i, double ki, int counter,  traversalInfo *ti, double qz,
-					   int w, double *EIGN, double *EI, double *EV,
-					   double *tipVector, unsigned  char **yVector, 
-					   int branchReference, int mxtips);
-
-static double evaluatePartialGTRGAMMA(int i, int counter,  traversalInfo *ti, double qz,
-				      int w, double *EIGN, double *EI, double *EV,
-				      double *tipVector, unsigned char **yVector, 
-				      double *gammaRates,
-				      int branchReference, int mxtips);
-#endif
-
-/* the next two functions are generic non-optimized versions of the per-site log likelihood calculations,
-   but only under the CAT model. There are no generic implementations available for GAMMA yet, since 
-   these functions were not needed in RAxML. However there exist optimized functions for GAMMA further below.
-   The only use of the CAT functions was to optimize per-site rates based on their likelihood for the CAT 
-   model of rate heterogeneity. */
-
-
-static __inline void computeVectorCAT_FLEX(double *lVector, int *eVector, double ki, int i, double qz, double rz,
-					 traversalInfo *ti, double *EIGN, double *EI, double *EV, double *tipVector, 
-					 unsigned char **yVector, int mxtips, const int states)
-{      
-  /* allocate some space we need */
- 
-  double  
-    *d1 =    (double *)rax_malloc(sizeof(double) * states), 
-    *d2 =    (double *)rax_malloc(sizeof(double) * states),  
-    *x1px2 = (double *)rax_malloc(sizeof(double) * states), 
-    ump_x1, 
-    ump_x2,    
-    lz1, 
-    lz2,
-    *x1, 
-    *x2, 
-    *x3;
-  
-  int 
-    scale,
-    j, 
-    k,
-    pNumber = ti->pNumber,
-    rNumber = ti->rNumber,
-    qNumber = ti->qNumber;
- 
-  /* 
-     lVector holds the space for computing ancestral probablities on a single column of the tree 
-     hence under CAT we index the current space required to store the parent ancestral probability vector 
-     by multiplying the number of states with the offset in the array given by the inner node number
-   */
-
-  x3  = &lVector[states * (pNumber  - mxtips)];  
- 
-  /* do a case switch to figure out how to index the child nodes x1 and x2,
-     analogous to the standard newview implementation.
-     Note the index i that we use to index the specific tip poistion/index 
-     for which we want to compute the per-site log likelihood */
-
-  switch(ti->tipCase)
-    {
-    case PLL_TIP_TIP:     
-      x1 = &(tipVector[states * yVector[qNumber][i]]);
-      x2 = &(tipVector[states * yVector[rNumber][i]]);    
-      break;
-    case PLL_TIP_INNER:     
-      x1 = &(tipVector[states * yVector[qNumber][i]]);
-      x2 = &(lVector[states * (rNumber - mxtips)]);           
-      break;
-    case PLL_INNER_INNER:            
-      x1 = &(lVector[states * (qNumber - mxtips)]);
-      x2 = &(lVector[states * (rNumber - mxtips)]);     
-      break;
-    default:
-      assert(0);
-    }
-     
-  /* multiply the branch lengths with the evolutionary rate */
-
-  lz1 = qz * ki;  
-  lz2 = rz * ki;
-  
-
-  /* exponentiate the branch lengths using the eigenvalues */
-
-  d1[0] = x1[0];
-  d2[0] = x2[0];
-
-
-  for(j = 1; j < states; j++)
-    {
-      d1[j] = x1[j] * exp(EIGN[j] * lz1);
-      d2[j] = x2[j] * exp(EIGN[j] * lz2);	    
-    }
- 
- 
-  /* now loop over all states */
-
-  for(j = 0; j < states; j++)
-    {         
-      ump_x1 = 0.0;
-      ump_x2 = 0.0;
-
-      for(k = 0; k < states; k++)
-	{
-	  ump_x1 += d1[k] * EI[j * states + k];
-	  ump_x2 += d2[k] * EI[j * states + k];
-	}
-      
-      x1px2[j] = ump_x1 * ump_x2;
-    }
-  
-  for(j = 0; j < states; j++)
-    x3[j] = 0.0;
-
-  /* multiply the result of looping over all states with the eigenvector matrix EV */
-
-  for(j = 0; j < states; j++)          
-    for(k = 0; k < states; k++)	
-      x3[k] +=  x1px2[j] *  EV[states * j + k];	   
-      
-  /* now determine if we need to scale the #states entries in x[3] to avoid 
-     numerical underflow. */
-     
-
-  scale = 1;
-  for(j = 0; scale && (j < states); j++)
-    scale = ((x3[j] < PLL_MINLIKELIHOOD) && (x3[j] > PLL_MINUSMINLIKELIHOOD));
-  
-  /* if we need to scale, we multiply all probabilities of the site with 2^256 
-     and increment the scaling counter by 1. 
-     The counter eVector is used for tracking/counting the number of scaling events 
-     at the site i for which we are computing the per-site log likelihood such that 
-     we can "undo" the scaling multiplications when we compute the log likelihood of the site 
-     at the virtual root */
-  
-  if(scale)
-    {
-      for(j = 0; j < states; j++)
-	x3[j] *= PLL_TWOTOTHE256;       
-      *eVector = *eVector + 1;
-    }	              
-
-  rax_free(d1);
-  rax_free(d2);
-  rax_free(x1px2);
-       
-  return;
-}
-
-
-/* the following function computes the per-site log likelihood of a given site i at the virtual root of the tree.
-   as input it takes the indeix i, of the site, the evolutionary rate ki (for computing Q^(rt) where r = ki) 
-   the traversalDescriptor defining the full tree traversal (felsenstein pruning algo) 
-   the branch length at the root qz, the weigth of the site pattern w, i.e., how many identical sites have been compressed 
-   into the current site pattern, the eigenvalues etc (EIGN, EI, EV) associated to the Eigenvector/Eigenvalue decomposition 
-   of the given instataneous substitution matrix Q, the tipVector lookup table for obtaining tip probability vectors, 
-   a pointer to the raw sequence data at the tips, a branch index (to get the correct branch length/index into the correct branch 
-   if -M is used, i.e., a per-partition branch length estimate is deployed, and finally the maximum number of tips in the comprehensive tree 
-   as well as the number of states in the current model. */
-
-#if (!defined(__SSE3) && !defined(__AVX))
-static double evaluatePartialCAT_FLEX(int i, double ki, int counter,  traversalInfo *ti, double qz,
-				      int w, double *EIGN, double *EI, double *EV,
-				      double *tipVector, unsigned  char **yVector, 
-				      int branchReference, int mxtips, const int states)
-{
-  int 
-    scale = 0, 
-    k;
-  
-  double 
-    /* lVector is a temporary buffer to store the ancestral probability vactors of 
-       a single site, thus we allocate states * mxtips space for storing probability values.
-       Essentially  only (states * (mxtips - 2)) space would be required, but I was to lazy 
-       to think if it has to be -1 or -2 here */
-    * lVector = NULL,   
-    * d = NULL,
-    lz, 
-    term, 
-    *x1, 
-    *x2; 
-
-  
-
-  traversalInfo 
-    *trav = &ti[0];
- 
-  rax_posix_memalign ((void **)&lVector, PLL_BYTE_ALIGNMENT, sizeof(double) * states * mxtips);
-  rax_posix_memalign ((void **)&d,       PLL_BYTE_ALIGNMENT, sizeof(double) * states);
-  /* make sure that at one end of the branch into which we have placed the virtual root 
-     there actually is a tip!*/
-
-  assert(isTip(trav->pNumber, mxtips));
-     
-  /* for the tip we alread have the data, so just set the left probability vector to the 
-     corresponding address in the pre-computed tipVector[] lookup table */
-
-  x1 = &(tipVector[states *  yVector[trav->pNumber][i]]);   
-
-  /* now iterate over the traversal descriptor that contains the nodes of the tree in the order required 
-     by the Felsenstein pruning algorithm */
-
-  for(k = 1; k < counter; k++)    
-    {
-      /* obtain the branch lengths and take the logarithms */
-      
-      double 
-	qz = ti[k].qz[branchReference],
-	rz = ti[k].rz[branchReference];
-      
-      qz = (qz > PLL_ZMIN) ? log(qz) : log(PLL_ZMIN);
-      rz = (rz > PLL_ZMIN) ? log(rz) : log(PLL_ZMIN);
-
-      /* invoke essentially a newview() for one site on the entry k of the traversal descriptor.
-	 counter should always correspond to the number of inner nodes in the tree for which we need
-	 to compute ancestral probability values */
-
-      computeVectorCAT_FLEX(lVector, &scale, ki, i, qz, rz, &ti[k], 
-			    EIGN, EI, EV, 
-			    tipVector, yVector, mxtips, states);       
-    }
-   
-  /* now the ancestral probability values for site i at the node to the right of the virtual root 
-     are available and correctly computed, such that we can set the pointer to the right vector x2
-     to the corresponding entry */
-
-  x2 = &lVector[states * (trav->qNumber - mxtips)]; 
-
-  /* a paranoic assertion */
-
-  assert(0 <=  (trav->qNumber - mxtips) && (trav->qNumber - mxtips) < mxtips);  
- 
-  /* now just compute the log likelihood score of this site */
-      
-  if(qz < PLL_ZMIN) 
-    lz = PLL_ZMIN;
-  lz  = log(qz); 
-  lz *= ki;  
-  
-  d[0] = 1.0; 
-
-  for(k = 1; k < states; k++)
-    d[k] = exp (EIGN[k] * lz);
-  
-  term = 0.0;
-
-  for(k = 0; k < states; k++) 
-    term += x1[k] * x2[k] * d[k];       
-
-  /* note the "scale * log(PLL_MINLIKELIHOOD)" term here which we use to undo/revert the scaling multiplications 
-     such that we obtain a correct log likelihood score. The integer variable scale, contains the number of times 
-     we had to scale (multiply by 2^256) for site i only during a full tree traversal using Felsenstein's algorithm */
-
-  term = log(fabs(term)) + (scale * log(PLL_MINLIKELIHOOD));   
-
-  /* multiply with the site pattern weight (site pattern compression factor */
-
-  term = term * w;
-
-  /* free the memory space used for likelihood computations on this site */
-
-  rax_free(lVector);  
-  rax_free(d);
-
-  return  term;
-}
-#endif
-
-/* this is the top-level function that can be called from other parts of the code.
-   As input it takes the tree data structure, the site index, the evolutionary rate ki, 
-   and the model index (partition index. It will return the 
-   log likelihood of site i. 
-   An important pre-condition is that the tree traversal descriptor must contain 
-   a full tree traversal starting at a tip !
-
-   Note that, if you wamt to obtain per-site log likes for other altered model parameters such 
-   as the Q matrix, you will have do re-invoke the eigenvalue/eigenvector decomposition prior 
-   to calling the function below.
-*/
-
-double evaluatePartialGeneric (pllInstance *tr, partitionList *pr, int i, double ki, int _model)
-{
-  double 
-    result;
-  
-  
-  int     
-    branchReference,
-
-    /* number of states of the data type in this partition */
-    states = pr->partitionData[_model]->states;
-    
-  /* SOS ATTENTION: note the different indexing used for the parallel and sequential versions ! */
-
-#if (defined(_FINE_GRAIN_MPI) || defined(_USE_PTHREADS))
-  int index = i; 
-#else
-  int index = i - pr->partitionData[_model]->lower;
-#endif
-  
-  /* here we figure out if all partitions are linked via the same branch length, that is,
-     if we are conducting a joint branch length estimate or a per-partition branch length estimate */
-
-  if(pr->perGeneBranchLengths && pr->numberOfPartitions>1)
-    branchReference = _model;
-  else
-    branchReference = 0;
-
-  /* for the generic function implementation we only offer the CAT implementation for computing/optimizing per-site evolutionary rates */
-
-#if (!defined(__SSE3) && !defined(__AVX))
-  if(tr->rateHetModel == PLL_CAT)
-    result = evaluatePartialCAT_FLEX(index, ki, tr->td[0].count, tr->td[0].ti, tr->td[0].ti[0].qz[branchReference], 
-				     pr->partitionData[_model]->wgt[index],
-				     pr->partitionData[_model]->EIGN,
-				     pr->partitionData[_model]->EI,
-				     pr->partitionData[_model]->EV,
-				     pr->partitionData[_model]->tipVector,
-				     pr->partitionData[_model]->yVector, branchReference, tr->mxtips, states);
-  else
-    /* 
-       the per-site site likelihood function should only be called for the CAT model
-       under the GAMMA model this is required only for estimating per-site protein models 
-       which has however been removed in this version of the code
-    */
-    assert(0); 
-  
- 
-#else
-  /* switch over the number of states of the data in the current model/partition */
-  switch(states)
-    {
-    case 2:   /* BINARY */
-      assert(!tr->saveMemory);
-      assert(tr->rateHetModel == PLL_CAT);
-
-      result = evaluatePartialGTRCAT_BINARY(index, ki, tr->td[0].count, tr->td[0].ti, 
-                                            tr->td[0].ti[0].qz[branchReference],
-                                            pr->partitionData[_model]->wgt[index],
-                                            pr->partitionData[_model]->EIGN,
-                                            pr->partitionData[_model]->EI,
-                                            pr->partitionData[_model]->EV,
-                                            pr->partitionData[_model]->tipVector,
-                                            pr->partitionData[_model]->yVector, 
-                                            branchReference, 
-                                            tr->mxtips);
-      break;
-      
-    case 4:   /* DNA */
-      /* switch over CAT versus GAMMA and pass all model parameters for the respective partition to the respective functions */
-      if(tr->rateHetModel == PLL_CAT)      
-	result = evaluatePartialGTRCAT(index, ki, tr->td[0].count, tr->td[0].ti, tr->td[0].ti[0].qz[branchReference], 
-				       pr->partitionData[_model]->wgt[index],
-				       pr->partitionData[_model]->EIGN,
-				       pr->partitionData[_model]->EI,
-				       pr->partitionData[_model]->EV,
-				       pr->partitionData[_model]->tipVector,
-				       pr->partitionData[_model]->yVector, branchReference, tr->mxtips);
-      else	
-	result = evaluatePartialGTRGAMMA(index, tr->td[0].count, tr->td[0].ti, tr->td[0].ti[0].qz[branchReference], 
-					 pr->partitionData[_model]->wgt[index],
-					 pr->partitionData[_model]->EIGN,
-					 pr->partitionData[_model]->EI,
-					 pr->partitionData[_model]->EV,
-					 pr->partitionData[_model]->tipVector,
-					 pr->partitionData[_model]->yVector,
-					 pr->partitionData[_model]->gammaRates,
-					 branchReference, tr->mxtips);	
-	
-      break;
-    case 20: /* proteins */     
-      if(tr->rateHetModel == PLL_CAT)
-	result = evaluatePartialGTRCATPROT(index, ki, tr->td[0].count, tr->td[0].ti, tr->td[0].ti[0].qz[branchReference], 
-					   pr->partitionData[_model]->wgt[index],
-					   pr->partitionData[_model]->EIGN,
-					   pr->partitionData[_model]->EI,
-					   pr->partitionData[_model]->EV,
-					   pr->partitionData[_model]->tipVector,
-					   pr->partitionData[_model]->yVector, branchReference, tr->mxtips);
-      else
-	result =  evaluatePartialGTRGAMMAPROT(index, tr->td[0].count, tr->td[0].ti, tr->td[0].ti[0].qz[branchReference], 
-					      pr->partitionData[_model]->wgt[index],
-					      pr->partitionData[_model]->EIGN,
-					      pr->partitionData[_model]->EI,
-					      pr->partitionData[_model]->EV,
-					      pr->partitionData[_model]->tipVector,
-					      pr->partitionData[_model]->yVector,
-					      pr->partitionData[_model]->gammaRates,
-					      branchReference, tr->mxtips);
-      break;   
-    default:
-      assert(0);
-    }
-  #endif
- 
-
-  return result;
-}
-
-#if (defined(__SSE3) || defined(__AVX))
-/* optimized function implementations for computing per-site log likelihoods under CAT and GAMMA for protein and 
-   DNA data. 
-   The structure is analoguous as above with some data- and model-specific optimizations and vectorizations.
-*/
-
-static __inline void computeVectorGTRCAT_BINARY(double *lVector, int *eVector, double ki, int i, double qz, double rz,
-					      traversalInfo *ti, double *EIGN, double *EI, double *EV, double *tipVector, 
-					      unsigned char **yVector, int mxtips)
-{       
-  double  d1, d2,  ump_x1, ump_x2, x1px2[2], lz1, lz2; 
-  double *x1, *x2, *x3;
-  int 
-    j, k,
-    pNumber = ti->pNumber,
-    rNumber = ti->rNumber,
-    qNumber = ti->qNumber;
- 
-  x3  = &lVector[2 * (pNumber  - mxtips)];  
-
-  switch(ti->tipCase)
-    {
-    case PLL_TIP_TIP:     
-      x1 = &(tipVector[2 * yVector[qNumber][i]]);
-      x2 = &(tipVector[2 * yVector[rNumber][i]]);   
-      break;
-    case PLL_TIP_INNER:     
-      x1 = &(tipVector[2 * yVector[qNumber][i]]);
-      x2 = &lVector[2 * (rNumber - mxtips)];                    
-      break;
-    case PLL_INNER_INNER:            
-      x1 = &lVector[2 * (qNumber - mxtips)];
-      x2 = &lVector[2 * (rNumber - mxtips)];               
-      break;
-    default:
-      assert(0);
-    }
-     
-  lz1 = qz * ki;  
-  lz2 = rz * ki;
-  
- 
-  d1 = x1[1] * exp(EIGN[1] * lz1);
-  d2 = x2[1] * exp(EIGN[1] * lz2);	        
- 
-  for(j = 0; j < 2; j++)
-    {     
-      ump_x1 = x1[0];
-      ump_x2 = x2[0];
-      
-      ump_x1 += d1 * EI[j * 2 + 1];
-      ump_x2 += d2 * EI[j * 2 + 1];
-	
-      x1px2[j] = ump_x1 * ump_x2;
-    }
-  
-  for(j = 0; j < 2; j++)
-    x3[j] = 0.0;
-
-  for(j = 0; j < 2; j++)          
-    for(k = 0; k < 2; k++)	
-      x3[k] +=  x1px2[j] *  EV[2 * j + k];	   
-      
-  
-  if (x3[0] < PLL_MINLIKELIHOOD && x3[0] > PLL_MINUSMINLIKELIHOOD &&
-      x3[1] < PLL_MINLIKELIHOOD && x3[1] > PLL_MINUSMINLIKELIHOOD 
-      )
-    {	     
-      x3[0]   *= PLL_TWOTOTHE256;
-      x3[1]   *= PLL_TWOTOTHE256;     
-      *eVector = *eVector + 1;
-    }	              
-
-  return;
-}
-
-static double evaluatePartialGTRCAT_BINARY(int i, double ki, int counter,  traversalInfo *ti, double qz,
-					   int w, double *EIGN, double *EI, double *EV,
-					   double *tipVector, unsigned  char **yVector, 
-					   int branchReference, int mxtips)
-{
-  double lz, term;       
-  double  d;
-  double   *x1, *x2; 
-  int scale = 0, k;
-  double *lVector = (double *)malloc(sizeof(double) * 2 * mxtips);  
-  traversalInfo *trav = &ti[0];
- 
-  assert(isTip(trav->pNumber, mxtips));
-     
-  x1 = &(tipVector[2 *  yVector[trav->pNumber][i]]);   
-
-  for(k = 1; k < counter; k++)  
-    {
-      double 
-	qz = ti[k].qz[branchReference],
-	rz = ti[k].rz[branchReference];
-      
-      qz = (qz > PLL_ZMIN) ? log(qz) : log(PLL_ZMIN);
-      rz = (rz > PLL_ZMIN) ? log(rz) : log(PLL_ZMIN);
-
-      computeVectorGTRCAT_BINARY(lVector, &scale, ki, i, qz, rz, &ti[k], 
-				 EIGN, EI, EV, 
-				 tipVector, yVector, mxtips);       
-    }
-   
-  x2 = &lVector[2 * (trav->qNumber - mxtips)];
-     
-  assert(0 <=  (trav->qNumber - mxtips) && (trav->qNumber - mxtips) < mxtips);  
-       
-  if(qz < PLL_ZMIN) 
-    lz = PLL_ZMIN;
-  lz  = log(qz); 
-  lz *= ki;  
-  
-  d = exp(EIGN[1] * lz);
-  
-  term =  x1[0] * x2[0];
-  term += x1[1] * x2[1] * d; 
-
-  term = log(fabs(term)) + (scale * log(PLL_MINLIKELIHOOD));   
-
-  term = term * w;
-
-  free(lVector);
-  
-  return  term;
-}
-
-
-
-static __inline void computeVectorGTRGAMMAPROT(double *lVector, int *eVector, double *gammaRates, int i, double qz, double rz,
-					     traversalInfo *ti, double *EIGN, double *EI, double *EV, double *tipVector, 
-					     unsigned  char **yVector, int mxtips)
-{       
-  double   
-    *x1, 
-    *x2, 
-    *x3;  
-  
-  int
-    s,
-    pNumber = ti->pNumber,
-    rNumber = ti->rNumber,
-    qNumber = ti->qNumber,
-    index1[4],
-    index2[4];
-  
- 
-  x3  = &(lVector[80 * (pNumber  - mxtips)]);     
-
-  switch(ti->tipCase)
-    {
-    case PLL_TIP_TIP:    
-      x1 = &(tipVector[20 * yVector[qNumber][i]]);
-      x2 = &(tipVector[20 * yVector[rNumber][i]]);     
-      for(s = 0; s < 4; s++)
-	{
-	  index1[s] = 0;
-	  index2[s] = 0;
-	}
-      break;
-    case PLL_TIP_INNER:     
-      x1 = &(tipVector[20 * yVector[qNumber][i]]);
-      x2 = &(  lVector[80 * (rNumber - mxtips)]);   
-      for(s = 0; s < 4; s++)       
-	index1[s] = 0;
-      for(s = 0; s < 4; s++)     
-	index2[s] = s;                     
-      break;
-    case PLL_INNER_INNER:            
-      x1 = &(lVector[80 * (qNumber - mxtips)]);
-      x2 = &(lVector[80 * (rNumber - mxtips)]); 
-      for(s = 0; s < 4; s++)
-	{
-	  index1[s] = s;
-	  index2[s] = s;
-	}                
-      break;    
-    default:
-      assert(0);
-    }
-     
-  {
-	  PLL_ALIGN_BEGIN double
-		  e1[20] PLL_ALIGN_END,
-		  e2[20] PLL_ALIGN_END,
-		  d1[20] PLL_ALIGN_END,
-		  d2[20] PLL_ALIGN_END;
-    double  
-      lz1, lz2;  
-    int 
-      l, 
-      k, 
-      scale, 
-      j;
-     
-    for(j = 0; j < 4; j++)
-      {
-	lz1 = qz * gammaRates[j];            
-	lz2 = rz * gammaRates[j];        
-
-	e1[0] = 1.0;
-	e2[0] = 1.0;
-    
-	for(l = 1; l < 20; l++)
-	  {
-	    e1[l] = exp(EIGN[l] * lz1);
-	    e2[l] = exp(EIGN[l] * lz2);
-	  }
-
-	for(l = 0; l < 20; l+=2)
-	  {
-	    __m128d d1v = _mm_mul_pd(_mm_load_pd(&x1[20 * index1[j] + l]), _mm_load_pd(&e1[l]));
-	    __m128d d2v = _mm_mul_pd(_mm_load_pd(&x2[20 * index2[j] + l]), _mm_load_pd(&e2[l]));
-	    
-	    _mm_store_pd(&d1[l], d1v);
-	    _mm_store_pd(&d2[l], d2v);	
-	  }
-
-	__m128d zero = _mm_setzero_pd();
-
-	for(l = 0; l < 20; l+=2)
-	  _mm_store_pd(&x3[j * 20 + l], zero);
-                
-	for(l = 0; l < 20; l++)
-	  { 	      
-	    double *ev = &EV[l * 20];
-	    __m128d ump_x1v = _mm_setzero_pd();
-	    __m128d ump_x2v = _mm_setzero_pd();
-	    __m128d x1px2v;
-	    
-	    for(k = 0; k < 20; k+=2)
-	      {       
-		__m128d eiv = _mm_load_pd(&EI[20 * l + k]);
-		__m128d d1v = _mm_load_pd(&d1[k]);
-		__m128d d2v = _mm_load_pd(&d2[k]);
-		ump_x1v = _mm_add_pd(ump_x1v, _mm_mul_pd(d1v, eiv));
-		ump_x2v = _mm_add_pd(ump_x2v, _mm_mul_pd(d2v, eiv));	  
-	      }
-
-	    ump_x1v = _mm_hadd_pd(ump_x1v, ump_x1v);
-	    ump_x2v = _mm_hadd_pd(ump_x2v, ump_x2v);
-
-	    x1px2v = _mm_mul_pd(ump_x1v, ump_x2v);
-
-	    for(k = 0; k < 20; k+=2)
-	      {
-		__m128d ex3v = _mm_load_pd(&x3[j * 20 + k]);
-		__m128d EVV  = _mm_load_pd(&ev[k]);
-		ex3v = _mm_add_pd(ex3v, _mm_mul_pd(x1px2v, EVV));
-		
-		_mm_store_pd(&x3[j * 20 + k], ex3v);	   	   
-	      }
-	  }        
-      }
-    
-    scale = 1;
-    for(l = 0; scale && (l < 80); l++)
-      scale = ((x3[l] < PLL_MINLIKELIHOOD) && (x3[l] > PLL_MINUSMINLIKELIHOOD));	       	      	      	       	       
-    
-    if(scale)
-      {	      
-	__m128d twoto = _mm_set_pd(PLL_TWOTOTHE256, PLL_TWOTOTHE256);
-
-	for(l = 0; l < 80; l+=2)
-	  {
-	    __m128d ex3v = _mm_mul_pd(_mm_load_pd(&x3[l]),twoto);
-	    _mm_store_pd(&x3[l], ex3v);	
-	  }
-
-	*eVector = *eVector + 1;
-      }
-    
-    return;      
-  }
-}
-
-static  void computeVectorGTRGAMMA(double *lVector, int *eVector, double *gammaRates, int i, double qz, double rz,
-					 traversalInfo *ti, double *EIGN, double *EI, double *EV, double *tipVector, 
-					 unsigned  char **yVector, int mxtips)
-{       
-  double   
-    *x1, 
-    *x2, 
-    *x3;   
-
-  int
-    s,
-    pNumber = ti->pNumber,
-    rNumber = ti->rNumber,
-    qNumber = ti->qNumber,
-    index1[4],
-    index2[4];
-  
- 
-  x3  = &(lVector[16 * (pNumber  - mxtips)]);     
-
-  switch(ti->tipCase)
-    {
-    case PLL_TIP_TIP:          
-      x1 = &(tipVector[4 * yVector[qNumber][i]]);
-      x2 = &(tipVector[4 * yVector[rNumber][i]]);     
-      
-      for(s = 0; s < 4; s++)
-	{
-	  index1[s] = 0;
-	  index2[s] = 0;
-	}
-      break;
-    case PLL_TIP_INNER:     
-      x1 = &(tipVector[4 * yVector[qNumber][i]]);
-      x2 = &(lVector[16 * (rNumber - mxtips)]);   
-      for(s = 0; s < 4; s++)       
-	{
-	  index1[s] = 0;      
-	  index2[s] = s;  
-	}
-      break;
-    case PLL_INNER_INNER:            
-      x1 = &(lVector[16 * (qNumber - mxtips)]);
-      x2 = &(lVector[16 * (rNumber - mxtips)]);       
-      for(s = 0; s < 4; s++)
-	{
-	  index1[s] = s;
-	  index2[s] = s;
-	}                
-      break;    
-    default:
-      assert(0);
-    }
-     
-  {
-	  PLL_ALIGN_BEGIN double
-		  e1[20] PLL_ALIGN_END,
-		  e2[20] PLL_ALIGN_END,
-		  d1[20] PLL_ALIGN_END,
-		  d2[20] PLL_ALIGN_END;
-    double  
-      lz1, lz2;  
-    
-    int 
-      l, 
-      k, 
-      scale, 
-      j;
-     
-    for(j = 0; j < 4; j++)
-      {
-	lz1 = qz * gammaRates[j];            
-	lz2 = rz * gammaRates[j];        
-
-	e1[0] = 1.0;
-	e2[0] = 1.0;
-    
-	for(l = 1; l < 4; l++)
-	  {
-	    e1[l] = exp(EIGN[l] * lz1);
-	    e2[l] = exp(EIGN[l] * lz2);
-	  }
-
-	for(l = 0; l < 4; l+=2)
-	  {
-	    __m128d d1v = _mm_mul_pd(_mm_load_pd(&x1[4 * index1[j] + l]), _mm_load_pd(&e1[l]));
-	    __m128d d2v = _mm_mul_pd(_mm_load_pd(&x2[4 * index2[j] + l]), _mm_load_pd(&e2[l]));
-	    
-	    _mm_store_pd(&d1[l], d1v);
-	    _mm_store_pd(&d2[l], d2v);	
-	  }
-
-	__m128d zero = _mm_setzero_pd();
-
-	for(l = 0; l < 4; l+=2)
-	  _mm_store_pd(&x3[j * 4 + l], zero);
-                
-	for(l = 0; l < 4; l++)
-	  { 	      
-	    double *ev = &EV[l * 4];
-	    __m128d ump_x1v = _mm_setzero_pd();
-	    __m128d ump_x2v = _mm_setzero_pd();
-	    __m128d x1px2v;
-	    
-	    for(k = 0; k < 4; k+=2)
-	      {       
-		__m128d eiv = _mm_load_pd(&EI[4 * l + k]);
-		__m128d d1v = _mm_load_pd(&d1[k]);
-		__m128d d2v = _mm_load_pd(&d2[k]);
-		ump_x1v = _mm_add_pd(ump_x1v, _mm_mul_pd(d1v, eiv));
-		ump_x2v = _mm_add_pd(ump_x2v, _mm_mul_pd(d2v, eiv));	  
-	      }
-
-	    ump_x1v = _mm_hadd_pd(ump_x1v, ump_x1v);
-	    ump_x2v = _mm_hadd_pd(ump_x2v, ump_x2v);
-
-	    x1px2v = _mm_mul_pd(ump_x1v, ump_x2v);
-
-	    for(k = 0; k < 4; k+=2)
-	      {
-		__m128d ex3v = _mm_load_pd(&x3[j * 4 + k]);
-		__m128d EVV  = _mm_load_pd(&ev[k]);
-		ex3v = _mm_add_pd(ex3v, _mm_mul_pd(x1px2v, EVV));
-		
-		_mm_store_pd(&x3[j * 4 + k], ex3v);	   	   
-	      }
-	  }        
-      }
-    
-  
-    scale = 1;
-    for(l = 0; scale && (l < 16); l++)
-      scale = (PLL_ABS(x3[l]) < PLL_MINLIKELIHOOD);	       	      	      	       	       
-    
-    if(scale)
-      {	      
-	__m128d twoto = _mm_set_pd(PLL_TWOTOTHE256, PLL_TWOTOTHE256);
-	
-	for(l = 0; l < 16; l+=2)
-	  {
-	    __m128d ex3v = _mm_mul_pd(_mm_load_pd(&x3[l]),twoto);
-	    _mm_store_pd(&x3[l], ex3v);	
-	  }
-	
-	*eVector = *eVector + 1;
-      }  
-    
-    return;      
-  }
-}
-
-
-static double evaluatePartialGTRGAMMAPROT(int i, int counter,  traversalInfo *ti, double qz,
-					  int w, double *EIGN, double *EI, double *EV,
-					  double *tipVector, unsigned char **yVector, 
-					  double *gammaRates,
-					  int branchReference, int mxtips)
-{
-  double lz, term;       
-  double  d[80];
-  double   *x1, *x2; 
-  int scale = 0, k, l, j;
-
-  double 
-	  *lVector = NULL;
-  PLL_ALIGN_BEGIN double
-	  myEI[400]  PLL_ALIGN_END;
-
-  traversalInfo 
-    *trav = &ti[0];
-
-  rax_posix_memalign ((void **)&lVector, PLL_BYTE_ALIGNMENT, sizeof(double) * 80 * mxtips);
-
-  for(k = 0; k < 20; k++)
-    {         
-      for(l = 0; l < 20; l++)
-	myEI[k * 20 + l] = EI[k * 20 + l];
-    }
-
-  assert(isTip(trav->pNumber, mxtips));
-     
-  x1 = &(tipVector[20 *  yVector[trav->pNumber][i]]);   
-
-  for(k = 1; k < counter; k++)                
-    {
-      double 
-	qz = ti[k].qz[branchReference],
-	rz = ti[k].rz[branchReference];
-      
-      qz = (qz > PLL_ZMIN) ? log(qz) : log(PLL_ZMIN);
-      rz = (rz > PLL_ZMIN) ? log(rz) : log(PLL_ZMIN);
-
-      computeVectorGTRGAMMAPROT(lVector, &scale, gammaRates, i, qz, rz, 
-				&ti[k], EIGN, myEI, EV, 
-				tipVector, yVector, mxtips);
-    }
-   
-  x2 = &lVector[80 * (trav->qNumber - mxtips)];       
-
-  assert(0 <=  (trav->qNumber - mxtips) && (trav->qNumber - mxtips) < mxtips);  
-  
-  lz = qz;
-
-  if(qz < PLL_ZMIN) 
-    lz = PLL_ZMIN;
-  lz  = log(qz);
-  
-  
-  
-  for(j = 0; j < 4; j++)
-    {
-      d[20 * j] = 1.0;
-      for(l = 1; l < 20; l++)
-	d[20 * j + l] = exp(EIGN[l] * lz * gammaRates[j]);
-    }
-
- 
-  for(j = 0, term = 0.0; j < 4; j++)
-    {
-      for(l = 0; l < 20; l++)
-	term += x1[l] * x2[20 * j + l] * d[j * 20 + l];	      
-    }
-  
-  term = log(0.25 * fabs(term)) + (scale * log(PLL_MINLIKELIHOOD));   
-
-  term = term * w;
-
- rax_free(lVector);
-  
- 
-  return  term;
-}
-
-static double evaluatePartialGTRGAMMA(int i, int counter,  traversalInfo *ti, double qz,
-				      int w, double *EIGN, double *EI, double *EV,
-				      double *tipVector, unsigned char **yVector, 
-				      double *gammaRates,
-				      int branchReference, int mxtips)
-{
-  double lz, term;       
-  double  d[16];
-  double   *x1, *x2; 
-  int scale = 0, k, l, j;
-  double 
-	  *lVector = NULL;
-  PLL_ALIGN_BEGIN double
-	  myEI[16]  PLL_ALIGN_END;
-
-
-  traversalInfo 
-    *trav = &ti[0];
-
-  rax_posix_memalign ((void **)&lVector, PLL_BYTE_ALIGNMENT, sizeof(double) * 16 * mxtips);
-
-  for(k = 0; k < 4; k++)
-    {           
-      for(l = 0; l < 4; l++)
-	myEI[k * 4 + l] = EI[k * 4 + l];
-    }
-
-  assert(isTip(trav->pNumber, mxtips));
-     
-  x1 = &(tipVector[4 *  yVector[trav->pNumber][i]]);   
-
-  for(k = 1; k < counter; k++)                
-    {
-      double 
-	qz = ti[k].qz[branchReference],
-	rz = ti[k].rz[branchReference];
-      
-      qz = (qz > PLL_ZMIN) ? log(qz) : log(PLL_ZMIN);
-      rz = (rz > PLL_ZMIN) ? log(rz) : log(PLL_ZMIN);
-
-      computeVectorGTRGAMMA(lVector, &scale, gammaRates, i, qz, rz, 
-				&ti[k], EIGN, myEI, EV, 
-				tipVector, yVector, mxtips);
-    }
-   
-  x2 = &lVector[16 * (trav->qNumber - mxtips)];       
-
-  assert(0 <=  (trav->qNumber - mxtips) && (trav->qNumber - mxtips) < mxtips);  
-  
-  if(qz < PLL_ZMIN) 
-    lz = PLL_ZMIN;
-  lz  = log(qz); 
-  
-  for(j = 0; j < 4; j++)
-    {
-      d[4 * j] = 1.0;
-      for(l = 1; l < 4; l++)
-	d[4 * j + l] = exp(EIGN[l] * lz * gammaRates[j]);
-    }
-
- 
-  for(j = 0, term = 0.0; j < 4; j++)
-    {
-      for(l = 0; l < 4; l++)
-	term += x1[l] * x2[4 * j + l] * d[j * 4 + l];	      
-    }
-
-  term = log(0.25 * fabs(term)) + (scale * log(PLL_MINLIKELIHOOD));   
-
-  term = term * w;
-
-  rax_free(lVector);
-  
-  
-  return  term;
-}
-
-
-
-
-static __inline void computeVectorGTRCAT(double *lVector, int *eVector, double ki, int i, double qz, double rz,
-				       traversalInfo *ti, double *EIGN, double *EI, double *EV, double *tipVector, 
-				       unsigned char **yVector, int mxtips)
-{       
-  double  d1[3], d2[3],  ump_x1, ump_x2, x1px2[4], lz1, lz2; 
-  double *x1, *x2, *x3;
-  int j, k,
-    pNumber = ti->pNumber,
-    rNumber = ti->rNumber,
-    qNumber = ti->qNumber;
- 
-  x3  = &lVector[4 * (pNumber  - mxtips)];  
- 
-
-  switch(ti->tipCase)
-    {
-    case PLL_TIP_TIP:     
-      x1 = &(tipVector[4 * yVector[qNumber][i]]);
-      x2 = &(tipVector[4 * yVector[rNumber][i]]);    
-      break;
-    case PLL_TIP_INNER:     
-      x1 = &(tipVector[4 * yVector[qNumber][i]]);
-      x2 = &lVector[4 * (rNumber - mxtips)];           
-      break;
-    case PLL_INNER_INNER:            
-      x1 = &lVector[4 * (qNumber - mxtips)];
-      x2 = &lVector[4 * (rNumber - mxtips)];     
-      break;
-    default:
-      assert(0);
-    }
-     
-  lz1 = qz * ki;  
-  lz2 = rz * ki;
-  
-  for(j = 0; j < 3; j++)
-    {
-      d1[j] = 
-	x1[j + 1] * 
-	exp(EIGN[j + 1] * lz1);
-      d2[j] = x2[j + 1] * exp(EIGN[j + 1] * lz2);	    
-    }
- 
- 
-  for(j = 0; j < 4; j++)
-    {     
-      ump_x1 = x1[0];
-      ump_x2 = x2[0];
-      for(k = 0; k < 3; k++)
-	{
-	  ump_x1 += d1[k] * EI[j * 4 + k + 1];
-	  ump_x2 += d2[k] * EI[j * 4 + k + 1];
-	}
-      x1px2[j] = ump_x1 * ump_x2;
-    }
-  
-  for(j = 0; j < 4; j++)
-    x3[j] = 0.0;
-
-  for(j = 0; j < 4; j++)          
-    for(k = 0; k < 4; k++)	
-      x3[k] +=  x1px2[j] *  EV[4 * j + k];	   
-      
-  
-  if (x3[0] < PLL_MINLIKELIHOOD && x3[0] > PLL_MINUSMINLIKELIHOOD &&
-      x3[1] < PLL_MINLIKELIHOOD && x3[1] > PLL_MINUSMINLIKELIHOOD &&
-      x3[2] < PLL_MINLIKELIHOOD && x3[2] > PLL_MINUSMINLIKELIHOOD &&
-      x3[3] < PLL_MINLIKELIHOOD && x3[3] > PLL_MINUSMINLIKELIHOOD)
-    {	     
-      x3[0]   *= PLL_TWOTOTHE256;
-      x3[1]   *= PLL_TWOTOTHE256;
-      x3[2]   *= PLL_TWOTOTHE256;     
-      x3[3]   *= PLL_TWOTOTHE256;     
-      *eVector = *eVector + 1;
-    }	              
-
-  return;
-}
-
-
-
-
-
-
-
-
-static double evaluatePartialGTRCAT(int i, double ki, int counter,  traversalInfo *ti, double qz,
-				    int w, double *EIGN, double *EI, double *EV,
-				    double *tipVector, unsigned  char **yVector, 
-				    int branchReference, int mxtips)
-{
-  double lz, term;       
-  double  d[3];
-  double   *x1, *x2, *lVector = NULL; 
-  int scale = 0, k;
-  traversalInfo *trav = &ti[0];
- 
-  rax_posix_memalign ((void **) &lVector, PLL_BYTE_ALIGNMENT, sizeof(double) * 4 * mxtips);    
-
-  assert(isTip(trav->pNumber, mxtips));
-     
-  x1 = &(tipVector[4 *  yVector[trav->pNumber][i]]);   
-
-  for(k = 1; k < counter; k++)    
-    {
-      double 
-	qz = ti[k].qz[branchReference],
-	rz = ti[k].rz[branchReference];
-      
-      qz = (qz > PLL_ZMIN) ? log(qz) : log(PLL_ZMIN);
-      rz = (rz > PLL_ZMIN) ? log(rz) : log(PLL_ZMIN);
-
-      computeVectorGTRCAT(lVector, &scale, ki, i, qz, rz, &ti[k], 
-			  EIGN, EI, EV, 
-			  tipVector, yVector, mxtips);       
-    }
-   
-  x2 = &lVector[4 * (trav->qNumber - mxtips)]; 
-
-  assert(0 <=  (trav->qNumber - mxtips) && (trav->qNumber - mxtips) < mxtips);  
-       
-  if(qz < PLL_ZMIN) 
-    lz = PLL_ZMIN;
-  lz  = log(qz); 
-  lz *= ki;  
-  
-  d[0] = exp (EIGN[1] * lz);
-  d[1] = exp (EIGN[2] * lz);
-  d[2] = exp (EIGN[3] * lz);       	   
-  
-  term =  x1[0] * x2[0];
-  term += x1[1] * x2[1] * d[0];
-  term += x1[2] * x2[2] * d[1];
-  term += x1[3] * x2[3] * d[2];     
-
-  term = log(fabs(term)) + (scale * log(PLL_MINLIKELIHOOD));   
-
-  term = term * w;
-
-  rax_free(lVector);  
-
-  return  term;
-}
-
-/**********************************************************************************/
-
-static __inline void computeVectorGTRCATPROT(double *lVector, int *eVector, double ki, int i, double qz, double rz,
-				       traversalInfo *ti, double *EIGN, double *EI, double *EV, double *tipVector, 
-				       unsigned char **yVector, int mxtips)
-{       
-  double  d1[20], d2[20],  ump_x1, ump_x2, x1px2[20], lz1, lz2; 
-  double *x1, *x2, *x3;
-  int j, k,
-    scale = 1,
-    pNumber = ti->pNumber,
-    rNumber = ti->rNumber,
-    qNumber = ti->qNumber;
- 
-  x3  = &lVector[20 * (pNumber  - mxtips)];  
- 
-
-  switch(ti->tipCase)
-    {
-    case PLL_TIP_TIP:     
-      x1 = &(tipVector[20 * yVector[qNumber][i]]);
-      x2 = &(tipVector[20 * yVector[rNumber][i]]);    
-      break;
-    case PLL_TIP_INNER:     
-      x1 = &(tipVector[20 * yVector[qNumber][i]]);
-      x2 = &lVector[20 * (rNumber - mxtips)];           
-      break;
-    case PLL_INNER_INNER:            
-      x1 = &lVector[20 * (qNumber - mxtips)];
-      x2 = &lVector[20 * (rNumber - mxtips)];     
-      break;
-    default:
-      assert(0);
-    }
-     
-  lz1 = qz * ki;  
-  lz2 = rz * ki;
-  
-   d1[0] = x1[0];
-   d2[0] = x2[0];
-
-  for(j = 1; j < 20; j++)
-    {
-      d1[j] = x1[j] * exp(EIGN[j] * lz1);
-      d2[j] = x2[j] * exp(EIGN[j] * lz2);	    
-    }
- 
- 
-  for(j = 0; j < 20; j++)
-    {        
-      ump_x1 = 0;
-      ump_x2 = 0;
-
-      for(k = 0; k < 20; k++)
-	{
-	  ump_x1 += d1[k] * EI[j * 20 + k];
-	  ump_x2 += d2[k] * EI[j * 20 + k];
-	}
-      
-      x1px2[j] = ump_x1 * ump_x2;
-    }
-  
-  for(j = 0; j < 20; j++)
-    x3[j] = 0.0;
-
-  for(j = 0; j < 20; j++)          
-    for(k = 0; k < 20; k++)	
-      x3[k] +=  x1px2[j] *  EV[20 * j + k];	   
-      
-  scale = 1;
-  for(k = 0; (k < 20) && scale; k++)    
-    scale = ((x3[k] < PLL_MINLIKELIHOOD) && (x3[k] > PLL_MINUSMINLIKELIHOOD));    
-
-  if(scale)
-    {	        
-
-      for(k = 0; k < 20; k++)
-	x3[k]   *= PLL_TWOTOTHE256;
-         
-      *eVector = *eVector + 1;
-    }	              
-
-  return;
-}
-
-
-
-
-
-
-
-
-static double evaluatePartialGTRCATPROT(int i, double ki, int counter,  traversalInfo *ti, double qz,
-				    int w, double *EIGN, double *EI, double *EV,
-				    double *tipVector, unsigned  char **yVector, 
-				    int branchReference, int mxtips)
-{
-  double lz, term;       
-  double  d[20];
-  double   *x1, *x2, *lVector = NULL; 
-  int scale = 0, k;
-
-  traversalInfo *trav = &ti[0];
- 
-  rax_posix_memalign ((void **)&lVector, PLL_BYTE_ALIGNMENT, sizeof(double) * 20 * mxtips);
-
-  assert(isTip(trav->pNumber, mxtips));
-     
-  x1 = &(tipVector[20 *  yVector[trav->pNumber][i]]);   
-
-  for(k = 1; k < counter; k++)    
-    {
-      double 
-	qz = ti[k].qz[branchReference],
-	rz = ti[k].rz[branchReference];
-      
-      qz = (qz > PLL_ZMIN) ? log(qz) : log(PLL_ZMIN);
-      rz = (rz > PLL_ZMIN) ? log(rz) : log(PLL_ZMIN);
-
-      computeVectorGTRCATPROT(lVector, &scale, ki, i, qz, rz, &ti[k], 
-			  EIGN, EI, EV, 
-			  tipVector, yVector, mxtips);       
-    }
-   
-  x2 = &lVector[20 * (trav->qNumber - mxtips)]; 
-
-  assert(0 <=  (trav->qNumber - mxtips) && (trav->qNumber - mxtips) < mxtips);  
-       
-  if(qz < PLL_ZMIN) 
-    lz = PLL_ZMIN;
-  lz  = log(qz); 
-  lz *= ki;  
-  
-  d[0] = 1.0;
-  
-  for(k = 1; k < 20; k++)
-    d[k] =  exp (EIGN[k] * lz);
-
-        	   
-  term =  0.0;
-  for(k = 0; k < 20; k++)
-    term += x1[k] * x2[k] * d[k];     
-
-  term = log(fabs(term)) + (scale * log(PLL_MINLIKELIHOOD));   
-
-  term = term * w;
-
-  rax_free(lVector);  
-
-  return  term;
-}
-
-/******************************************/
-
-
-
-#endif
diff --git a/pllrepo/src/fastDNAparsimony.c b/pllrepo/src/fastDNAparsimony.c
deleted file mode 100644
index 72900a6..0000000
--- a/pllrepo/src/fastDNAparsimony.c
+++ /dev/null
@@ -1,1941 +0,0 @@
-/** 
- * PLL (version 1.0.0) a software library for phylogenetic inference
- * Copyright (C) 2013 Tomas Flouri and Alexandros Stamatakis
- *
- * Derived from 
- * RAxML-HPC, a program for sequential and parallel estimation of phylogenetic
- * trees by Alexandros Stamatakis
- *
- * This program is free software: you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation, either version 3 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- * 
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * For any other enquiries send an Email to Tomas Flouri
- * Tomas.Flouri at h-its.org
- *
- * When publishing work that uses PLL please cite PLL
- * 
- * @file fastDNAparsimony.c
- */
-#include "mem_alloc.h"
-
-#ifndef WIN32
-#include <sys/times.h>
-#include <sys/types.h>
-#include <sys/time.h>
-#include <unistd.h>  
-#endif
-
-#include <limits.h>
-#include <math.h>
-#include <time.h> 
-#include <stdlib.h>
-#include <stdio.h>
-#include <ctype.h>
-#include <string.h>
-#include <stdint.h>
-#include <assert.h>
-
-#if defined(__MIC_NATIVE)
-
-#include <immintrin.h>
-
-#define INTS_PER_VECTOR 16
-//#define LONG_INTS_PER_VECTOR 8
-#define LONG_INTS_PER_VECTOR (64/sizeof(long))
-#define INT_TYPE __m512i
-#define CAST double*
-#define SET_ALL_BITS_ONE _mm512_set1_epi32(0xFFFFFFFF)
-#define SET_ALL_BITS_ZERO _mm512_setzero_epi32()
-#define VECTOR_LOAD _mm512_load_epi32
-#define VECTOR_STORE  _mm512_store_epi32
-#define VECTOR_BIT_AND _mm512_and_epi32
-#define VECTOR_BIT_OR  _mm512_or_epi32
-#define VECTOR_AND_NOT _mm512_andnot_epi32
-
-#elif defined(__AVX)
-
-#include <xmmintrin.h>
-#include <immintrin.h>
-#include <pmmintrin.h>
-
-#define INTS_PER_VECTOR 8
-//#define LONG_INTS_PER_VECTOR 4
-#define LONG_INTS_PER_VECTOR (32/sizeof(long))
-#define INT_TYPE __m256d
-#define CAST double*
-//#define SET_ALL_BITS_ONE (__m256d)_mm256_set_epi32(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF)
-//#define SET_ALL_BITS_ZERO (__m256d)_mm256_set_epi32(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000)
-#define SET_ALL_BITS_ONE _mm256_castsi256_pd(_mm256_set_epi32(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF))
-#define SET_ALL_BITS_ZERO _mm256_castsi256_pd(_mm256_set_epi32(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000))
-#define VECTOR_LOAD _mm256_load_pd
-#define VECTOR_BIT_AND _mm256_and_pd
-#define VECTOR_BIT_OR  _mm256_or_pd
-#define VECTOR_STORE  _mm256_store_pd
-#define VECTOR_AND_NOT _mm256_andnot_pd
-
-#elif (defined(__SSE3))
-
-#include <xmmintrin.h>
-#include <pmmintrin.h>
-  
-#define INTS_PER_VECTOR 4
-#ifdef __i386__
-//#define LONG_INTS_PER_VECTOR 4
-#define LONG_INTS_PER_VECTOR (16/sizeof(long))
-#else
-//#define LONG_INTS_PER_VECTOR 2
-#define LONG_INTS_PER_VECTOR (16/sizeof(long))
-#endif
-#define INT_TYPE __m128i
-#define CAST __m128i*
-#define SET_ALL_BITS_ONE _mm_set_epi32(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF)
-#define SET_ALL_BITS_ZERO _mm_set_epi32(0x00000000, 0x00000000, 0x00000000, 0x00000000)
-#define VECTOR_LOAD _mm_load_si128
-#define VECTOR_BIT_AND _mm_and_si128
-#define VECTOR_BIT_OR  _mm_or_si128
-#define VECTOR_STORE  _mm_store_si128
-#define VECTOR_AND_NOT _mm_andnot_si128
-
-#endif
-
-
-#include "pll.h"
-#include "pllInternal.h"
-
-#if defined (_MSC_VER)
-#	if defined ( __SSE4_2__ ) || defined (__AVX__)
-#		include <nmmintrin.h>
-#		define __builtin_popcount _mm_popcnt_u32
-#		define __builtin_popcountl _mm_popcnt_u64
-#	else
-#		include <intrin.h>
-	static __inline uint32_t __builtin_popcount (uint32_t a) {
-		// popcnt instruction not available
-		uint32_t b = a - ((a >> 1) & 0x55555555);
-		uint32_t c = (b & 0x33333333) + ((b >> 2) & 0x33333333);
-		uint32_t d = (c + (c >> 4)) & 0x0F0F0F0F;
-		uint32_t e = d * 0x01010101;
-		return   e >> 24;
-	}
-//#		define __builtin_popcount __popcnt
-#		define __builtin_popcountl __popcnt64
-#	endif
-#endif
-
-static pllBoolean tipHomogeneityCheckerPars(pllInstance *tr, nodeptr p, int grouping);
-
-extern const unsigned int mask32[32]; 
-/* vector-specific stuff */
-
-
-extern double masterTime;
-
-/************************************************ pop count stuff ***********************************************/
-
- unsigned int bitcount_32_bit(unsigned int i)
-{
-  return ((unsigned int) __builtin_popcount(i));
-}
-
-/* bit count for 64 bit integers */
-
-//__inline unsigned int bitcount_64_bit(uint64_t i)
-//{
-//  return ((unsigned int) __builtin_popcountl(i));
-//}
-
-/* bit count for 128 bit SSE3 and 256 bit AVX registers */
-
-#if (defined(__SSE3) || defined(__AVX))
-
-#ifdef _WIN32
- /* emulate with 32-bit version */
-static __inline unsigned int vectorPopcount(INT_TYPE v)
-{
-PLL_ALIGN_BEGIN unsigned int counts[INTS_PER_VECTOR] PLL_ALIGN_END;
-
-  int
-    i,
-    sum = 0;
-
-  VECTOR_STORE((CAST)counts, v);
-
-  for(i = 0; i < INTS_PER_VECTOR; i++)
-    sum += __builtin_popcount(counts[i]);
-
-  return ((unsigned int)sum);
-}
-#else
-
-static __inline unsigned int vectorPopcount(INT_TYPE v)
-{
-  unsigned long
-    counts[LONG_INTS_PER_VECTOR] __attribute__ ((aligned (PLL_BYTE_ALIGNMENT)));
-
-  int    
-    i,
-    sum = 0;
-  
-  VECTOR_STORE((CAST)counts, v);
-
-  for(i = 0; i < LONG_INTS_PER_VECTOR; i++)
-    sum += __builtin_popcountl(counts[i]);
-             
-  return ((unsigned int)sum);
-}
-#endif
-
-#endif
-
-
-
-/********************************DNA FUNCTIONS *****************************************************************/
-
-
-static int checkerPars(pllInstance *tr, nodeptr p)
-{
-  int group = tr->constraintVector[p->number];
-
-  if(isTip(p->number, tr->mxtips))
-    {
-      group = tr->constraintVector[p->number];
-      return group;
-    }
-  else
-    {
-      if(group != -9) 
-        return group;
-
-      group = checkerPars(tr, p->next->back);
-      if(group != -9) 
-        return group;
-
-      group = checkerPars(tr, p->next->next->back);
-      if(group != -9) 
-        return group;
-
-      return -9;
-    }
-}
-
-static pllBoolean tipHomogeneityCheckerPars(pllInstance *tr, nodeptr p, int grouping)
-{
-  if(isTip(p->number, tr->mxtips))
-    {
-      if(tr->constraintVector[p->number] != grouping) 
-        return PLL_FALSE;
-      else 
-        return PLL_TRUE;
-    }
-  else
-    {   
-      return  (tipHomogeneityCheckerPars(tr, p->next->back, grouping) && tipHomogeneityCheckerPars(tr, p->next->next->back,grouping));      
-    }
-}
-
-static void getxnodeLocal (nodeptr p)
-{
-  nodeptr  s;
-
-  if((s = p->next)->xPars || (s = s->next)->xPars)
-    {
-      p->xPars = s->xPars;
-      s->xPars = 0;
-    }
-
-  assert(p->next->xPars || p->next->next->xPars || p->xPars);
-
-}
-
-static void computeTraversalInfoParsimony(nodeptr p, int *ti, int *counter, int maxTips, pllBoolean full)
-{        
-  nodeptr 
-    q = p->next->back,
-    r = p->next->next->back;
-  
-  if(! p->xPars)
-    getxnodeLocal(p);  
-  
-  if(full)
-    {
-       if(q->number > maxTips) 
-         computeTraversalInfoParsimony(q, ti, counter, maxTips, full);
-      
-      if(r->number > maxTips) 
-        computeTraversalInfoParsimony(r, ti, counter, maxTips, full);
-    }
-  else
-    {
-      if(q->number > maxTips && !q->xPars) 
-        computeTraversalInfoParsimony(q, ti, counter, maxTips, full);
-      
-      if(r->number > maxTips && !r->xPars) 
-        computeTraversalInfoParsimony(r, ti, counter, maxTips, full);
-    }
-  
-  
-  ti[*counter]     = p->number;
-  ti[*counter + 1] = q->number;
-  ti[*counter + 2] = r->number;
-  *counter = *counter + 4;
-}
-
-
-
-
-
-
-
-#if (defined(__SSE3) || defined(__AVX))
-
-static void newviewParsimonyIterativeFast(pllInstance *tr, partitionList *pr)
-{    
-  INT_TYPE
-    allOne = SET_ALL_BITS_ONE;
-
-  int 
-    model,
-    *ti = tr->ti,
-    count = ti[0],
-    index; 
-
-  for(index = 4; index < count; index += 4)
-    {      
-      unsigned int
-        totalScore = 0;
-
-      size_t
-        pNumber = (size_t)ti[index],
-        qNumber = (size_t)ti[index + 1],
-        rNumber = (size_t)ti[index + 2];
-      
-      for(model = 0; model < pr->numberOfPartitions; model++)
-        {
-          size_t
-            k,
-            states = pr->partitionData[model]->states,
-            width = pr->partitionData[model]->parsimonyLength;
-            
-          unsigned int  
-            i;      
-                 
-          switch(states)
-            {
-            case 2:       
-              {
-                parsimonyNumber
-                  *left[2],
-                  *right[2],
-                  *this[2];
-
-                for(k = 0; k < 2; k++)
-                  {
-                    left[k]  = &(pr->partitionData[model]->parsVect[(width * 2 * qNumber) + width * k]);
-                    right[k] = &(pr->partitionData[model]->parsVect[(width * 2 * rNumber) + width * k]);
-                    this[k]  = &(pr->partitionData[model]->parsVect[(width * 2 * pNumber) + width * k]);
-                  }
-
-                for(i = 0; i < width; i += INTS_PER_VECTOR)
-                  {               
-                    INT_TYPE
-                      s_r, s_l, v_N,
-                      l_A, l_C,
-                      v_A, v_C;          
-                    
-                    s_l = VECTOR_LOAD((CAST)(&left[0][i]));
-                    s_r = VECTOR_LOAD((CAST)(&right[0][i]));
-                    l_A = VECTOR_BIT_AND(s_l, s_r);
-                    v_A = VECTOR_BIT_OR(s_l, s_r);
-                    
-                    s_l = VECTOR_LOAD((CAST)(&left[1][i]));
-                    s_r = VECTOR_LOAD((CAST)(&right[1][i]));
-                    l_C = VECTOR_BIT_AND(s_l, s_r);
-                    v_C = VECTOR_BIT_OR(s_l, s_r);                                                                
-                    
-                    v_N = VECTOR_BIT_OR(l_A, l_C);
-                    
-                    VECTOR_STORE((CAST)(&this[0][i]), VECTOR_BIT_OR(l_A, VECTOR_AND_NOT(v_N, v_A)));
-                    VECTOR_STORE((CAST)(&this[1][i]), VECTOR_BIT_OR(l_C, VECTOR_AND_NOT(v_N, v_C)));                                                                    
-                    
-                    v_N = VECTOR_AND_NOT(v_N, allOne);
-                    
-                    totalScore += vectorPopcount(v_N);            
-                  }
-              }
-              break;
-            case 4:
-              {
-                parsimonyNumber
-                  *left[4],
-                  *right[4],
-                  *this[4];
-
-                for(k = 0; k < 4; k++)
-                  {
-                    left[k]  = &(pr->partitionData[model]->parsVect[(width * 4 * qNumber) + width * k]);
-                    right[k] = &(pr->partitionData[model]->parsVect[(width * 4 * rNumber) + width * k]);
-                    this[k]  = &(pr->partitionData[model]->parsVect[(width * 4 * pNumber) + width * k]);
-                  }
-
-                for(i = 0; i < width; i += INTS_PER_VECTOR)
-                  {               
-                    INT_TYPE
-                      s_r, s_l, v_N,
-                      l_A, l_C, l_G, l_T,
-                      v_A, v_C, v_G, v_T;                
-                    
-                    s_l = VECTOR_LOAD((CAST)(&left[0][i]));
-                    s_r = VECTOR_LOAD((CAST)(&right[0][i]));
-                    l_A = VECTOR_BIT_AND(s_l, s_r);
-                    v_A = VECTOR_BIT_OR(s_l, s_r);
-                    
-                    s_l = VECTOR_LOAD((CAST)(&left[1][i]));
-                    s_r = VECTOR_LOAD((CAST)(&right[1][i]));
-                    l_C = VECTOR_BIT_AND(s_l, s_r);
-                    v_C = VECTOR_BIT_OR(s_l, s_r);
-                    
-                    s_l = VECTOR_LOAD((CAST)(&left[2][i]));
-                    s_r = VECTOR_LOAD((CAST)(&right[2][i]));
-                    l_G = VECTOR_BIT_AND(s_l, s_r);
-                    v_G = VECTOR_BIT_OR(s_l, s_r);
-                    
-                    s_l = VECTOR_LOAD((CAST)(&left[3][i]));
-                    s_r = VECTOR_LOAD((CAST)(&right[3][i]));
-                    l_T = VECTOR_BIT_AND(s_l, s_r);
-                    v_T = VECTOR_BIT_OR(s_l, s_r);
-                    
-                    v_N = VECTOR_BIT_OR(VECTOR_BIT_OR(l_A, l_C), VECTOR_BIT_OR(l_G, l_T));                                
-                    
-                    VECTOR_STORE((CAST)(&this[0][i]), VECTOR_BIT_OR(l_A, VECTOR_AND_NOT(v_N, v_A)));
-                    VECTOR_STORE((CAST)(&this[1][i]), VECTOR_BIT_OR(l_C, VECTOR_AND_NOT(v_N, v_C)));
-                    VECTOR_STORE((CAST)(&this[2][i]), VECTOR_BIT_OR(l_G, VECTOR_AND_NOT(v_N, v_G)));
-                    VECTOR_STORE((CAST)(&this[3][i]), VECTOR_BIT_OR(l_T, VECTOR_AND_NOT(v_N, v_T)));                                                    
-                    
-                    v_N = VECTOR_AND_NOT(v_N, allOne);
-                    
-                    totalScore += vectorPopcount(v_N);  
-                  }
-              }
-              break;
-            case 20:
-              {
-                parsimonyNumber
-                  *left[20],
-                  *right[20],
-                  *this[20];
-
-                for(k = 0; k < 20; k++)
-                  {
-                    left[k]  = &(pr->partitionData[model]->parsVect[(width * 20 * qNumber) + width * k]);
-                    right[k] = &(pr->partitionData[model]->parsVect[(width * 20 * rNumber) + width * k]);
-                    this[k]  = &(pr->partitionData[model]->parsVect[(width * 20 * pNumber) + width * k]);
-                  }
-
-                for(i = 0; i < width; i += INTS_PER_VECTOR)
-                  {               
-                    size_t j;
-                    
-                    INT_TYPE
-                      s_r, s_l, 
-                      v_N = SET_ALL_BITS_ZERO,
-                      l_A[20], 
-                      v_A[20];           
-                    
-                    for(j = 0; j < 20; j++)
-                      {
-                        s_l = VECTOR_LOAD((CAST)(&left[j][i]));
-                        s_r = VECTOR_LOAD((CAST)(&right[j][i]));
-                        l_A[j] = VECTOR_BIT_AND(s_l, s_r);
-                        v_A[j] = VECTOR_BIT_OR(s_l, s_r);
-                        
-                        v_N = VECTOR_BIT_OR(v_N, l_A[j]);
-                      }
-                    
-                    for(j = 0; j < 20; j++)                 
-                      VECTOR_STORE((CAST)(&this[j][i]), VECTOR_BIT_OR(l_A[j], VECTOR_AND_NOT(v_N, v_A[j])));                                                                    
-                    
-                    v_N = VECTOR_AND_NOT(v_N, allOne);
-                    
-                    totalScore += vectorPopcount(v_N);
-                  }
-              }
-              break;
-            default:
-              {
-                parsimonyNumber
-                  *left[32], 
-                  *right[32],
-                  *this[32];
-
-                assert(states <= 32);
-                
-                for(k = 0; k < states; k++)
-                  {
-                    left[k]  = &(pr->partitionData[model]->parsVect[(width * states * qNumber) + width * k]);
-                    right[k] = &(pr->partitionData[model]->parsVect[(width * states * rNumber) + width * k]);
-                    this[k]  = &(pr->partitionData[model]->parsVect[(width * states * pNumber) + width * k]);
-                  }
-
-                for(i = 0; i < width; i += INTS_PER_VECTOR)
-                  {               
-                    size_t j;
-                    
-                    INT_TYPE
-                      s_r, s_l, 
-                      v_N = SET_ALL_BITS_ZERO,
-                      l_A[32], 
-                      v_A[32];           
-                    
-                    for(j = 0; j < states; j++)
-                      {
-                        s_l = VECTOR_LOAD((CAST)(&left[j][i]));
-                        s_r = VECTOR_LOAD((CAST)(&right[j][i]));
-                        l_A[j] = VECTOR_BIT_AND(s_l, s_r);
-                        v_A[j] = VECTOR_BIT_OR(s_l, s_r);
-                        
-                        v_N = VECTOR_BIT_OR(v_N, l_A[j]);
-                      }
-                    
-                    for(j = 0; j < states; j++)             
-                      VECTOR_STORE((CAST)(&this[j][i]), VECTOR_BIT_OR(l_A[j], VECTOR_AND_NOT(v_N, v_A[j])));                                                                    
-                    
-                    v_N = VECTOR_AND_NOT(v_N, allOne);
-                    
-                    totalScore += vectorPopcount(v_N);
-                  }                             
-              }
-            }            
-        }
-
-      tr->parsimonyScore[pNumber] = totalScore + tr->parsimonyScore[rNumber] + tr->parsimonyScore[qNumber];      
-    }
-}
-
-
-
-static unsigned int evaluateParsimonyIterativeFast(pllInstance *tr, partitionList *pr)
-{
-  INT_TYPE 
-    allOne = SET_ALL_BITS_ONE;
-
-  size_t 
-    pNumber = (size_t)tr->ti[1],
-    qNumber = (size_t)tr->ti[2];
-
-  int
-    model;
-
-  unsigned int 
-    bestScore = tr->bestParsimony,    
-    sum;
-
-  if(tr->ti[0] > 4)
-    newviewParsimonyIterativeFast(tr, pr);
-
-  sum = tr->parsimonyScore[pNumber] + tr->parsimonyScore[qNumber];
-
-  for(model = 0; model < pr->numberOfPartitions; model++)
-    {
-      size_t
-        k,
-        states = pr->partitionData[model]->states,
-        width  = pr->partitionData[model]->parsimonyLength,
-        i;
-
-       switch(states)
-         {
-         case 2:
-           {
-             parsimonyNumber
-               *left[2],
-               *right[2];
-             
-             for(k = 0; k < 2; k++)
-               {
-                 left[k]  = &(pr->partitionData[model]->parsVect[(width * 2 * qNumber) + width * k]);
-                 right[k] = &(pr->partitionData[model]->parsVect[(width * 2 * pNumber) + width * k]);
-               }     
-             
-             for(i = 0; i < width; i += INTS_PER_VECTOR)
-               {                                               
-                 INT_TYPE      
-                   l_A = VECTOR_BIT_AND(VECTOR_LOAD((CAST)(&left[0][i])), VECTOR_LOAD((CAST)(&right[0][i]))),
-                   l_C = VECTOR_BIT_AND(VECTOR_LOAD((CAST)(&left[1][i])), VECTOR_LOAD((CAST)(&right[1][i]))),            
-                   v_N = VECTOR_BIT_OR(l_A, l_C);
-                 
-                 v_N = VECTOR_AND_NOT(v_N, allOne);
-                 
-                 sum += vectorPopcount(v_N);
-                 
-                 if(sum >= bestScore)
-                   return sum;                         
-               }
-           }
-           break;
-         case 4:
-           {
-             parsimonyNumber
-               *left[4],
-               *right[4];
-      
-             for(k = 0; k < 4; k++)
-               {
-                 left[k]  = &(pr->partitionData[model]->parsVect[(width * 4 * qNumber) + width * k]);
-                 right[k] = &(pr->partitionData[model]->parsVect[(width * 4 * pNumber) + width * k]);
-               }        
-
-             for(i = 0; i < width; i += INTS_PER_VECTOR)
-               {                                                
-                 INT_TYPE      
-                   l_A = VECTOR_BIT_AND(VECTOR_LOAD((CAST)(&left[0][i])), VECTOR_LOAD((CAST)(&right[0][i]))),
-                   l_C = VECTOR_BIT_AND(VECTOR_LOAD((CAST)(&left[1][i])), VECTOR_LOAD((CAST)(&right[1][i]))),
-                   l_G = VECTOR_BIT_AND(VECTOR_LOAD((CAST)(&left[2][i])), VECTOR_LOAD((CAST)(&right[2][i]))),
-                   l_T = VECTOR_BIT_AND(VECTOR_LOAD((CAST)(&left[3][i])), VECTOR_LOAD((CAST)(&right[3][i]))),
-                   v_N = VECTOR_BIT_OR(VECTOR_BIT_OR(l_A, l_C), VECTOR_BIT_OR(l_G, l_T));     
-                 
-                 v_N = VECTOR_AND_NOT(v_N, allOne);
-                 
-                 sum += vectorPopcount(v_N);
-                 
-                 if(sum >= bestScore)            
-                   return sum;          
-               }                 
-           }
-           break;
-         case 20:
-           {
-             parsimonyNumber
-               *left[20],
-               *right[20];
-             
-              for(k = 0; k < 20; k++)
-                {
-                  left[k]  = &(pr->partitionData[model]->parsVect[(width * 20 * qNumber) + width * k]);
-                  right[k] = &(pr->partitionData[model]->parsVect[(width * 20 * pNumber) + width * k]);
-                }  
-           
-              for(i = 0; i < width; i += INTS_PER_VECTOR)
-                {                              
-                  int 
-                    j;
-                  
-                  INT_TYPE      
-                    l_A,
-                    v_N = SET_ALL_BITS_ZERO;     
-                  
-                  for(j = 0; j < 20; j++)
-                    {
-                      l_A = VECTOR_BIT_AND(VECTOR_LOAD((CAST)(&left[j][i])), VECTOR_LOAD((CAST)(&right[j][i])));
-                      v_N = VECTOR_BIT_OR(l_A, v_N);
-                    }
-                  
-                  v_N = VECTOR_AND_NOT(v_N, allOne);
-                  
-                  sum += vectorPopcount(v_N);          
-                  
-                  if(sum >= bestScore)      
-                    return sum;                        
-                }
-           }
-           break;
-         default:
-           {
-             parsimonyNumber
-               *left[32],  
-               *right[32]; 
-
-             assert(states <= 32);
-
-             for(k = 0; k < states; k++)
-               {
-                 left[k]  = &(pr->partitionData[model]->parsVect[(width * states * qNumber) + width * k]);
-                 right[k] = &(pr->partitionData[model]->parsVect[(width * states * pNumber) + width * k]);
-               }  
-           
-             for(i = 0; i < width; i += INTS_PER_VECTOR)
-               {                               
-                 size_t
-                   j;
-                 
-                 INT_TYPE      
-                   l_A,
-                   v_N = SET_ALL_BITS_ZERO;     
-                 
-                 for(j = 0; j < states; j++)
-                   {
-                     l_A = VECTOR_BIT_AND(VECTOR_LOAD((CAST)(&left[j][i])), VECTOR_LOAD((CAST)(&right[j][i])));
-                     v_N = VECTOR_BIT_OR(l_A, v_N);
-                   }
-                 
-                 v_N = VECTOR_AND_NOT(v_N, allOne);
-                 
-                 sum += vectorPopcount(v_N);           
-                 
-                 if(sum >= bestScore)         
-                   return sum;                 
-               }
-           }
-         }
-    }
-  
-  return sum;
-}
-
-
-#else
-static void newviewParsimonyIterativeFast(pllInstance *tr, partitionList * pr)
-{    
-  int 
-    model,
-    *ti = tr->ti,
-    count = ti[0],
-    index; 
-
-  for(index = 4; index < count; index += 4)
-    {      
-      unsigned int
-        totalScore = 0;
-
-      size_t
-        pNumber = (size_t)ti[index],
-        qNumber = (size_t)ti[index + 1],
-        rNumber = (size_t)ti[index + 2];
-      
-      for(model = 0; model < pr->numberOfPartitions; model++)
-        {
-          size_t
-            k,
-            states = pr->partitionData[model]->states,
-            width = pr->partitionData[model]->parsimonyLength;    
-            
-          unsigned int  
-            i;      
-                 
-          switch(states)
-            {
-            case 2:       
-              {
-                parsimonyNumber
-                  *left[2],
-                  *right[2],
-                  *this[2];
-                
-                parsimonyNumber
-                   o_A,
-                   o_C,
-                   t_A,
-                   t_C, 
-                   t_N;
-                
-                for(k = 0; k < 2; k++)
-                  {
-                    left[k]  = &(pr->partitionData[model]->parsVect[(width * 2 * qNumber) + width * k]);
-                    right[k] = &(pr->partitionData[model]->parsVect[(width * 2 * rNumber) + width * k]);
-                    this[k]  = &(pr->partitionData[model]->parsVect[(width * 2 * pNumber) + width * k]);
-                  }
-
-                for(i = 0; i < width; i++)
-                  {               
-                    t_A = left[0][i] & right[0][i];
-                    t_C = left[1][i] & right[1][i];                
-
-                    o_A = left[0][i] | right[0][i];
-                    o_C = left[1][i] | right[1][i];
-                  
-                    t_N = ~(t_A | t_C);   
-
-                    this[0][i] = t_A | (t_N & o_A);
-                    this[1][i] = t_C | (t_N & o_C);                
-                    
-                    totalScore += ((unsigned int) __builtin_popcount(t_N));
-                  }
-              }
-              break;
-            case 4:
-              {
-                parsimonyNumber
-                  *left[4],
-                  *right[4],
-                  *this[4];
-
-                for(k = 0; k < 4; k++)
-                  {
-                    left[k]  = &(pr->partitionData[model]->parsVect[(width * 4 * qNumber) + width * k]);
-                    right[k] = &(pr->partitionData[model]->parsVect[(width * 4 * rNumber) + width * k]);
-                    this[k]  = &(pr->partitionData[model]->parsVect[(width * 4 * pNumber) + width * k]);
-                  }
-
-                parsimonyNumber
-                   o_A,
-                   o_C,
-                   o_G,
-                   o_T,
-                   t_A,
-                   t_C,
-                   t_G,
-                   t_T, 
-                   t_N;
-
-                for(i = 0; i < width; i++)
-                  {               
-                    t_A = left[0][i] & right[0][i];
-                    t_C = left[1][i] & right[1][i];
-                    t_G = left[2][i] & right[2][i];       
-                    t_T = left[3][i] & right[3][i];
-
-                    o_A = left[0][i] | right[0][i];
-                    o_C = left[1][i] | right[1][i];
-                    o_G = left[2][i] | right[2][i];       
-                    o_T = left[3][i] | right[3][i];
-
-                    t_N = ~(t_A | t_C | t_G | t_T);       
-
-                    this[0][i] = t_A | (t_N & o_A);
-                    this[1][i] = t_C | (t_N & o_C);
-                    this[2][i] = t_G | (t_N & o_G);
-                    this[3][i] = t_T | (t_N & o_T); 
-                    
-                    totalScore += ((unsigned int) __builtin_popcount(t_N));
-                  }
-              }
-              break;
-            case 20:
-              {
-                parsimonyNumber
-                  *left[20],
-                  *right[20],
-                  *this[20];
-
-                parsimonyNumber
-                  o_A[20],
-                  t_A[20],        
-                  t_N;
-
-                for(k = 0; k < 20; k++)
-                  {
-                    left[k]  = &(pr->partitionData[model]->parsVect[(width * 20 * qNumber) + width * k]);
-                    right[k] = &(pr->partitionData[model]->parsVect[(width * 20 * rNumber) + width * k]);
-                    this[k]  = &(pr->partitionData[model]->parsVect[(width * 20 * pNumber) + width * k]);
-                  }
-
-                for(i = 0; i < width; i++)
-                  {               
-                    size_t k;
-                    
-                    t_N = 0;
-
-                    for(k = 0; k < 20; k++)
-                      {
-                        t_A[k] = left[k][i] & right[k][i];
-                        o_A[k] = left[k][i] | right[k][i];
-                        t_N = t_N | t_A[k];
-                      }
-                    
-                    t_N = ~t_N;
-
-                    for(k = 0; k < 20; k++)                   
-                      this[k][i] = t_A[k] | (t_N & o_A[k]);                
-                    
-                    totalScore += ((unsigned int) __builtin_popcount(t_N));
-                  }
-              }
-              break;
-            default:
-              {         
-                parsimonyNumber
-                  *left[32],
-                  *right[32],
-                  *this[32];
-                
-                parsimonyNumber
-                  o_A[32],
-                  t_A[32],        
-                  t_N;
-                
-                assert(states <= 32);
-                
-                for(k = 0; k < states; k++)
-                  {
-                    left[k]  = &(pr->partitionData[model]->parsVect[(width * states * qNumber) + width * k]);
-                    right[k] = &(pr->partitionData[model]->parsVect[(width * states * rNumber) + width * k]);
-                    this[k]  = &(pr->partitionData[model]->parsVect[(width * states * pNumber) + width * k]);
-                  }
-                
-                for(i = 0; i < width; i++)
-                  {               
-                    t_N = 0;
-                    
-                    for(k = 0; k < states; k++)
-                      {
-                        t_A[k] = left[k][i] & right[k][i];
-                        o_A[k] = left[k][i] | right[k][i];
-                        t_N = t_N | t_A[k];
-                      }
-                    
-                    t_N = ~t_N;
-                    
-                    for(k = 0; k < states; k++)               
-                      this[k][i] = t_A[k] | (t_N & o_A[k]);                
-                    
-                    totalScore += ((unsigned int) __builtin_popcount(t_N));
-                  }
-              }                       
-            } 
-        }
-
-      tr->parsimonyScore[pNumber] = totalScore + tr->parsimonyScore[rNumber] + tr->parsimonyScore[qNumber];      
-    }
-}
-
-
-static unsigned int evaluateParsimonyIterativeFast(pllInstance *tr, partitionList * pr)
-{
-  size_t 
-    pNumber = (size_t)tr->ti[1],
-    qNumber = (size_t)tr->ti[2];
-
-  int
-    model;
-
-  unsigned int 
-    bestScore = tr->bestParsimony,    
-    sum;
-
-  if(tr->ti[0] > 4)
-    newviewParsimonyIterativeFast(tr, pr); 
-
-  sum = tr->parsimonyScore[pNumber] + tr->parsimonyScore[qNumber];
-
-  for(model = 0; model < pr->numberOfPartitions; model++)
-    {
-      size_t
-        k,
-        states = pr->partitionData[model]->states,
-        width  = pr->partitionData[model]->parsimonyLength, 
-        i;
-
-       switch(states)
-         {
-         case 2:
-           {
-             parsimonyNumber 
-               t_A,
-               t_C,           
-               t_N,
-               *left[2],
-               *right[2];
-             
-             for(k = 0; k < 2; k++)
-               {
-                 left[k]  = &(pr->partitionData[model]->parsVect[(width * 2 * qNumber) + width * k]);
-                 right[k] = &(pr->partitionData[model]->parsVect[(width * 2 * pNumber) + width * k]);
-               }     
-             
-             for(i = 0; i < width; i++)
-               {                                               
-                 t_A = left[0][i] & right[0][i];
-                 t_C = left[1][i] & right[1][i];
-                 
-                  t_N = ~(t_A | t_C);
-
-                  sum += ((unsigned int) __builtin_popcount(t_N));
-                 
-                 if(sum >= bestScore)
-                   return sum;                         
-               }
-           }
-           break;
-         case 4:
-           {
-             parsimonyNumber
-               t_A,
-               t_C,
-               t_G,
-               t_T,
-               t_N,
-               *left[4],
-               *right[4];
-      
-             for(k = 0; k < 4; k++)
-               {
-                 left[k]  = &(pr->partitionData[model]->parsVect[(width * 4 * qNumber) + width * k]);
-                 right[k] = &(pr->partitionData[model]->parsVect[(width * 4 * pNumber) + width * k]);
-               }        
-
-             for(i = 0; i < width; i++)
-               {                                                
-                  t_A = left[0][i] & right[0][i];
-                  t_C = left[1][i] & right[1][i];
-                  t_G = left[2][i] & right[2][i];         
-                  t_T = left[3][i] & right[3][i];
-
-                  t_N = ~(t_A | t_C | t_G | t_T);
-
-                  sum += ((unsigned int) __builtin_popcount(t_N));
-                 
-                 if(sum >= bestScore)            
-                   return sum;          
-               }                 
-           }
-           break;
-         case 20:
-           {
-             parsimonyNumber
-               t_A,
-               t_N,
-               *left[20],
-               *right[20];
-             
-              for(k = 0; k < 20; k++)
-                {
-                  left[k]  = &(pr->partitionData[model]->parsVect[(width * 20 * qNumber) + width * k]);
-                  right[k] = &(pr->partitionData[model]->parsVect[(width * 20 * pNumber) + width * k]);
-                }  
-           
-              for(i = 0; i < width; i++)
-                { 
-                  t_N = 0;
-                  
-                  for(k = 0; k < 20; k++)
-                    {
-                      t_A = left[k][i] & right[k][i];
-                      t_N = t_N | t_A;
-                    }
-               
-                  t_N = ~t_N;
-
-                  sum += ((unsigned int) __builtin_popcount(t_N));
-                  
-                  if(sum >= bestScore)      
-                    return sum;                        
-                }
-           }
-           break;
-         default:
-           {
-             parsimonyNumber
-               t_A,
-               t_N,
-               *left[32], 
-               *right[32];  
-
-             assert(states <= 32);
-
-             for(k = 0; k < states; k++)
-               {
-                 left[k]  = &(pr->partitionData[model]->parsVect[(width * states * qNumber) + width * k]);
-                 right[k] = &(pr->partitionData[model]->parsVect[(width * states * pNumber) + width * k]);
-               }  
-           
-             for(i = 0; i < width; i++)
-               {                               
-                 t_N = 0;
-                  
-                 for(k = 0; k < states; k++)
-                   {
-                     t_A = left[k][i] & right[k][i];
-                     t_N = t_N | t_A;
-                   }
-               
-                  t_N = ~t_N;
-
-                  sum += ((unsigned int) __builtin_popcount(t_N));
-                                                 
-                 if(sum >= bestScore)                     
-                   return sum;                     
-               }                     
-           }
-         }
-    }
-  
-  return sum;
-}
-
-#endif
-
-
-
-
-
-
-static unsigned int evaluateParsimony(pllInstance *tr, partitionList *pr, nodeptr p, pllBoolean full)
-{
-  volatile unsigned int result;
-  nodeptr q = p->back;
-  int
-    *ti = tr->ti,
-    counter = 4;
-  
-  ti[1] = p->number;
-  ti[2] = q->number;
-
-  if(full)
-    {
-      if(p->number > tr->mxtips)
-        computeTraversalInfoParsimony(p, ti, &counter, tr->mxtips, full);
-      if(q->number > tr->mxtips)
-        computeTraversalInfoParsimony(q, ti, &counter, tr->mxtips, full); 
-    }
-  else
-    {
-      if(p->number > tr->mxtips && !p->xPars)
-        computeTraversalInfoParsimony(p, ti, &counter, tr->mxtips, full);
-      if(q->number > tr->mxtips && !q->xPars)
-        computeTraversalInfoParsimony(q, ti, &counter, tr->mxtips, full); 
-    }
-
-  ti[0] = counter;
-
-  result = evaluateParsimonyIterativeFast(tr, pr);
-
-  return result;
-}
-
-
-static void newviewParsimony(pllInstance *tr, partitionList *pr, nodeptr  p)
-{     
-  if(p->number <= tr->mxtips)
-    return;
-
-  {
-    int 
-      counter = 4;     
-           
-    computeTraversalInfoParsimony(p, tr->ti, &counter, tr->mxtips, PLL_FALSE);              
-    tr->ti[0] = counter;            
-    
-    newviewParsimonyIterativeFast(tr, pr);
-  }
-}
-
-
-
-
-
-/****************************************************************************************************************************************/
-
-static void insertParsimony (pllInstance *tr, partitionList *pr, nodeptr p, nodeptr q)
-{
-  nodeptr  r;
-  
-  r = q->back;
-  
-  hookupDefault(p->next,       q);
-  hookupDefault(p->next->next, r);
-   
-  newviewParsimony(tr, pr, p);
-} 
-
-
-
-static nodeptr buildNewTip (pllInstance *tr, nodeptr p)
-{ 
-  nodeptr  q;
-
-  q = tr->nodep[(tr->nextnode)++];
-  hookupDefault(p, q);
-  q->next->back = (nodeptr)NULL;
-  q->next->next->back = (nodeptr)NULL;
- 
-  return  q;
-} 
-
-static void buildSimpleTree (pllInstance *tr, partitionList *pr, int ip, int iq, int ir)
-{    
-  nodeptr  p, s;
-  int  i;
-  
-  i = PLL_MIN(ip, iq);
-  if (ir < i)  i = ir; 
-  tr->start = tr->nodep[i];
-  tr->ntips = 3;
-  p = tr->nodep[ip];
-  hookupDefault(p, tr->nodep[iq]);
-  s = buildNewTip(tr, tr->nodep[ir]);
-  insertParsimony(tr, pr, s, p);
-}
-
-
-static void testInsertParsimony (pllInstance *tr, partitionList *pr, nodeptr p, nodeptr q, pllBoolean saveBranches)
-{ 
-  unsigned int 
-    mp;
- 
-  nodeptr  
-    r = q->back;   
-
-  pllBoolean
-    doIt = PLL_TRUE;
-
-  int numBranches = pr->perGeneBranchLengths?pr->numberOfPartitions:1;
-
-  if(tr->grouped)
-    {
-      int 
-        rNumber = tr->constraintVector[r->number],
-        qNumber = tr->constraintVector[q->number],
-        pNumber = tr->constraintVector[p->number];
-
-      doIt = PLL_FALSE;
-     
-      if(pNumber == -9)
-        pNumber = checkerPars(tr, p->back);
-      if(pNumber == -9)
-        doIt = PLL_TRUE;
-      else
-        {
-          if(qNumber == -9)
-            qNumber = checkerPars(tr, q);
-
-          if(rNumber == -9)
-            rNumber = checkerPars(tr, r);
-
-          if(pNumber == rNumber || pNumber == qNumber)
-            doIt = PLL_TRUE;       
-        }
-    }
-
-  if(doIt)
-    {
-      double 
-        z[PLL_NUM_BRANCHES];
-      
-      if(saveBranches)
-        {
-          int i;
-          
-          for(i = 0; i < numBranches; i++)
-            z[i] = q->z[i];
-        }
-
-      insertParsimony(tr, pr, p, q);
-  
-      mp = evaluateParsimony(tr, pr, p->next->next, PLL_FALSE);
-
-      if(mp < tr->bestParsimony)
-        {
-          tr->bestParsimony = mp;
-          tr->insertNode = q;
-          tr->removeNode = p;
-        }
-      
-      if(saveBranches)
-        hookup(q, r, z, numBranches);
-      else
-        hookupDefault(q, r);
-      
-      p->next->next->back = p->next->back = (nodeptr) NULL;
-    }
-       
-  return;
-} 
-
-
-static void restoreTreeParsimony(pllInstance *tr, partitionList *pr, nodeptr p, nodeptr q)
-{ 
-  nodeptr
-    r = q->back;
-  
-  int counter = 4;
-  
-  hookupDefault(p->next,       q);
-  hookupDefault(p->next->next, r);
-  
-  computeTraversalInfoParsimony(p, tr->ti, &counter, tr->mxtips, PLL_FALSE);              
-  tr->ti[0] = counter;
-    
-  newviewParsimonyIterativeFast(tr, pr);
-}
-
-
-static void addTraverseParsimony (pllInstance *tr, partitionList *pr, nodeptr p, nodeptr q, int mintrav, int maxtrav, pllBoolean doAll, pllBoolean saveBranches)
-{        
-  if (doAll || (--mintrav <= 0))               
-    testInsertParsimony(tr, pr, p, q, saveBranches);
-
-  if (((q->number > tr->mxtips)) && ((--maxtrav > 0) || doAll))
-    {         
-      addTraverseParsimony(tr, pr, p, q->next->back, mintrav, maxtrav, doAll, saveBranches);
-      addTraverseParsimony(tr, pr, p, q->next->next->back, mintrav, maxtrav, doAll, saveBranches);
-    }
-}
-
-
-
-
-
-static void makePermutationFast(int *perm, int n, pllInstance *tr)
-{    
-  int  
-    i, 
-    j, 
-    k;
-
-  for (i = 1; i <= n; i++)    
-    perm[i] = i;               
-
-  for (i = 1; i <= n; i++) 
-    {      
-      double d =  randum(&tr->randomNumberSeed);
-
-      k =  (int)((double)(n + 1 - i) * d);
-      
-      j        = perm[i];
-
-      perm[i]     = perm[i + k];
-      perm[i + k] = j; 
-    }
-}
-
-//static nodeptr  removeNodeParsimony (nodeptr p, tree *tr)
-static nodeptr  removeNodeParsimony (nodeptr p)
-{ 
-  nodeptr  q, r;         
-
-  q = p->next->back;
-  r = p->next->next->back;   
-    
-  hookupDefault(q, r);
-
-  p->next->next->back = p->next->back = (node *) NULL;
-  
-  return  q;
-}
-
-static int rearrangeParsimony(pllInstance *tr, partitionList *pr, nodeptr p, int mintrav, int maxtrav, pllBoolean doAll)
-{   
-  nodeptr  
-    p1, 
-    p2, 
-    q, 
-    q1, 
-    q2;
-  
-  int      
-    mintrav2; 
-
-  pllBoolean
-    doP = PLL_TRUE,
-    doQ = PLL_TRUE;
-           
-  if (maxtrav > tr->ntips - 3)  
-    maxtrav = tr->ntips - 3; 
-
-  assert(mintrav == 1);
-
-  if(maxtrav < mintrav)
-    return 0;
-
-  q = p->back;
-
-  if(tr->constrained)
-    {    
-      if(! tipHomogeneityCheckerPars(tr, p->back, 0))
-        doP = PLL_FALSE;
-        
-      if(! tipHomogeneityCheckerPars(tr, q->back, 0))
-        doQ = PLL_FALSE;
-                        
-      if(doQ == PLL_FALSE && doP == PLL_FALSE)
-        return 0;
-    }  
-
-  if((p->number > tr->mxtips) && doP) 
-    {     
-      p1 = p->next->back;
-      p2 = p->next->next->back;
-      
-      if ((p1->number > tr->mxtips) || (p2->number > tr->mxtips)) 
-        {                 
-          //removeNodeParsimony(p, tr);          
-          removeNodeParsimony(p);                
-
-          if ((p1->number > tr->mxtips)) 
-            {
-              addTraverseParsimony(tr, pr, p, p1->next->back, mintrav, maxtrav, doAll, PLL_FALSE);
-              addTraverseParsimony(tr, pr, p, p1->next->next->back, mintrav, maxtrav, doAll, PLL_FALSE);
-            }
-         
-          if ((p2->number > tr->mxtips)) 
-            {
-              addTraverseParsimony(tr, pr, p, p2->next->back, mintrav, maxtrav, doAll, PLL_FALSE);
-              addTraverseParsimony(tr, pr, p, p2->next->next->back, mintrav, maxtrav, doAll, PLL_FALSE);
-            }
-            
-           
-          hookupDefault(p->next,       p1);
-          hookupDefault(p->next->next, p2);
-
-          newviewParsimony(tr, pr, p);
-        }
-    }  
-       
-  if ((q->number > tr->mxtips) && (maxtrav > 0) && doQ) 
-    {
-      q1 = q->next->back;
-      q2 = q->next->next->back;
-
-      if (
-          (
-           (q1->number > tr->mxtips) && 
-           ((q1->next->back->number > tr->mxtips) || (q1->next->next->back->number > tr->mxtips))
-           )
-          ||
-          (
-           (q2->number > tr->mxtips) && 
-           ((q2->next->back->number > tr->mxtips) || (q2->next->next->back->number > tr->mxtips))
-           )
-          )
-        {          
-
-          //removeNodeParsimony(q, tr);
-          removeNodeParsimony(q);
-          
-          mintrav2 = mintrav > 2 ? mintrav : 2;
-          
-          if ((q1->number > tr->mxtips)) 
-            {
-              addTraverseParsimony(tr, pr, q, q1->next->back, mintrav2 , maxtrav, doAll, PLL_FALSE);
-              addTraverseParsimony(tr, pr, q, q1->next->next->back, mintrav2 , maxtrav, doAll, PLL_FALSE);
-            }
-         
-          if ((q2->number > tr->mxtips)) 
-            {
-              addTraverseParsimony(tr, pr, q, q2->next->back, mintrav2 , maxtrav, doAll, PLL_FALSE);
-              addTraverseParsimony(tr, pr, q, q2->next->next->back, mintrav2 , maxtrav, doAll, PLL_FALSE);
-            }      
-           
-          hookupDefault(q->next,       q1);
-          hookupDefault(q->next->next, q2);
-           
-          newviewParsimony(tr, pr, q);
-        }
-    }
-
-  return 1;
-} 
-
-
-static void restoreTreeRearrangeParsimony(pllInstance *tr, partitionList *pr)
-{    
-  removeNodeParsimony(tr->removeNode);  
-  //removeNodeParsimony(tr->removeNode, tr);  
-  restoreTreeParsimony(tr, pr, tr->removeNode, tr->insertNode);
-}
-
-/*
-static pllBoolean isInformative2(pllInstance *tr, int site)
-{
-  int
-    informativeCounter = 0,
-    check[256],   
-    j,   
-    undetermined = 15;
-
-  unsigned char
-    nucleotide,
-    target = 0;
-        
-  for(j = 0; j < 256; j++)
-    check[j] = 0;
-  
-  for(j = 1; j <= tr->mxtips; j++)
-    {      
-      nucleotide = tr->yVector[j][site];            
-      check[nucleotide] =  check[nucleotide] + 1;                  
-    }
-  
-  
-  if(check[1] > 1)
-    {
-      informativeCounter++;    
-      target = target | 1;
-    }
-  if(check[2] > 1)
-    {
-      informativeCounter++; 
-      target = target | 2;
-    }
-  if(check[4] > 1)
-    {
-      informativeCounter++; 
-      target = target | 4;
-    }
-  if(check[8] > 1)
-    {
-      informativeCounter++; 
-      target = target | 8;
-    }
-          
-  if(informativeCounter >= 2)
-    return PLL_TRUE;    
-  else
-    {        
-      for(j = 0; j < undetermined; j++)
-        {
-          if(j == 3 || j == 5 || j == 6 || j == 7 || j == 9 || j == 10 || j == 11 || 
-             j == 12 || j == 13 || j == 14)
-            {
-              if(check[j] > 1)
-                {
-                  if(!(target & j))
-                    return PLL_TRUE;
-                }
-            }
-        } 
-    }
-     
-  return PLL_FALSE;          
-}
-*/
-
-static pllBoolean isInformative(pllInstance *tr, int dataType, int site)
-{
-  int
-    informativeCounter = 0,
-    check[256],   
-    j,   
-    undetermined = getUndetermined(dataType);
-
-  const unsigned int
-    *bitVector = getBitVector(dataType);
-
-  unsigned char
-    nucleotide;
-  
-        
-  for(j = 0; j < 256; j++)
-    check[j] = 0;
-  
-  for(j = 1; j <= tr->mxtips; j++)
-    {      
-      nucleotide = tr->yVector[j][site];            
-      check[nucleotide] =  check[nucleotide] + 1;
-      assert(bitVector[nucleotide] > 0);                   
-    }
-  
-  for(j = 0; j < undetermined; j++)
-    {
-      if(check[j] > 0)
-        informativeCounter++;    
-    } 
-          
-  if(informativeCounter <= 1)
-    return PLL_FALSE;    
-  else
-    {        
-      for(j = 0; j < undetermined; j++)
-        {
-          if(check[j] > 1)
-            return PLL_TRUE;
-        } 
-    }
-     
-  return PLL_FALSE;          
-}
-
-
-static void determineUninformativeSites(pllInstance *tr, partitionList *pr, int *informative)
-{
-  int 
-    model,
-    number = 0,
-    i;
-
-  /* 
-     Not all characters are useful in constructing a parsimony tree. 
-     Invariant characters, those that have the same state in all taxa, 
-     are obviously useless and are ignored by the method. Characters in 
-     which a state occurs in only one taxon are also ignored. 
-     All these characters are called parsimony uninformative.
-
-     Alternative definition: informative columns contain at least two types
-     of nucleotides, and each nucleotide must appear at least twice in each 
-     column. Kind of a pain if we intend to check for this when using, e.g.,
-     amibiguous DNA encoding.
-  */
-
-
-  for(model = 0; model < pr->numberOfPartitions; model++)
-    {
-      for(i = pr->partitionData[model]->lower; i < pr->partitionData[model]->upper; i++)
-        {
-           if(isInformative(tr, pr->partitionData[model]->dataType, i))
-             informative[i] = 1;
-           else
-             {
-               informative[i] = 0;
-               number++;
-             }  
-        }      
-    }
-
- 
-  /* printf("Uninformative Patterns: %d\n", number); */
-}
-
-
-static void reorderNodes(pllInstance *tr, nodeptr *np, nodeptr p, int *count)
-{
-  int i, found = 0;
-
-  if((p->number <= tr->mxtips))    
-    return;
-  else
-    {              
-      for(i = tr->mxtips + 1; (i <= (tr->mxtips + tr->mxtips - 1)) && (found == 0); i++)
-        {
-          if (p == np[i] || p == np[i]->next || p == np[i]->next->next)
-            {
-              if(p == np[i])                           
-                tr->nodep[*count + tr->mxtips + 1] = np[i];                             
-              else
-                {
-                  if(p == np[i]->next)            
-                    tr->nodep[*count + tr->mxtips + 1] = np[i]->next;                      
-                  else             
-                    tr->nodep[*count + tr->mxtips + 1] = np[i]->next->next;                                 
-                }
-
-              found = 1;                     
-              *count = *count + 1;
-            }
-        }            
-     
-      assert(found != 0);
-
-      reorderNodes(tr, np, p->next->back, count);     
-      reorderNodes(tr, np, p->next->next->back, count);                
-    }
-}
-
-
-
-static void nodeRectifierPars(pllInstance *tr)
-{
-  nodeptr *np = (nodeptr *)rax_malloc(2 * tr->mxtips * sizeof(nodeptr));
-  int i;
-  int count = 0;
-  
-  tr->start       = tr->nodep[1];
-  tr->rooted      = PLL_FALSE;
-
-  /* TODO why is tr->rooted set to PLL_FALSE here ?*/
-  
-  for(i = tr->mxtips + 1; i <= (tr->mxtips + tr->mxtips - 1); i++)
-    np[i] = tr->nodep[i];           
-  
-  reorderNodes(tr, np, tr->start->back, &count); 
-
- 
-  rax_free(np);
-}
-
-
-  
-static void compressDNA(pllInstance *tr, partitionList *pr, int *informative)
-{
-  size_t
-    totalNodes,
-    i,
-    model;
-   
-  totalNodes = 2 * (size_t)tr->mxtips;
-
- 
-
-  for(model = 0; model < (size_t) pr->numberOfPartitions; model++)
-    {
-      size_t
-        k,
-        states = (size_t)pr->partitionData[model]->states,
-        compressedEntries,
-        compressedEntriesPadded,
-        entries = 0, 
-        lower = pr->partitionData[model]->lower,
-        upper = pr->partitionData[model]->upper;
-
-      parsimonyNumber 
-        **compressedTips = (parsimonyNumber **)rax_malloc(states * sizeof(parsimonyNumber*)),
-        *compressedValues = (parsimonyNumber *)rax_malloc(states * sizeof(parsimonyNumber));
-      
-      for(i = lower; i < upper; i++)    
-        if(informative[i])
-          entries += (size_t)tr->aliaswgt[i];     
-  
-      compressedEntries = entries / PLL_PCF;
-
-      if(entries % PLL_PCF != 0)
-        compressedEntries++;
-
-#if (defined(__SSE3) || defined(__AVX))
-      if(compressedEntries % INTS_PER_VECTOR != 0)
-        compressedEntriesPadded = compressedEntries + (INTS_PER_VECTOR - (compressedEntries % INTS_PER_VECTOR));
-      else
-        compressedEntriesPadded = compressedEntries;
-#else
-      compressedEntriesPadded = compressedEntries;
-#endif     
-
-      
-      rax_posix_memalign ((void **) &(pr->partitionData[model]->parsVect), PLL_BYTE_ALIGNMENT, (size_t)compressedEntriesPadded * states * totalNodes * sizeof(parsimonyNumber));
-     
-      for(i = 0; i < compressedEntriesPadded * states * totalNodes; i++)      
-        pr->partitionData[model]->parsVect[i] = 0;
-
-      for(i = 0; i < (size_t)tr->mxtips; i++)
-        {
-          size_t
-            w = 0,
-            compressedIndex = 0,
-            compressedCounter = 0,
-            index = 0;
-
-          for(k = 0; k < states; k++)
-            {
-              compressedTips[k] = &(pr->partitionData[model]->parsVect[(compressedEntriesPadded * states * (i + 1)) + (compressedEntriesPadded * k)]);
-              compressedValues[k] = 0;
-            }                
-              
-          for(index = lower; index < (size_t)upper; index++)
-            {
-              if(informative[index])
-                {
-                  const unsigned int 
-                    *bitValue = getBitVector(pr->partitionData[model]->dataType);
-
-                  parsimonyNumber 
-                    value = bitValue[tr->yVector[i + 1][index]];          
-              
-                  for(w = 0; w < (size_t)tr->aliaswgt[index]; w++)
-                    {      
-                      for(k = 0; k < states; k++)
-                        {
-                          if(value & mask32[k])
-                            compressedValues[k] |= mask32[compressedCounter];
-                        }
-                     
-                      compressedCounter++;
-                  
-                      if(compressedCounter == PLL_PCF)
-                        {
-                          for(k = 0; k < states; k++)
-                            {
-                              compressedTips[k][compressedIndex] = compressedValues[k];
-                              compressedValues[k] = 0;
-                            }                    
-                          
-                          compressedCounter = 0;
-                          compressedIndex++;
-                        }
-                    }
-                }
-            }
-                           
-          for(;compressedIndex < compressedEntriesPadded; compressedIndex++)
-            {   
-              for(;compressedCounter < PLL_PCF; compressedCounter++)              
-                for(k = 0; k < states; k++)
-                  compressedValues[k] |= mask32[compressedCounter];               
-          
-              for(k = 0; k < states; k++)
-                {
-                  compressedTips[k][compressedIndex] = compressedValues[k];
-                  compressedValues[k] = 0;
-                }                     
-              
-              compressedCounter = 0;
-            }           
-        }               
-  
-      pr->partitionData[model]->parsimonyLength = compressedEntriesPadded;
-
-      rax_free(compressedTips);
-      rax_free(compressedValues);
-    }
-  
-  rax_posix_memalign ((void **) &(tr->parsimonyScore), PLL_BYTE_ALIGNMENT, sizeof(unsigned int) * totalNodes);  
-          
-  for(i = 0; i < totalNodes; i++) 
-    tr->parsimonyScore[i] = 0;
-}
-
-
-
-static void stepwiseAddition(pllInstance *tr, partitionList *pr, nodeptr p, nodeptr q)
-{            
-  nodeptr 
-    r = q->back;
-
-  unsigned int 
-    mp;
-  
-  int 
-    counter = 4;
-  
-  p->next->back = q;
-  q->back = p->next;
-
-  p->next->next->back = r;
-  r->back = p->next->next;
-   
-  computeTraversalInfoParsimony(p, tr->ti, &counter, tr->mxtips, PLL_FALSE);              
-  tr->ti[0] = counter;
-  tr->ti[1] = p->number;
-  tr->ti[2] = p->back->number;
-    
-  mp = evaluateParsimonyIterativeFast(tr, pr);
-  
-  if(mp < tr->bestParsimony)
-    {    
-      tr->bestParsimony = mp;
-      tr->insertNode = q;     
-    }
- 
-  q->back = r;
-  r->back = q;
-   
-  if(q->number > tr->mxtips && tr->parsimonyScore[q->number] > 0)
-    {         
-      stepwiseAddition(tr, pr, p, q->next->back);
-      stepwiseAddition(tr, pr, p, q->next->next->back);
-    }
-}
-
-
-
-void allocateParsimonyDataStructures(pllInstance *tr, partitionList *pr)
-{
-  int 
-    i,
-    *informative = (int *)rax_malloc(sizeof(int) * (size_t)tr->originalCrunchedLength);
- 
-  determineUninformativeSites(tr, pr, informative);
-
-  compressDNA(tr, pr, informative);
-
-  for(i = tr->mxtips + 1; i <= tr->mxtips + tr->mxtips - 1; i++)
-    {
-      nodeptr 
-        p = tr->nodep[i];
-
-      p->xPars = 1;
-      p->next->xPars = 0;
-      p->next->next->xPars = 0;
-    }
-
-  tr->ti = (int*)rax_malloc(sizeof(int) * 4 * (size_t)tr->mxtips);  
-
-  rax_free(informative); 
-}
-
-void pllFreeParsimonyDataStructures(pllInstance *tr, partitionList *pr)
-{
-  size_t 
-    model;
-
-  rax_free(tr->parsimonyScore);
-  
-  for(model = 0; model < (size_t) pr->numberOfPartitions; ++model)
-    rax_free(pr->partitionData[model]->parsVect);
-  
-  rax_free(tr->ti);
-}
-
-
-void pllMakeParsimonyTreeFast(pllInstance *tr, partitionList *pr, int sprDist)
-{   
-  nodeptr  
-    p, 
-    f;    
-
-  int 
-    i, 
-    nextsp,
-    *perm        = (int *)rax_malloc((size_t)(tr->mxtips + 1) * sizeof(int));  
-
-  unsigned int 
-    randomMP, 
-    startMP;         
-  
-  assert(!tr->constrained);
-
-  makePermutationFast(perm, tr->mxtips, tr);
-  
-  tr->ntips = 0;    
-  
-  tr->nextnode = tr->mxtips + 1;       
-  
-  buildSimpleTree(tr, pr, perm[1], perm[2], perm[3]);
-  
-  f = tr->start;       
-  
-  while(tr->ntips < tr->mxtips) 
-    {   
-      nodeptr q;
-      
-      tr->bestParsimony = INT_MAX;
-      nextsp = ++(tr->ntips);             
-      p = tr->nodep[perm[nextsp]];                 
-      q = tr->nodep[(tr->nextnode)++];
-      p->back = q;
-      q->back = p;
-        
-      if(tr->grouped)
-        {
-          int 
-            number = p->back->number;            
-
-          tr->constraintVector[number] = -9;
-        }
-          
-      stepwiseAddition(tr, pr, q, f->back);
-      
-      {
-        nodeptr   
-          r = tr->insertNode->back;
-        
-        int counter = 4;
-        
-        hookupDefault(q->next,       tr->insertNode);
-        hookupDefault(q->next->next, r);
-        
-        computeTraversalInfoParsimony(q, tr->ti, &counter, tr->mxtips, PLL_FALSE);              
-        tr->ti[0] = counter;
-        
-        newviewParsimonyIterativeFast(tr, pr);
-      }
-    }    
-  
-  nodeRectifierPars(tr);
-  
-  randomMP = tr->bestParsimony;        
-  
-  do
-    {
-      startMP = randomMP;
-      nodeRectifierPars(tr);
-      for(i = 1; i <= tr->mxtips + tr->mxtips - 2; i++)
-        {
-          rearrangeParsimony(tr, pr, tr->nodep[i], 1, sprDist, PLL_FALSE);
-          if(tr->bestParsimony < randomMP)
-            {           
-              restoreTreeRearrangeParsimony(tr, pr);
-              randomMP = tr->bestParsimony;
-            }
-        }                          
-    }
-  while(randomMP < startMP);
-  
-  rax_free(perm);
-} 
diff --git a/pllrepo/src/genericParallelization.c b/pllrepo/src/genericParallelization.c
deleted file mode 100644
index 1454b5e..0000000
--- a/pllrepo/src/genericParallelization.c
+++ /dev/null
@@ -1,2283 +0,0 @@
-/** 
- * PLL (version 1.0.0) a software library for phylogenetic inference
- * Copyright (C) 2013 Tomas Flouri and Alexandros Stamatakis
- *
- * Derived from 
- * RAxML-HPC, a program for sequential and parallel estimation of phylogenetic
- * trees by Alexandros Stamatakis
- *
- * This program is free software: you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation, either version 3 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- * 
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * For any other enquiries send an Email to Tomas Flouri
- * Tomas.Flouri at h-its.org
- *
- * When publishing work that uses PLL please cite PLL
- * 
- * @file genericParallelization.c
- */
-#include "mem_alloc.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdint.h>
-#include <limits.h>
-
-#ifdef MEASURE_TIME_PARALLEL
-#include <time.h>
-#endif
-
-#include <assert.h>
-
-#include "genericParallelization.h"
-#include "pllInternal.h"
-#include "pll.h"
-
-/** @file genericParallelization.c
-    
-    @brief Generic master-worker parallelization with either pthreads or MPI. 
-    
-    Worker threads/processes mostly work on a local
-    tree. Implementationwise, MPI operations are abstracted as good as
-    possible via defines (that translate to no-ops or memcpy-calls in
-    the pthreads version).
-
-    @todo the code still contains many memory copy operations that
-    could be executed more efficiently in-place  
-*/
-
-
-
-void perSiteLogLikelihoodsPthreads(pllInstance *tr, partitionList *pr, double *lhs, int n, int tid);
-void broadcastAfterRateOpt(pllInstance *tr, pllInstance *localTree, partitionList *pr, int n, int tid);
-void branchLength_parallelReduce(pllInstance *tr, double *dlnLdlz,  double *d2lnLdlz2, int numBranches );
-void pllMasterPostBarrier(pllInstance *tr, partitionList *pr, int jobType);
-static void distributeYVectors(pllInstance *localTree, pllInstance *tr, partitionList *localPr);
-static void distributeWeights(pllInstance *localTree, pllInstance *tr, partitionList *localPr);
-static pllBoolean execFunction(pllInstance *tr, pllInstance *localTree, partitionList *pr, partitionList *localPr, int tid, int n);
-
-static void *likelihoodThread(void *tData); 
-
-static void multiprocessorScheduling(pllInstance * tr, partitionList *pr, int tid);
-
-static void computeFraction(partitionList *localPr, int tid, int n);
-static void computeFractionMany(partitionList *localPr, int tid);
-static void initializePartitionsMaster(pllInstance *tr, pllInstance *localTree, partitionList *pr, partitionList *localPr, int tid, int n);
-
-#ifdef _FINE_GRAIN_MPI
-static char* addBytes(char *buf, void *toAdd, size_t numBytes); 
-static char* popBytes(char *buf, void *result, size_t numBytes); 
-static void defineTraversalInfoMPI(void);
-static pllBoolean pllWorkerTrap(pllInstance *tr, partitionList *pr);
-#endif
-
-#ifdef _USE_PTHREADS
-static pthread_t *threads;
-static threadData *tData;
-#endif
-
-extern volatile int jobCycle; 
-extern volatile int threadJob;          /**< current job to be done by worker threads/processes */
-extern pllBoolean treeIsInitialized; 
-
-#ifdef MEASURE_TIME_PARALLEL
-extern double masterTimePerPhase; 
-double timeBuffer[NUM_PAR_JOBS]; 
-double timePerRegion[NUM_PAR_JOBS]; 
-#endif
-
-extern char* getJobName(int tmp); 
-
-//extern double *globalResult; 
-extern volatile char *barrierBuffer;
-
-
-#ifdef _FINE_GRAIN_MPI
-extern MPI_Datatype TRAVERSAL_MPI; 
-
-/** @brief Pthreads helper function for adding bytes to communication buffer.
-
-    Copy \a numBytes bytes from \a toAdd to \a buf
-
-    @param buf
-      Where to place bytes
-
-    @param toAdd
-      Where to copy them from
-
-    @param numBytes
-      How many to copy
-
-    @return
-      Pointer to the end of placed data in communication buffer (first free slot)
- */ 
-static char* addBytes(char *buf, void *toAdd, size_t numBytes)
-{
-  memcpy(buf, toAdd, numBytes);  
-  return buf + numBytes;  
-}
-
-/** @brief Pthreads helper function for removing bytes from communication buffer
-    
-    Copies \a numBytes bytes from communication buffer \a buf to the local buffer \a result
-
-    @param buf
-      Communication buffer to copy from
-
-    @param result
-      Where to store the bytes
-
-    @param numBytes
-      How many to copy
-    
-    @return
-      Pointer to the end of read data in communication buffer (first free slot)
- */ 
-static char* popBytes(char *buf, void *result, size_t numBytes)
-{
-  memcpy(result, buf, numBytes); 
-  return buf + numBytes;   
-}
-
-/** @brief Lock the MPI slave processes prior allocating partitions
-
-    MPI slave processes are locked and wait until the master process
-    has read the number of partitions, which it then broadcasts
-    to slaves, effectively unlocking them. The slave processes will
-    then allocate their own data structures and be locked in the
-    likelihood function.
-
-    @param tr
-      PLL instance
-    
-    @todo
-      This function should not be called by the user. It is called
-      at \a pllCreateInstance. Probably this function should be removed
-      and inline code be placed in \a pllCreateInstance.
-*/
-void pllLockMPI (pllInstance * tr)
-{
-  int numberOfPartitions;
-  partitionList * pr;
-
-  if (!MASTER_P) 
-   {
-     //MPI_Bcast (&numberOfPartitions, 1, MPI_INT, MPI_ROOT, MPI_COMM_WORLD);
-     MPI_Bcast (&numberOfPartitions, 1, MPI_INT, 0, MPI_COMM_WORLD);
-     pr = (partitionList *) rax_calloc (1, sizeof (partitionList));
-     pr->numberOfPartitions = numberOfPartitions;
-
-     pllWorkerTrap (tr, pr);
-     MPI_Barrier (MPI_COMM_WORLD);
-     MPI_Finalize ();
-     exit(0);
-   }
-}
-
-/** Finalize MPI run
-
-    Finalizes MPI run by synchronizing all processes (master + slaves) with a
-    barrier so that all free their allocated resources. Then \a MPI_Finalize ()
-    is called.
-
-    @todo
-      Similarly as with the \a pllLockMPI function, this should not be called
-      by the user, but it is called implicitly at the end of \a pllDestroyInstance.
-      Probably this function should be removed and inline code be placed in
-      \a pllDestroyInstance.
-*/
-void pllFinalizeMPI (void)
-{
-  MPI_Barrier (MPI_COMM_WORLD);
-  MPI_Finalize ();
-}
-
-/**
-   @brief Sets up the MPI environment.  
-
-   Calls the \a MPI_Init function and makes sure all processes store
-   their process ID and the total number of processes, using a barrier.
-   
-   @note this should be the first call that is executed in your main
-   method.
-   
-   @param argc   
-     Address of argc from main
-   @param argv   
-     Address of argv from main
- */
-void pllInitMPI(int * argc, char **argv[])
-{  
-  MPI_Init(argc, argv);
-  MPI_Comm_rank(MPI_COMM_WORLD, &processID);
-  MPI_Comm_size(MPI_COMM_WORLD, &processes);
-
-  /* if(MASTER_P) */
-  /*   printf("\nThis is RAxML Process Number: %d (MASTER)\n", processID); */
-  MPI_Barrier(MPI_COMM_WORLD);
-
-}
-
-
-/**
-   @brief Traps worker MPI processes.    
-   
-   @note  This function should be called immediately after initMPI()
-
-   @param tr 
-     PLL instance 
-
-   @param pr
-     List of partitions
-
-   @return
-     Returns \b PLL_FALSE if the caller was the master thread/process, otherwise \b PLL_TRUE
- */ 
-static pllBoolean pllWorkerTrap(pllInstance *tr, partitionList *pr)
-{
-  /// @note for the broadcasting, we need to know whether the tree structure has already been initialized
-  treeIsInitialized = PLL_FALSE; 
-
-  if(NOT MASTER_P) 
-    {
-      threadData tData; 
-      tData.tr = tr; 
-      tData.threadNumber = processID;
-      tData.pr = pr;
-      
-      likelihoodThread(&tData);
-
-      /* notice: the next call MUST be the return call from the main method */
-      return PLL_TRUE; 
-    }
-  return PLL_FALSE; 
-}
-
-
-#define ELEMS_IN_TRAV_INFO  9
-/** @brief Create a datastructure for sending the traversal descriptor.
-    
-    @note This seems to be a very safe method to define your own mpi
-   datatypes (often there are problems with padding). But it is not
-   entirely for the weak of heart...
- */ 
-static void defineTraversalInfoMPI (void)
-{
-  MPI_Datatype *result  = &TRAVERSAL_MPI; 
-
-  int i ; 
-  MPI_Aint base; 
-  int blocklen[ELEMS_IN_TRAV_INFO+1] = {1, 1, 1, 1, PLL_NUM_BRANCHES, PLL_NUM_BRANCHES, 1,1,1,1}; 
-  MPI_Aint disp[ELEMS_IN_TRAV_INFO+1];
-  MPI_Datatype type[ELEMS_IN_TRAV_INFO+1] = {MPI_INT, MPI_INT, MPI_INT, MPI_INT, MPI_DOUBLE, MPI_DOUBLE, MPI_INT, MPI_INT, MPI_INT, MPI_UB}; 
-  traversalInfo desc[2]; 
-
-  MPI_Get_address( desc, disp);
-  MPI_Get_address( &(desc[0].pNumber), disp + 1 );
-  MPI_Get_address( &(desc[0].qNumber), disp + 2 );  
-  MPI_Get_address( &(desc[0].rNumber), disp + 3); 
-  MPI_Get_address( desc[0].qz, disp + 4 );
-  MPI_Get_address( desc[0].rz, disp + 5 );
-  MPI_Get_address( &(desc[0].slot_p), disp + 6);
-  MPI_Get_address( &(desc[0].slot_q), disp + 7);
-  MPI_Get_address( &(desc[0].slot_r), disp + 8);
-  MPI_Get_address( desc + 1, disp + 9);
-
-  base = disp[0]; 
-  for(i = 0; i < ELEMS_IN_TRAV_INFO+1; ++i)
-    disp[i] -= base;
-
-  MPI_Type_create_struct( ELEMS_IN_TRAV_INFO+1 , blocklen, disp, type, result);
-  MPI_Type_commit(result);
-}
-
-
-#endif
-
-
-/********************/
-/* PTHREAD-SPECIFIC */
-/********************/
-#ifdef _USE_PTHREADS
-
-#ifndef _PORTABLE_PTHREADS
-/** @brief Pins a thread to a core (for efficiency). 
-
-    This is a non-portable function that works only on some linux distributions of pthreads.
-    It sets the affinity of each thread to a specific core so that the performance is not
-    degraded due to threads migration.
-
-    @note 
-      It is only called if \a _PORTABLE_PTHREADS is not defined
-
-    @param tid the thread id
- */ 
-void pinToCore(int tid)
-{
-  static int nextCore = 0;
-
-  cpu_set_t cpuset;
-
-  CPU_ZERO(&cpuset);    
-  CPU_SET(nextCore++, &cpuset);
-
-  if(pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset) != 0)
-    {
-      assert(0);
-    }
-}
-#endif
-
-/**  Start PThreads
-
-     Start JOINABLE threads by executing \a pthread_create. The threads
-     are attached to the \a likelihoodThread function
-
-     @param tr
-       PLL instance
-
-     @param pr
-       List of partitions
-
-     @todo
-       This function should never be called by the user. It is called
-       implicitly at \a pllInitModel. Perhaps we should add a check
-       or inline the code
- */ 
-void pllStartPthreads (pllInstance *tr, partitionList *pr)
-{
-  pthread_attr_t attr;
-  int rc, t;
-  treeIsInitialized = PLL_FALSE; 
-
-  jobCycle        = 0;
-  threadJob       = 0;
-
-  /* printf("\nThis is the RAxML Master Pthread\n");   */
-
-#if (NOT defined(_USE_PTHREADS) && defined( MEASURE_TIME_PARALLEL))
-  timeBuffer = rax_calloc(NUM_PAR_JOBS * tr->numberOfThreads, sizeof(double)); 
-#endif
-
-  pthread_attr_init(&attr);
-  pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
-
-  threads    = (pthread_t *)rax_malloc((size_t)tr->numberOfThreads * sizeof(pthread_t));
-  tData      = (threadData *)rax_malloc((size_t)tr->numberOfThreads * sizeof(threadData));
-
-  barrierBuffer            = (volatile char *)  rax_malloc(sizeof(volatile char)   *  (size_t)tr->numberOfThreads);
-
-  for(t = 0; t < tr->numberOfThreads; t++)
-    barrierBuffer[t] = 0;
-
-  for(t = 1; t < tr->numberOfThreads; t++)
-    {
-      tData[t].tr  = tr;
-      tData[t].pr  = pr;
-      tData[t].threadNumber = t;
-      rc = pthread_create(&threads[t], &attr, likelihoodThread, (void *)(&tData[t]));
-      if(rc)
-	{
-	  printf("ERROR; return code from pthread_create() is %d\n", rc);
-	  exit(-1);
-	}
-    }
-  pthread_attr_destroy (&attr);
-}
-
-/** Stop PThread
-    
-    Stop threads by \a pthread_join
-
-    @param  tr
-      PLL instance
-
-    @todo
-      This function should never be called by the user. It is implicitly called
-      at \a pllPartitionsDestroy. We should inline the code
-*/
-void pllStopPthreads (pllInstance * tr)
-{
-  int i;
-
-  for (i = 1; i < tr->numberOfThreads; ++ i)
-   {
-     pthread_join (threads[i], NULL);
-   }
- 
-  rax_free (threads);
-  rax_free (tData);
-  rax_free ((void *)barrierBuffer);
-  rax_free (globalResult);
-
-}
-#endif
-
-
-/** Compute per-site log likelihoods (PThreads version) 
-
-    Worker threads evaluate the likelihood on their sites
-
-    @param tr 
-      Tree instance
-
-    @param lhs
-      Likelihood array
-
-    @param n
-      Number of threads
-
-    @param tid
-      Thread id
- */ 
-void perSiteLogLikelihoodsPthreads(pllInstance *tr, partitionList *pr, double *lhs, int n, int tid)
-{
-  size_t 
-    model, 
-    i;
-
-  for(model = 0; model < (size_t)pr->numberOfPartitions; model++)
-    {      
-      size_t 
-	localIndex = 0;
-
-      /* decide if this partition is handled by the thread when -Q is activated 
-	 or when -Q is not activated figure out which sites have been assigned to the 
-	 current thread */
-
-      pllBoolean 
-	execute = ((tr->manyPartitions && isThisMyPartition(pr, tid, model)) || (!tr->manyPartitions));
-
-      /* if the entire partition has been assigned to this thread (-Q) or if -Q is not activated 
-	 we need to compute some per-site log likelihoods with thread tid for this partition */
-
-      if(execute)
-	for(i = (size_t)(pr->partitionData[model]->lower);  i < (size_t)(pr->partitionData[model]->upper); i++)
-	  {
-	    /* if -Q is active we compute all per-site log likelihoods for the partition,
-	       otherwise we only compute those that have been assigned to thread tid 
-	       using the cyclic distribution scheme */
-
-	    if(tr->manyPartitions || (i % n == (size_t)tid))
-	      {
-		double 
-		  l;
-
-		/* now compute the per-site log likelihood at the current site */
-
-		switch(tr->rateHetModel)
-		  {
-		  case PLL_CAT:
-		    l = evaluatePartialGeneric (tr, pr, localIndex, pr->partitionData[model]->perSiteRates[pr->partitionData[model]->rateCategory[localIndex]], model);
-		    break;
-		  case PLL_GAMMA:
-		    l = evaluatePartialGeneric (tr, pr, localIndex, 1.0, model);
-		    break;
-		  default:
-		    assert(0);
-		  }
-
-		/* store it in an array that is local in memory to the current thread,
-		   see function collectDouble() in axml.c for understanding how we then collect these 
-		   values stored in local arrays from the threads */
-
-		lhs[i] = l;
-
-		localIndex++;
-	      }
-	  }
-    }
-}
-
-/** @brief Check if a partition is assigned to a thread/process.
-
-    Checks whether partition \a model from partition list \a localPr is
-    assigned to be processed by process/thread with id \a tid.
-
-    @param localPr
-      Local list of partitions
-
-    @param tid 
-      Thread/Process id
-
-    @param model
-      Partition number
- */ 
-pllBoolean isThisMyPartition(partitionList *localPr, int tid, int model)
-{ 
-  if(localPr->partitionData[model]->partitionAssignment == tid)
-    return PLL_TRUE;
-  else
-    return PLL_FALSE;
-}
-
-/** @brief Computes partition size for all partitions (in case full partitions are assigned to workers). 
-
-    @param localPr the local partitions instance
-    
-    @param tid thread id    
- */ 
-static void computeFractionMany(partitionList *localPr, int tid)
-{
-  int
-    sites = 0;
-
-  int   
-    model;
-
-  for(model = 0; model < localPr->numberOfPartitions; model++)
-    {
-      if(isThisMyPartition(localPr, tid, model))
-	{	 
-    	  localPr->partitionData[model]->width = localPr->partitionData[model]->upper - localPr->partitionData[model]->lower;
-	  sites += localPr->partitionData[model]->width;
-	}
-      else       	  
-    	  localPr->partitionData[model]->width = 0;
-    }
-
-
-}
-
-
-/** @brief Computes partition size for all partitions (for cyclic distribution of sites)
-    
-    @param localPr the local partitions instance
-    @param tid thread id
-    @param n number of workers
- */ 
-static void computeFraction(partitionList *localPr, int tid, int n)
-{
-  int
-    i,
-    model;
-
-  for(model = 0; model < localPr->numberOfPartitions; model++)
-    {
-      int width = 0;
-
-      for(i = localPr->partitionData[model]->lower; i < localPr->partitionData[model]->upper; i++)
-	if(i % n == tid)
-	  width++;
-      localPr->partitionData[model]->width = width;
-    }
-}
-
-
-
-/** @brief Compare partition sizes. 
-    @param p1 pointer to a partition
-    @param p2 pointer to another partition
- */ 
-static int partCompare(const void *p1, const void *p2)
-{
-  partitionType 
-    *rc1 = (partitionType *)p1,
-    *rc2 = (partitionType *)p2;
-
-  int 
-    i = rc1->partitionLength,
-    j = rc2->partitionLength;
-
-  if (i > j)
-    return (-1);
-  if (i < j)
-    return (1);
-  return (0);
-}
-
-
-/** @brief Top-level function for the multi processor scheduling
-    scheme (assigns full partitions to workers).
-    
-   tr->manyPartitions is set to PLL_TRUE if the user has indicated via -Q
-   that there are substantially more partitions than threads/cores
-   available. In that case we do not distribute sites from each
-   partition in a cyclic fashion to the cores , but distribute entire
-   partitions to cores.  Achieving a good balance of alignment sites
-   to cores boils down to the multi-processor scheduling problem known
-   from theoretical comp. sci.  which is NP-complete.  We have
-   implemented very simple "standard" heuristics for solving the
-   multiprocessor scheduling problem that turn out to work very well
-   and are cheap to compute.
-   
-   @param pr 
-     List of partitions
-
-   @param tid
-     Id of current process/thread 
-*/
-static void multiprocessorScheduling(pllInstance * tr, partitionList *pr, int tid)
-{
-  int 
-    s,
-    model,
-    modelStates[2] = {4, 20},
-    numberOfPartitions[2] = {0 , 0},
-      arrayLength = sizeof(modelStates) / sizeof(int);
-
-      /* check that we have not added any new models for data types with a different number of states
-	 and forgot to update modelStates */
-
-      for(model = 0; model < pr->numberOfPartitions; model++)
-	{        
-	  pllBoolean 
-	    exists = PLL_FALSE;
-
-	  for(s = 0; s < arrayLength; s++)
-	    {
-	      exists = exists || (pr->partitionData[model]->states == modelStates[s]);
-	      if(pr->partitionData[model]->states == modelStates[s])
-		numberOfPartitions[s] += 1;
-	    }
-
-	  assert(exists);
-	}
-
-      for(s = 0; s < arrayLength; s++)
-	{
-	  if(numberOfPartitions[s] > 0)
-	    {
-	      size_t   
-		checkSum = 0,
-		sum = 0;
-
-	      int    
-		i,
-		k,
-#ifndef _FINE_GRAIN_MPI
-		n = tr->numberOfThreads,
-#else
-		n = processes,
-#endif
-		p = numberOfPartitions[s],    
-		*assignments = (int *)rax_calloc((size_t)n, sizeof(int));  
-
-	      partitionType 
-		*pt = (partitionType *)rax_malloc(sizeof(partitionType) * (size_t)p);
-
-
-
-	      for(i = 0, k = 0; i < pr->numberOfPartitions; i++)
-		{
-		  if(pr->partitionData[i]->states == modelStates[s])
-		    {
-		      pt[k].partitionNumber = i;
-		      pt[k].partitionLength = pr->partitionData[i]->upper - pr->partitionData[i]->lower;
-		      sum += (size_t)pt[k].partitionLength;
-		      k++;
-		    }
-		}
-
-	      assert(k == p);
-
-	      qsort(pt, p, sizeof(partitionType), partCompare);    
-
-	      for(i = 0; i < p; i++)
-		{
-		  int 
-		    k, 
-		    min = INT_MAX,
-		    minIndex = -1;
-
-		  for(k = 0; k < n; k++)	
-		    if(assignments[k] < min)
-		      {
-			min = assignments[k];
-			minIndex = k;
-		      }
-
-		  assert(minIndex >= 0);
-
-		  assignments[minIndex] +=  pt[i].partitionLength;
-		  assert(pt[i].partitionNumber >= 0 && pt[i].partitionNumber < pr->numberOfPartitions);
-		  pr->partitionData[pt[i].partitionNumber]->partitionAssignment = minIndex;
-		}
-
-              
-              /* Process i gets assignments[i] sites for modelStates[s] state model */
-
-	      for(i = 0; i < n; i++)
-		checkSum += (size_t)assignments[i];
-
-	      assert(sum == checkSum);
-
-	      rax_free(assignments);
-	      rax_free(pt);
-	    }
-	}
-}
-
-
-
-/** @brief Reduce the first and second derivative of the likelihood
-    function.
-    
-    We collect the first and second derivatives from the various
-    threads and sum them up. It's similar to what we do in
-    pllEvaluateGeneric() with the only difference that we have to collect
-    two values (first and second derivative) instead of only one (the
-    log likelihood).
-
-   @warning operates on global reduction buffers \a globalResult
-   
-   @param tr tree 
-   @param dlnLdlz first derivative
-   @param d2lnLdlz2 second derivative
-*/
-void branchLength_parallelReduce(pllInstance *tr, double *dlnLdlz,  double *d2lnLdlz2, int numBranches )
-{
-#ifdef _REPRODUCIBLE_MPI_OR_PTHREADS
-
-  /* only the master executes this  */
-  assert(tr->threadID == 0); 
-  
-  int b; 
-  int t; 
-  for(b = 0; b < numBranches; ++b)
-    {
-      dlnLdlz[b] = 0; 
-      d2lnLdlz2[b] = 0; 
-
-      for(t = 0; t < tr->numberOfThreads; ++t)
-	{
-	  dlnLdlz[b] += globalResult[t * numBranches * 2 + b ];
-	  d2lnLdlz2[b] += globalResult[t * numBranches * 2 + numBranches + b];
-	}
-    }
-#else 
-  memcpy(dlnLdlz, globalResult, sizeof(double) * numBranches);
-  memcpy(d2lnLdlz2, globalResult + numBranches, sizeof(double) * numBranches);
-#endif
-}
-
-
-
-/** @brief Read from buffer or writes rates into buffer.  Return
-    number of elems written.
-
-    If \a read is set to \b PLL_TRUE, then the contents of \a srcTar are
-    copied to \a buf. Otherwise, the contents of \a buf are moved to
-    \a srcTar.
-   
-   @param buf 
-     Buffer
-
-   @param srcTar 
-     Pointer to either source or destination array
-
-   @param tr
-     PLL instance
-
-   @param n number of workers
-
-   @param tid process id
-
-   @param read 
-     If read-mode then set to \b PLL_TRUE
-
-   @param countOnly
-     if \b PLL_TRUE, simply return the number of elements
-*/
-static int doublesToBuffer(double *buf, double *srcTar, pllInstance *tr, partitionList *pr, int n, int tid, pllBoolean read, pllBoolean countOnly)
-{
-  int 
-    model,
-    i;
-  double 
-    *initPtr = buf; 
-
-  for(model = 0; model < pr->numberOfPartitions; model++)
-    {
-      if(tr->manyPartitions)
-	{
-	  if(isThisMyPartition(pr, tid, model))
-	    for(i = pr->partitionData[model]->lower; i < pr->partitionData[model]->upper; i++)
-	      {
-		if(NOT countOnly)
-		  {
-		    if(read)
-		      *buf = srcTar[i]; 
-		    else 
-		      srcTar[i] = *buf; 
-		  }
-		buf++;
-	      }	  
-	}      
-      else
-	{
-	  for(i = pr->partitionData[model]->lower; i < pr->partitionData[model]->upper; i++)
-	    if(i % n == tid)
-	      {
-		if(NOT countOnly)
-		  {
-		    if(read)
-		      *buf = srcTar[i];
-		    else 
-		      srcTar[i] = *buf; 
-		  }
-		buf++; 
-	      }
-	}
-    }
-  
-  return buf - initPtr; 
-}
-
-
-
-
-/** @brief broadcast rates after rate optimization. 
-    
-    @param tr Library instance
-    @param localTree local library instance 
-    @param n number of workers 
-    @param tid worker id 
-    
-    @todo mpi_alltoallv/w may be more efficient, but it is a hell to set up
- */ 
-void broadcastAfterRateOpt(pllInstance *tr, pllInstance *localTree, partitionList *pr, int n, int tid)
-{				  
-  int
-    num1 = 0,
-    num2 = 0,
-    num3 = 0, 
-    i ; 
-    
-  for(i = 0; i < n; ++i)
-    {
-      double
-	allBuf[tr->originalCrunchedLength * 3],
-	buf1[tr->originalCrunchedLength],
-	buf2[tr->originalCrunchedLength], 
-	buf3[tr->originalCrunchedLength]; 
-
-#ifdef _USE_PTHREADS
-      if(i != tid)
-	continue; 
-#endif
-      int numDouble = 0; 
-      
-      /* extract doubles  */
-
-      num1 = doublesToBuffer(buf1, localTree->patrat, tr, pr, n,i, PLL_TRUE, i!= tid);
-      num2 = doublesToBuffer(buf2, localTree->patratStored, tr, pr, n,i, PLL_TRUE, i!= tid);
-      num3 = doublesToBuffer(buf3, localTree->lhs, tr, pr, n,i, PLL_TRUE, i!= tid);
-
-      /* printf("%d + %d + %d\n", num1, num2, num3);  */
-
-      numDouble += num1 + num2 + num3; 
-
-      /* copy doubles  */
-      
-      memcpy(allBuf, buf1, num1 * sizeof(double)); 
-      memcpy(allBuf + num1, buf2, num2 * sizeof(double)); 
-      memcpy(allBuf + (num1 + num2) , buf3, num3 * sizeof(double)); 
-
-      BCAST_BUF(allBuf, numDouble, MPI_DOUBLE, i); 
-
-      memcpy(buf1, allBuf, num1 * sizeof(double)); 
-      memcpy(buf2, allBuf + num1, num2 * sizeof(double)); 
-      memcpy(buf3, allBuf + (num1 + num2), num3 * sizeof(double)); 
-      
-      /* re-insert doubles  */
-      int assertCtr = 0; 
-      assertCtr += doublesToBuffer(buf1, tr->patrat, tr, pr, n,i,PLL_FALSE, PLL_FALSE);
-      assertCtr += doublesToBuffer(buf2, tr->patratStored, tr, pr, n,i,PLL_FALSE, PLL_FALSE);
-      assertCtr += doublesToBuffer(buf3, tr->lhs, tr, pr, n,i,PLL_FALSE, PLL_FALSE);
-
-      assert(assertCtr == numDouble); 
-    }
-}
-
-
-/** @brief Collect doubles from workers to master.
- 
-    
-
-    @param dst destination array
-    @param src source array
-    @param tr library instance 
-    @param n number of workers 
-    @param tid worker id 
- */
-static void collectDouble(double *dst, double *src, pllInstance *tr, partitionList *pr, int n, int tid)
-{
-#ifdef _FINE_GRAIN_MPI    
-  int
-    assertNum = 0,
-    i, 
-    displacements[tr->numberOfThreads];
-  double 
-    buf[tr->originalCrunchedLength],
-    resultBuf[tr->originalCrunchedLength]; 
-
-  /* NOTE: This was moved here because it was an additional unnecessary move for the PTHREADS version. I didnt
-  have time to check the MPI version, have to get back to this and remove it */
-  /* gather own persite log likelihood values into local buffer  */
-  int numberCollected = doublesToBuffer(buf, src, tr, pr,n,tid,PLL_TRUE, PLL_FALSE);
-
-  /* this communicates all the values to the master */
-  
-  int numberPerWorker[tr->numberOfThreads];     
-  if(MASTER_P)			/* master counts number to receive, receives and writes back */
-    {
-      for(i = 0; i < n; ++i)
-	{
-	  numberPerWorker[i] = doublesToBuffer(buf,src,tr,pr,n,i,PLL_FALSE, PLL_TRUE);
-	  displacements[i] = i == 0 ? 0 : displacements[i-1] + numberPerWorker[i-1]; 
-	}
-      
-      MPI_Gatherv(buf, numberCollected, MPI_DOUBLE,
-		  resultBuf, numberPerWorker, displacements,  MPI_DOUBLE,
-		  0, MPI_COMM_WORLD); 
-
-      double *bufPtr = resultBuf; 
-      for(i = 0 ; i < n; ++i)
-	{
-	  int numberWritten = doublesToBuffer(bufPtr, dst,tr,pr,n,i, PLL_FALSE, PLL_FALSE);
-	  bufPtr += numberWritten; 
-	  assertNum += numberWritten; 
-	}    
-      
-      assert(assertNum == tr->originalCrunchedLength);
-    }
-  else 				/* workers only send their buffer   */
-    MPI_Gatherv(buf, numberCollected, MPI_DOUBLE, resultBuf, numberPerWorker, displacements, MPI_DOUBLE, 0, MPI_COMM_WORLD);   
-#else 
-  /* pthread version only writes to global space  */  
-
-  //assertNum = doublesToBuffer(buf, dst,tr,pr,n,tid, PLL_FALSE, PLL_FALSE);
-  doublesToBuffer (dst, src, tr, pr, n, tid, PLL_TRUE, PLL_FALSE);
-  //assert(assertNum == numberCollected); 
-#endif
-}
-
-
-
-/** @brief broadcast a new alpha (for the GAMMA model)
-    @param localPr local list of partitions
-    @param pr list of partitions
-      (the four \a gammaRates entries of every partition are transferred)
- */
-static void broadCastAlpha(partitionList *localPr, partitionList *pr)
-{
-  int  i, 
-    model; 
-
-#ifdef _FINE_GRAIN_MPI
-    int bufSize = localPr->numberOfPartitions * 4 * sizeof(double);
-  char bufDbl[bufSize]; 
-  char *bufPtrDbl = bufDbl;   
-#endif
-
-  RECV_BUF(bufDbl, bufSize, MPI_BYTE); 
-
-  for(model = 0; model < localPr->numberOfPartitions; model++)
-    for(i = 0; i < 4; ++i)
-      ASSIGN_BUF_DBL(localPr->partitionData[model]->gammaRates[i], pr->partitionData[model]->gammaRates[i]);
-  
-  SEND_BUF(bufDbl, bufSize, MPI_BYTE);  
-}
-
-/** @brief broadcast new LG4X weights
-    @param localPr local list of partitions
-    @param pr list of partitions
-      (the four \a lg4x_weights entries of every partition are transferred)
- */
-static void broadCastLg4xWeights(partitionList *localPr, partitionList *pr)
-{
-  int  i,
-    model;
-
-#ifdef _FINE_GRAIN_MPI
-    int bufSize = localPr->numberOfPartitions * 4 * sizeof(double);
-  char bufDbl[bufSize];
-  char *bufPtrDbl = bufDbl;
-#endif
-
-  RECV_BUF(bufDbl, bufSize, MPI_BYTE);
-
-  for(model = 0; model < localPr->numberOfPartitions; model++)
-    for(i = 0; i < 4; ++i)
-      ASSIGN_BUF_DBL(localPr->partitionData[model]->lg4x_weights[i], pr->partitionData[model]->lg4x_weights[i]);
-
-  SEND_BUF(bufDbl, bufSize, MPI_BYTE);
-}
-
-static void copyLG4(partitionList *localPr, partitionList *pr)
-{
-    int model, i, k;
-
-    /* determine size of buffer needed first */
-    int bufSize = 0;
-
-#ifdef _FINE_GRAIN_MPI
-    for(model = 0; model < localPr->numberOfPartitions; ++model )
-      {
-        const partitionLengths *pl = getPartitionLengths(pr->partitionData[model]);
-        bufSize += 4*(pl->eignLength + pl->evLength + pl->eiLength + pl->tipVectorLength + pl->substRatesLength + pl->frequenciesLength) * sizeof(double) ;
-      }
-#endif
-
-    char
-      bufDbl[bufSize];
-    char *bufPtrDbl = bufDbl;
-
-    RECV_BUF(bufDbl, bufSize, MPI_BYTE);
-
-    for (model = 0; model < localPr->numberOfPartitions; model++)
-    {
-        pInfo * localInfo = localPr->partitionData[model];
-        pInfo * info = pr->partitionData[model];
-
-        if (info->protModels == PLL_LG4M || info->protModels == PLL_LG4X)
-        {
-            for (k = 0; k < 4; k++)
-            {
-                const partitionLengths *pl = getPartitionLengths(pr->partitionData[model]);
-
-                for (i = 0; i < pl->eignLength; ++i)
-                    ASSIGN_BUF_DBL(
-                            localPr->partitionData[model]->EIGN_LG4[k][i],
-                            pr->partitionData[model]->EIGN_LG4[k][i]);
-                for (i = 0; i < pl->evLength; ++i)
-                    ASSIGN_BUF_DBL(localPr->partitionData[model]->EV_LG4[k][i],
-                            pr->partitionData[model]->EV_LG4[k][i]);
-                for (i = 0; i < pl->eiLength; ++i)
-                    ASSIGN_BUF_DBL(localPr->partitionData[model]->EI_LG4[k][i],
-                            pr->partitionData[model]->EI_LG4[k][i]);
-                for (i = 0; i < pl->substRatesLength; ++i)
-                    ASSIGN_BUF_DBL(
-                            localPr->partitionData[model]->substRates_LG4[k][i],
-                            pr->partitionData[model]->substRates_LG4[k][i]);
-                for (i = 0; i < pl->frequenciesLength; ++i)
-                    ASSIGN_BUF_DBL(
-                            localPr->partitionData[model]->frequencies_LG4[k][i],
-                            pr->partitionData[model]->frequencies_LG4[k][i]);
-                for (i = 0; i < pl->tipVectorLength; ++i)
-                    ASSIGN_BUF_DBL(
-                            localPr->partitionData[model]->tipVector_LG4[k][i],
-                            pr->partitionData[model]->tipVector_LG4[k][i]);
-            }
-        }
-    }
-    SEND_BUF(bufDbl, bufSize, MPI_BYTE); /*  */
-}
-
-/** @brief Master broadcasts rates.
-    
-    @param localPr local list of partitions
-    @param pr list of partitions
-      (EIGN, EV, EI and tipVector of every partition are transferred, then copyLG4 is called)
- */ 
-static void broadCastRates(partitionList *localPr, partitionList *pr)
-{
-  int 
-    model;
-
-  /* determine size of buffer needed first */
-  int bufSize = 0;
-#ifdef _FINE_GRAIN_MPI
-  for(model = 0; model < localPr->numberOfPartitions; ++model )
-    {	  
-      const partitionLengths *pl = getPartitionLengths(pr->partitionData[model]); /* this is constant, isnt it?  */
-      bufSize += (pl->eignLength + pl->evLength + pl->eiLength + pl->tipVectorLength) * sizeof(double) ;
-    }
-#endif
-
-  char
-      bufDbl[bufSize];
-    char *bufPtrDbl = bufDbl;
-
-  RECV_BUF(bufDbl, bufSize, MPI_BYTE);
-  int i ; 
-
-  for(model = 0; model < localPr->numberOfPartitions; model++)
-    {
-      const partitionLengths *pl = getPartitionLengths(pr->partitionData[model]); /* this is constant, isnt it?  */
-
-      for(i = 0; i < pl->eignLength; ++i)
-	ASSIGN_BUF_DBL(localPr->partitionData[model]->EIGN[i], pr->partitionData[model]->EIGN[i]);
-      for(i = 0; i < pl->evLength; ++i)
-	ASSIGN_BUF_DBL(localPr->partitionData[model]->EV[i],pr->partitionData[model]->EV[i]);
-      for(i = 0; i  < pl->eiLength; ++i)
-	ASSIGN_BUF_DBL(localPr->partitionData[model]->EI[i], pr->partitionData[model]->EI[i]);
-      for(i = 0; i < pl->tipVectorLength; ++i)
-	ASSIGN_BUF_DBL(localPr->partitionData[model]->tipVector[i],   pr->partitionData[model]->tipVector[i]);
-    }
-  SEND_BUF(bufDbl, bufSize, MPI_BYTE); /*  */
-
-  copyLG4(localPr, pr);
-}
-
-/** @brief Evaluate the likelihood of this topology (PThreads/MPI implementation)
-
-    Evaluate the likelihood of the topology described in the PLL instance. First
-    every thread/process calls \a pllEvaluateIterative on its local instance, which
-    computes the log likelihoods for its portion of each partition. The results
-    (\a partitionLH of every partition) are stored as elements of a local buffer
-    array (\a buf). This is done by all threads/processes.
-
-    If \a _REPRODUCIBLE_MPI_OR_PTHREADS is defined, \a buf is passed to
-    ASSIGN_GATHER together with \a globalResult and \a tid; the summation is left
-    to the master (see the comment inside the function body). Otherwise an
-    \a MPI_Reduce with MPI_SUM adds corresponding elements of the local buffers
-    into another array (\a targetBuf) on rank 0, and the master copies these sums
-    back into \a partitionLH of \a localPr.
-
-    In addition, if \a getPerSiteLikelihoods is enabled the log likelihoods for each site
-    in the (compressed) alignment are first rearranged into \a localTree->lhs and then
-    gathered into the array \a tr->lhs via \a collectDouble.
-
-    @param tr
-      PLL instance
-
-    @param localTree
-      Local (thread/process) PLL instance
-
-    @param localPr
-      Local (thread/process) list of partitions
-
-    @param tid
-      Thread/Process ID
-
-    @param getPerSiteLikelihoods 
-      If set to \b PLL_TRUE, compute the log likelihood for each site. 
- */ 
-static void reduceEvaluateIterative(pllInstance *tr, pllInstance *localTree, partitionList *localPr, int tid, pllBoolean getPerSiteLikelihoods)
-{
-  int model;
-
-  pllEvaluateIterative(localTree, localPr, getPerSiteLikelihoods);
-
-  /* when this is done we need to write the per-thread log likelihood to the 
-     global reduction buffer. Tid is the thread ID, hence thread 0 will write its 
-     results to reductionBuffer[0] thread 1 to reductionBuffer[1] etc.
-
-     the actual sum over the entries in the reduction buffer will then be computed 
-     by the master thread which ensures that the sum is deterministic */
-
-  
-  /* if (getPerSiteLikelihoods == PLL_TRUE) store per-site likelihoods in array tr->lhs */
-  if(getPerSiteLikelihoods)
-    {    
-#ifdef _FINE_GRAIN_MPI
-      int n = processes; 
-#else 
-      int n = tr->numberOfThreads; 
-#endif
-
-      /* rearrange per site likelihoods into single local array for gathering:
-         with manyPartitions a worker copies all sites of the partitions it owns,
-         otherwise it copies the sites i with i % n == tid; ctr walks through the
-         worker's own perSiteLikelihoods array of the partition */
-      int i ; 
-      for(model = 0; model < localPr->numberOfPartitions; ++model)
-	{
-	  pInfo *partition = localPr->partitionData[model]; 
-	  pllBoolean isMyPartition  = isThisMyPartition(localPr, tid, model);
-
-	  int ctr = 0; 
-	  for(i = partition->lower; i < partition->upper; ++i)
-	    {
-	      if(tr->manyPartitions && isMyPartition)
-		localTree->lhs[i] = partition->perSiteLikelihoods[ ctr++]; 
-	      else if(NOT tr->manyPartitions && (i % n) == tid)
-		localTree->lhs[i] = partition->perSiteLikelihoods[ctr++];
-	    }
-	}
-      
-      /* gather all the double into the global array */
-      collectDouble(tr->lhs, localTree->lhs, localTree, localPr,  n, tid); 
-    }
-
-  /* printf("collecting done\n" ); */
-#ifdef _REPRODUCIBLE_MPI_OR_PTHREADS
-  /* 
-     aberer: I implemented this as a mpi_gather operation into this buffer, 
-     pthreads version emulates this gather; 
-     master takes care of the reduction; 
-  */
-
-  double 
-    buf[localPr->numberOfPartitions];
-
-  for(model = 0; model < localPr->numberOfPartitions; ++model)
-    buf[model] = localPr->partitionData[model]->partitionLH;
-
-  /* either make reproducible or efficient */
-  ASSIGN_GATHER(globalResult, buf, localPr->numberOfPartitions, PLL_DOUBLE, tid);
-
-  /* printf("gather worked\n"); */
-#else 
-  /* the efficient mpi version: a proper reduce  */
-  double 
-    buf[localPr->numberOfPartitions];
-  
-  for(model = 0; model < localPr->numberOfPartitions; ++model)
-    buf[model] = localPr->partitionData[model]->partitionLH;
-
-  double 
-    targetBuf[localPr->numberOfPartitions];
-  
-  memset(targetBuf, 0, sizeof(double) * localPr->numberOfPartitions);
-
-  MPI_Reduce(buf, targetBuf, localPr->numberOfPartitions, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
-  
-  if(MASTER_P) 
-    {
-      for(model = 0; model < localPr->numberOfPartitions; ++model) {
-	localPr->partitionData[model]->partitionLH = targetBuf[model];
-      }
-    }
-#endif
-}
-
-
-
-/** @brief Broadcast the traversal descriptor to worker threads. 
-
-  The one below is a hack: we are re-assigning the local pointer to
-  the global one; the memcpy version below is just for testing and
-  preparing the fine-grained MPI BlueGene version.
-
-  Pthreads build (_USE_PTHREADS): functionType, count and
-  traversalHasChanged of tr->td[0] are assigned to localTree->td[0];
-  executeModel and parameterValues are copied for
-  localPr->numberOfPartitions entries; the traversal list ti is copied
-  (count entries) only if traversalHasChanged is set.
-
-  Other builds (MPI): the same fields are passed through ASSIGN_BUF on a
-  buffer of messageSize(length) bytes, bracketed by RECV_BUF / SEND_BUF.
-  Once treeIsInitialized is set this also covers executeModel and
-  parameterValues per partition plus the first TRAVERSAL_LENGTH entries
-  of ti. If count exceeds TRAVERSAL_LENGTH, the remaining entries are
-  sent with a separate MPI_Bcast.
-
-  @param localTree local library instance
-  @param tr library instance
-  @param localPr local list of partitions; its numberOfPartitions is the
-         number of executeModel / parameterValues entries transferred
-*/
-/* TODO: we should reset this at some point, the explicit copy is just done for testing */
-__inline static void broadcastTraversalInfo(pllInstance *localTree, pllInstance *tr, partitionList *localPr)
-{
-  /* @todo these two regions could be joined */
-#ifdef _USE_PTHREADS
-  /* memcpy -> memmove (see ticket #43). This function is sometimes called with localTree == tr,
-   * in which case some memcpy implementations can corrupt the buffers.
-   */
-  
-  localTree->td[0].functionType =            tr->td[0].functionType;
-  localTree->td[0].count =                   tr->td[0].count ;
-  localTree->td[0].traversalHasChanged =     tr->td[0].traversalHasChanged;
-
-  memmove(localTree->td[0].executeModel,    tr->td[0].executeModel,    sizeof(pllBoolean) * localPr->numberOfPartitions);
-  memmove(localTree->td[0].parameterValues, tr->td[0].parameterValues, sizeof(double) * localPr->numberOfPartitions);
-  
-  if(localTree->td[0].traversalHasChanged)
-    memmove(localTree->td[0].ti, tr->td[0].ti, localTree->td[0].count * sizeof(traversalInfo));
-
-#else
-  /* MPI */
-  /* like in raxml-light: first we send a small message, if the
-     traversalDescriptor is longer, then resend */
-  
-  int length = treeIsInitialized ? localPr->numberOfPartitions : 0;
-  char broadCastBuffer[messageSize(length)]; 
-  char *bufPtr = broadCastBuffer; 
-  int i; 
-
-  RECV_BUF(broadCastBuffer, messageSize(length), MPI_BYTE); 
-
-  ASSIGN_BUF(localTree->td[0].functionType, tr->td[0].functionType , int);   
-  ASSIGN_BUF(localTree->td[0].count,  tr->td[0].count , int); 
-  ASSIGN_BUF(localTree->td[0].traversalHasChanged, tr->td[0].traversalHasChanged , int); 
-
-  if(treeIsInitialized)  
-    { 
-      for(i = 0; i < localPr->numberOfPartitions; ++i)
-	{
-	  ASSIGN_BUF(localTree->td[0].executeModel[i],      tr->td[0].executeModel[i], int); 
-	  ASSIGN_BUF(localTree->td[0].parameterValues[i],	 tr->td[0].parameterValues[i], double); 
-	}      
-
-      for(i = 0; i < TRAVERSAL_LENGTH; ++i )
-	ASSIGN_BUF(localTree->td[0].ti[i], tr->td[0].ti[i], traversalInfo); 
-    }
-    
-  SEND_BUF(broadCastBuffer, messageSize(length), MPI_BYTE); 
-
-  /* now we send the second part of the traversal descriptor, if we
-     exceed the pre-set number of elements */
-  if(treeIsInitialized && localTree->td[0].count > TRAVERSAL_LENGTH) 
-    {
-      /* lets use the MPI_Datatype for this thing, what I've read it's
-	 supposed to be more secure and efficient */
-      MPI_Bcast(localTree->td[0].ti + TRAVERSAL_LENGTH, localTree->td[0].count - TRAVERSAL_LENGTH, TRAVERSAL_MPI, 0, MPI_COMM_WORLD );
-    }
-#endif
-}
-
-
-/** @brief helper that yields a string representation of a parallel region. 
-
-    Maps a PLL_THREAD_* job identifier to its name. All job types that
-    execFunction() dispatches are covered, including the two LG4X jobs.
-    
-    @param type type of parallel region (one of the PLL_THREAD_* job constants)
-
-    @return pointer to a string literal naming the job; the caller must not
-            modify or free it. An unknown \a type triggers assert(0); if
-            assertions are compiled out, "UNKNOWN" is returned instead of
-            falling off the end of the function.
- */ 
-char* getJobName(int type)
-{
-  switch(type)  
-    {
-    case  PLL_THREAD_NEWVIEW:       
-      return "PLL_THREAD_NEWVIEW";
-    case PLL_THREAD_EVALUATE: 
-      return "PLL_THREAD_EVALUATE";
-    case PLL_THREAD_MAKENEWZ: 
-      return "PLL_THREAD_MAKENEWZ";
-    case PLL_THREAD_MAKENEWZ_FIRST: 
-      return "PLL_THREAD_MAKENEWZ_FIRST";
-    case PLL_THREAD_RATE_CATS: 
-      return "PLL_THREAD_RATE_CATS";
-    case PLL_THREAD_COPY_RATE_CATS: 
-      return "PLL_THREAD_COPY_RATE_CATS";
-    case PLL_THREAD_COPY_INIT_MODEL: 
-      return "PLL_THREAD_COPY_INIT_MODEL";
-    case PLL_THREAD_INIT_PARTITION: 
-      return "PLL_THREAD_INIT_PARTITION";
-    case PLL_THREAD_OPT_ALPHA: 
-      return "PLL_THREAD_OPT_ALPHA";
-    case PLL_THREAD_OPT_RATE: 
-      return "PLL_THREAD_OPT_RATE";
-    case PLL_THREAD_COPY_ALPHA: 
-      return "PLL_THREAD_COPY_ALPHA";
-    case PLL_THREAD_COPY_RATES: 
-      return "PLL_THREAD_COPY_RATES";
-    case PLL_THREAD_COPY_LG4X_RATES: 
-      return "PLL_THREAD_COPY_LG4X_RATES";
-    case PLL_THREAD_OPT_LG4X_RATE: 
-      return "PLL_THREAD_OPT_LG4X_RATE";
-    case PLL_THREAD_PER_SITE_LIKELIHOODS: 
-      return "PLL_THREAD_PER_SITE_LIKELIHOODS";
-    case PLL_THREAD_NEWVIEW_ANCESTRAL: 
-      return "PLL_THREAD_NEWVIEW_ANCESTRAL";
-    case PLL_THREAD_GATHER_ANCESTRAL: 
-      return "PLL_THREAD_GATHER_ANCESTRAL";
-    case PLL_THREAD_EXIT_GRACEFULLY: 
-      return "PLL_THREAD_EXIT_GRACEFULLY";
-    case PLL_THREAD_EVALUATE_PER_SITE_LIKES:
-      return "PLL_THREAD_EVALUATE_PER_SITE_LIKES";
-    default:
-      assert(0); 
-    }
-
-  /* only reached for an unknown job type when assertions are compiled out */
-  return "UNKNOWN";
-}
-
-/**
-   @brief Generic entry point for parallel regions (mostly broadcasts
-   traversal descriptor first).
-
-   This function here handles all parallel regions in the Pthreads
-   version, when we enter this function pllMasterBarrier() has been called
-   by the master thread from within the sequential part of the
-   program, tr is the library instance (tree) at the master thread, 
-   localTree is the library instance (tree) at the worker threads
-
-   While this is not necessary, address spaces of threads are indeed
-   separated for easier transition to a distributed memory paradigm
-
-   The job to execute is read from localTree->td[0].functionType after
-   broadcastTraversalInfo(). In the Pthreads build it is additionally
-   decoded as threadJob >> 16 beforehand and asserted to be identical.
-   In the non-Pthreads build localTree is overwritten with tr.
-   
-   @param tr library instance
-   @param localTree local library instance 
-   @param pr list of partitions
-   @param localPr local list of partitions
-   @param tid worker id 
-   @param n number of workers 
-
-   @return PLL_FALSE if the job was PLL_THREAD_EXIT_GRACEFULLY, PLL_TRUE
-           after every other job
-
-   @note RECV_BUF, SEND_BUF, ASSIGN_BUF, ASSIGN_BUF_DBL, ASSIGN_DBL and
-         ASSIGN_GATHER are macros defined outside this function. Several
-         buffers they are given are declared only under _FINE_GRAIN_MPI, so
-         presumably the macros ignore those arguments in the Pthreads
-         build -- TODO confirm against their definitions.
-*/
-static pllBoolean execFunction(pllInstance *tr, pllInstance *localTree, partitionList *pr, partitionList *localPr, int tid, int n)
-{
-  int
-    i,
-    model,
-    localCounter;
-
-#ifdef MEASURE_TIME_PARALLEL
-  double timeForParallelRegion = gettime();
-#endif
-
-
-#ifdef _USE_PTHREADS
-  /* some stuff associated with the barrier implementation using Pthreads and busy wait */
-  int currentJob = threadJob >> 16;
-#endif
-
-  /* here the master sends and all threads/processes receive the traversal descriptor */
-  broadcastTraversalInfo(localTree, tr, localPr);
-
-#ifdef _USE_PTHREADS
-  /* make sure that nothing is going wrong */
-  assert(currentJob == localTree->td[0].functionType);
-#else   
-  localTree = tr; 
-  int currentJob = localTree->td[0].functionType; 
-#endif
-
-#ifdef DEBUG_PARALLEL
-  printf("[%d] working on %s\n", tid, getJobName(currentJob)); 
-#endif  
-
-  switch(currentJob)
-    { 
-    case PLL_THREAD_NEWVIEW: 
-      /* just a newview on the fraction of sites that have been assigned to this thread */
-
-      pllNewviewIterative(localTree, localPr, 0);
-      break;     
-    case PLL_THREAD_EVALUATE: 
-      reduceEvaluateIterative(tr, localTree, localPr, tid, PLL_FALSE);
-      break;	
-    case PLL_THREAD_MAKENEWZ_FIRST:
-
-      /* this is the first call from within makenewz that requires getting the likelihood vectors to the left and 
-         right of the branch via newview and doing some precomputations.
-	 
-         For details see comments in makenewzGenericSpecial.c 
-
-         There is intentionally no break here: control falls through to
-         PLL_THREAD_MAKENEWZ, which tests functionType to decide whether
-         makenewzIterative() has to be run first.
-      */
-    case  PLL_THREAD_MAKENEWZ:
-      {	
-	double
-	  dlnLdlz[PLL_NUM_BRANCHES],
-	  d2lnLdlz2[PLL_NUM_BRANCHES]; 
-
-	if(localTree->td[0].functionType == PLL_THREAD_MAKENEWZ_FIRST)
-	  makenewzIterative(localTree, localPr);
-	execCore(localTree, localPr, dlnLdlz, d2lnLdlz2);
-
-	/* gather the first and second derivatives that have been written by each thread */
-	/* as for evaluate above, the final sum over the derivatives will be computed by the 
-	   master thread in its sequential part of the code */
-
-	int numBranches = localPr->perGeneBranchLengths?localPr->numberOfPartitions:1;
-
-#ifdef _REPRODUCIBLE_MPI_OR_PTHREADS
-	/* MPI: implemented as a gather again, pthreads: just buffer copying.
-	   buf holds the numBranches first derivatives followed by the
-	   numBranches second derivatives */	
-	double buf[ 2 * numBranches];
-	memcpy( buf, dlnLdlz, numBranches * sizeof(double) );
-	memcpy(buf + numBranches, d2lnLdlz2, numBranches * sizeof(double));
-
-	ASSIGN_GATHER(globalResult, buf,  2 * numBranches, PLL_DOUBLE, tid);
-#else 	
-	double result[numBranches];
-	memset(result,0, numBranches * sizeof(double));
-	MPI_Reduce( dlnLdlz , result , numBranches, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
-	if(MASTER_P)
-	  memcpy(globalResult, result, sizeof(double) * numBranches);
-	
-	memset(result,0,numBranches * sizeof(double));
-	MPI_Reduce( d2lnLdlz2 , result , numBranches, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
-	if(MASTER_P)
-	  memcpy(globalResult + numBranches, result, sizeof(double) * numBranches);
-#endif
-      }
-
-      break;
-
-    case PLL_THREAD_INIT_PARTITION:       
-
-      /* broadcast data and initialize and allocate arrays in partitions */
-      
-      initializePartitionsMaster(tr, localTree, pr, localPr, tid, n);
-
-      break;          
-    case PLL_THREAD_COPY_ALPHA: 
-    case PLL_THREAD_OPT_ALPHA:
-      /* this is when we have changed the alpha parameter, inducing a change in the discrete gamma rate categories.
-	 this is called when we are optimizing or sampling (in the Bayesian case) alpha parameter values */
-      
-      /* distribute the new discrete gamma rates to the threads */
-      broadCastAlpha(localPr,pr);
-
-      /* compute the likelihood, note that this is always a full tree traversal ! */
-      if(localTree->td[0].functionType == PLL_THREAD_OPT_ALPHA)
-	reduceEvaluateIterative(tr, localTree, localPr, tid, PLL_FALSE);
-
-      break;
-    case PLL_THREAD_OPT_RATE:
-    case PLL_THREAD_COPY_RATES:
-
-      /* if we are optimizing the rates in the transition matrix Q this induces recomputing the eigenvector eigenvalue 
-	 decomposition and the tipVector as well because of the special numerics in RAxML, the matrix of eigenvectors 
-	 is "rotated" into the tip lookup table.
-
-	 Hence if the sequential part of the program that steers the Q matrix rate optimization has changed a rate we
-	 need to broadcast all eigenvectors, eigenvalues etc to each thread 
-      */
-
-      broadCastRates(localPr, pr);
-
-      /* now evaluate the likelihood of the new Q matrix, this always requires a full tree traversal because the changes need
-	 to be propagated throughout the entire tree */
-
-      if(localTree->td[0].functionType == PLL_THREAD_OPT_RATE)
-	reduceEvaluateIterative(tr, localTree, localPr, tid, PLL_FALSE);
-
-      break;
-    case PLL_THREAD_COPY_LG4X_RATES:
-
-        broadCastLg4xWeights(localPr, pr);
-        broadCastAlpha(localPr, pr);
-
-        assert(localPr->partitionData[0]->lg4x_weights[0] == pr->partitionData[0]->lg4x_weights[0]);
-
-        break;
-    case PLL_THREAD_OPT_LG4X_RATE:
-
-        broadCastLg4xWeights(localPr, pr);
-        broadCastAlpha(localPr, pr);
-
-        assert(localPr->partitionData[0]->lg4x_weights[0] == pr->partitionData[0]->lg4x_weights[0]);
-
-        /* compute the likelihood, note that this is always a full tree traversal ! */
-        reduceEvaluateIterative(tr, localTree, localPr, tid, PLL_FALSE);
-
-        break;
-    case PLL_THREAD_COPY_INIT_MODEL:
-      {
-
-	/* need to be very careful here ! PLL_THREAD_COPY_INIT_MODEL is also used when the program is restarted 
-	   it is hence not sufficient to just initialize everything by the default values ! */
-
-	broadCastRates(localPr, pr);
-	broadCastAlpha(localPr, pr); /* isnt that only executed when we are on gamma?  */
-	broadCastLg4xWeights(localPr, pr);
-
-	/*
-	  copy initial model parameters, the Q matrix and alpha are initially, when we start our likelihood search 
-	  set to default values. 
-	  Hence we need to copy all those values that are required for computing the likelihood 
-	  with newview(), evaluate() and makenewz() to the private memory of the threads 
-	*/
-
-
-	if( localTree->rateHetModel == PLL_CAT) /* TRICKY originally this should only be executed by workers  */
-	  {
-#ifdef _FINE_GRAIN_MPI
-	    int bufSize = 2 * localTree->originalCrunchedLength * sizeof(double); 
-	    char bufDbl[bufSize], 
-	      *bufPtrDbl = bufDbl; 
-#endif
-
-	    RECV_BUF(bufDbl, bufSize,MPI_BYTE); 
-
-	    /* this should be local  */
-	    for(model = 0; model < localPr->numberOfPartitions; model++)
-	      localPr->partitionData[model]->numberOfCategories      = pr->partitionData[model]->numberOfCategories;
-
-
-	    /* this is only relevant for the PSR model, we can worry about this later */
-	    for(i = 0; i < localTree->originalCrunchedLength; ++i)
-	      {
-		ASSIGN_BUF_DBL(localTree->patrat[i], tr->patrat[i]);
-		ASSIGN_BUF_DBL(localTree->patratStored[i], tr->patratStored[i]); 
-	      }
-
-	    SEND_BUF(bufDbl, bufSize, MPI_BYTE); 
-	  }
-      } 
-      break;    
-    case PLL_THREAD_RATE_CATS: 
-      {
-	/* this is for optimizing per-site rate categories under PSR, let's worry about this later */
-
-	ASSIGN_DBL( localTree->lower_spacing,  tr->lower_spacing);
-	ASSIGN_DBL( localTree->upper_spacing,  tr->upper_spacing);
-
-	optRateCatPthreads(localTree, localPr, localTree->lower_spacing, localTree->upper_spacing, localTree->lhs, n, tid);
-
-	broadcastAfterRateOpt(tr, localTree, localPr, n,  tid);
-      }
-      break;
-    case PLL_THREAD_COPY_RATE_CATS:
-      {
-	/* 
-	   this is invoked when we have changed the per-site rate category assignment
-	   In essence it distributes the new per site rates to all threads 
-
-	   The pthread-version here simply assigns everything as ought to
-	   be. The MPI-version is configured to write to a buffer instead
-	   and SEND (master) or RECV (workers) it.
-
-	*/
-
-	/* 
-	   start of communication part 
-
-	   note: the i declared below shadows the function-level i for the
-	   remainder of this case block
-	*/
-
-	int i, 
-	  /* buf[localPr->numberOfPartitions], */
-	  /* assertCtr = 0,  */
-	  dblBufSize = 0; 
-
-#ifdef _FINE_GRAIN_MPI
-	int bufSize = localPr->numberOfPartitions * sizeof(int); 
-	char buf[bufSize]; 
-	char *bufPtr = buf; 
-#endif
-     
-	RECV_BUF(buf, bufSize, MPI_BYTE);
-
-	for( model = 0; model < localPr->numberOfPartitions; ++model)
-	  {
-	    ASSIGN_BUF(localPr->partitionData[model]->numberOfCategories, pr->partitionData[model]->numberOfCategories, int);
-	    dblBufSize += localPr->partitionData[model]->numberOfCategories * sizeof(double);
-	  }
-
-	SEND_BUF(buf, bufSize, MPI_BYTE); 
-
-
-	dblBufSize += 2 * localTree->originalCrunchedLength * sizeof(double); 
-
-#ifdef _FINE_GRAIN_MPI
-	char bufDbl[dblBufSize],
-	  *bufPtrDbl = bufDbl;
-#endif
-
-	RECV_BUF(bufDbl, dblBufSize, MPI_BYTE); 
-
-	for(i = 0; i < localTree->originalCrunchedLength; ++i)
-	  {	 
-	    ASSIGN_BUF_DBL(localTree->patrat[i], tr->patrat[i]); 
-	    ASSIGN_BUF_DBL(localTree->patratStored[i], tr->patratStored[i]); 
-	  }
-
-	for( model = 0; model < localPr->numberOfPartitions; ++model)
-	  for(i = 0; i < localPr->partitionData[model]->numberOfCategories; i++)
-	    ASSIGN_BUF_DBL(localPr->partitionData[model]->perSiteRates[i], pr->partitionData[model]->perSiteRates[i]);
-
-	SEND_BUF(bufDbl, dblBufSize, MPI_BYTE); 
-
-
-	/* lets test, if it is a good idea to send around the basic categories  */
-#ifdef _FINE_GRAIN_MPI
-	/* TODO this is inefficient, but is seems to have a small impact on performance */
-	MPI_Bcast(tr->rateCategory, tr->originalCrunchedLength, MPI_INT, 0, MPI_COMM_WORLD); 
-#endif
-
-
-	/* 
-	   now re-assign values: each worker copies the rate categories of
-	   the sites it is responsible for (whole partitions it owns when
-	   manyPartitions is set, otherwise the sites i with i % n == tid)
-	   into its partition-local rateCategory array
-	*/
-	for(model = 0; model < localPr->numberOfPartitions; model++)
-	  {
-	    if(localTree->manyPartitions)
-	      {
-		if(isThisMyPartition(localPr, tid, model))
-		  for(localCounter = 0, i = localPr->partitionData[model]->lower;  i < localPr->partitionData[model]->upper; i++, localCounter++)
-		    {	     
-		      localPr->partitionData[model]->rateCategory[localCounter] = tr->rateCategory[i];
-		    } 
-	      }
-	    else	  
-	      {
-		for(localCounter = 0, i = localPr->partitionData[model]->lower;  i < localPr->partitionData[model]->upper; i++)
-		  {
-		    if(i % n == tid)
-		      {		 
-			localPr->partitionData[model]->rateCategory[localCounter] = tr->rateCategory[i];
-
-			localCounter++;
-		      }
-		  }
-	      }
-	  }
-      }
-      break;
-    case PLL_THREAD_PER_SITE_LIKELIHOODS:      
-      {
-
-	/* compute per-site log likelihoods for the sites/partitions 
-	   that are handled by this thread */
-	perSiteLogLikelihoodsPthreads(localTree, localPr, localTree->lhs, n, tid);
-
-	/* do a parallel gather operation, the threads will write their results 
-	   into the global buffer tr->lhs that will then contain all per-site log likelihoods
-	   in the proper order 
-	*/
-
-	collectDouble(tr->lhs,                localTree->lhs,                  localTree, localPr, n, tid);
-
-      }
-      break;
-      /* check for errors: the two ancestral jobs below are not handled here and abort via assert */
-    case PLL_THREAD_NEWVIEW_ANCESTRAL:       
-      assert(0);
-      break; 
-    case PLL_THREAD_GATHER_ANCESTRAL:
-      assert(0); 
-      break; 
-    case PLL_THREAD_EXIT_GRACEFULLY: 
-      {
-	/* cleans up the workers memory */
-
-#ifdef _USE_PTHREADS
-	/* TODO destroying the tree does not work yet in a highly
-	   generic manner. */
-
-	if(NOT MASTER_P)
-	  {
-	    pllPartitionsDestroy (localTree, &localPr);
-	    /* pllTreeDestroy (localTree); */
-	  }
-	else 
-	  {
-	    //pllPartitionsDestroy (tr, &pr);
-	    /* pllTreeDestroy (tr); */
-	  }
-
-#else 
-	//pllPartitionsDestroy (tr, &pr);
-	/* pllTreeDestroy (tr); */
-	
-	//MPI_Finalize();
-	//exit(0); 
-#endif	
-	return PLL_FALSE; 
-      }
-      break; 
-    case PLL_THREAD_EVALUATE_PER_SITE_LIKES: 
-      {
-	reduceEvaluateIterative(tr, localTree, localPr, tid, PLL_TRUE);
-      }
-      break;
-    default:
-      printf("Job %d\n", currentJob);
-      assert(0);
-    }
-
-  return PLL_TRUE; 
-}
-
-
-
-
-/**  Target function where the threads/processes are trapped
-
-     The threads/processes spend all of their time in this function
-     running operations on the data (computing likelihoods).
-
-     Pthreads build (_USE_PTHREADS): the thread allocates its own zeroed
-     pllInstance and partitionList, then loops: it spins until \a threadJob
-     differs from the last value it has seen, refreshes
-     perGeneBranchLengths and numberOfPartitions of its local partition
-     list from \a pr (except for the PLL_THREAD_INIT_PARTITION job), runs
-     execFunction() and finally sets barrierBuffer[tid] to 1. The loop ends
-     when execFunction() returns false; the arrays of the local instance
-     and the instance itself are then released.
-
-     Other builds (MPI): execFunction() is called on the instance and
-     partition list from \a tData until it returns false, after which the
-     buffers listed below, the partition entries, the partition list and
-     the instance are released.
-
-     @param tData
-       Structure that contains the vital information for the thread/process, 
-       i.e. PLL instance, list of partitions and thread ID
-
-     @return always NULL
-
-     @note
-       The data in \a tData are different for pthreads and MPI. 
-       Expand this section.
-
-     @note
-       NOTE(review): in the Pthreads build localPr is allocated here but not
-       freed here; presumably the pllPartitionsDestroy() call made by
-       execFunction() for PLL_THREAD_EXIT_GRACEFULLY releases it -- confirm.
-       The spin loop relies on \a threadJob (declared elsewhere) being
-       re-read on every iteration -- confirm that it is declared volatile.
- */ 
-static void *likelihoodThread(void *tData)
-{
-  threadData *td = (threadData*)tData;
-  pllInstance 
-    *tr = td->tr;
-  partitionList *pr = td->pr;
-
-#ifdef _USE_PTHREADS
-  pllInstance *localTree = rax_calloc(1,sizeof(pllInstance )); 
-  partitionList *localPr = rax_calloc(1,sizeof(partitionList));
-
-  int
-    myCycle = 0,
-    localTrap = 1;
-
-  const int 
-    n = td->tr->numberOfThreads,
-    tid = td->threadNumber;
-
-#ifndef _PORTABLE_PTHREADS
-  pinToCore(tid);
-#endif
-
-  /* printf("\nThis is RAxML Worker Pthread Number: %d\n", tid); */
-
-  while(localTrap)
-    {
-
-      while (myCycle == threadJob);
-      myCycle = threadJob;
-
-      if ((threadJob >> 16) != PLL_THREAD_INIT_PARTITION) {
-    	  localPr->perGeneBranchLengths = pr->perGeneBranchLengths;
-      	  localPr->numberOfPartitions = pr->numberOfPartitions;
-      }
-      localTrap = execFunction(tr, localTree, pr, localPr, tid, n);
-
-      barrierBuffer[tid] = 1;     
-    }
-    rax_free (localTree->td[0].executeModel); //localTree->td[0].executeModel = NULL;
-    rax_free (localTree->td[0].parameterValues); //localTree->td[0].parameterValues = NULL;
-    rax_free (localTree->rateCategory); //localTree->rateCategory = NULL;
-    rax_free (localTree->lhs); //localTree->lhs = NULL;
-    rax_free (localTree->patrat); //localTree->patrat = NULL;
-    rax_free (localTree->patratStored); //localTree->patratStored = NULL;
-    rax_free (localTree->td[0].ti); //localTree->td[0].ti = NULL;
-    rax_free (localTree);
-#else 
-  const int
-    n = processes, 
-    tid = td->threadNumber;
-  int i;
-
-  /* printf("\nThis is RAxML Worker Process Number: %d\n", tid); */
-
-  while(execFunction(tr, tr, pr, pr, tid,n));
-
-  rax_free (tr->lhs);
-  rax_free (tr->td[0].ti);
-  rax_free (tr->td[0].executeModel);
-  rax_free (tr->td[0].parameterValues);
-  rax_free (tr->patrat);
-  rax_free (tr->patratStored);
-  rax_free (tr->aliaswgt);
-  rax_free (tr->y_ptr);
-  for (i = 0; i < pr->numberOfPartitions; ++ i)
-    rax_free (pr->partitionData[i]);
-  rax_free (pr->partitionData);
-  rax_free (pr);
-  rax_free (tr);
-#endif
-
-  return (void*)NULL;
-}
-
-
-/**
-   @brief Cleanup step once the master barrier succeeded. 
-
-   This is master specific code called once the barrier is
-   passed. Stuff such as reduction operations.  If we execute this
-   here, we can keep the code mostly free from parallel-specific
-   code.
-
-   For the evaluate-type jobs (and only if _REPRODUCIBLE_MPI_OR_PTHREADS
-   is defined) the per-thread results in \a globalResult are summed, in
-   thread order, into \a partitionLH of every partition. For
-   PLL_THREAD_PER_SITE_LIKELIHOODS the per-site values in \a tr->lhs are
-   summed and asserted to agree with \a tr->likelihood to within 0.00001.
-   All other jobs need no post-processing.
-
-   The function asserts that it runs on the instance with threadID 0.
-   
-   @param tr 
-     PLL instance
-
-   @param pr
-     List of partitions
-
-   @param jobType 
-     Job that is to be executed
-*/
-void pllMasterPostBarrier(pllInstance *tr, partitionList *pr, int jobType)
-{
-  assert(tr->threadID == 0); 
-  
-  switch(jobType)
-    {
-    case PLL_THREAD_EVALUATE: 
-    case PLL_THREAD_OPT_RATE: 
-    case PLL_THREAD_OPT_ALPHA:
-    case PLL_THREAD_OPT_LG4X_RATE:
-    case PLL_THREAD_EVALUATE_PER_SITE_LIKES: 
-      {
-#ifdef _REPRODUCIBLE_MPI_OR_PTHREADS
-	int i,j;
-	volatile double partitionResult;	
-
-	for(j = 0; j < pr->numberOfPartitions; j++)
-	  {
-	    for(i = 0, partitionResult = 0.0; i < tr->numberOfThreads; i++) 
-	      partitionResult += globalResult[i * pr->numberOfPartitions+ j];
-
-	    pr->partitionData[j]->partitionLH = partitionResult;
-	  }
-#endif      
-
-	break; 
-      } 
-    case PLL_THREAD_PER_SITE_LIKELIHOODS:
-      {
-	int i; 
-	/* now just compute the sum over per-site log likelihoods for error checking */      
-	double accumulatedPerSiteLikelihood = 0.; 
-	for(i = 0; i < tr->originalCrunchedLength; i++)
-	  accumulatedPerSiteLikelihood += tr->lhs[i];
-
-	/* printf("RESULT: %f\t%f", tr->likelihood, accumulatedPerSiteLikelihood);  */
-	assert(PLL_ABS(tr->likelihood - accumulatedPerSiteLikelihood) < 0.00001);
-      }
-      break;
-    default: 
-      ; 			/* dont do anything on default,
-				   mostly, we can skip that */
-    } 
-}
-
-/**
-   @brief A generic master barrier for executing parallel parts of the code
-
-   A generic master barrier through which the master thread/process controls
-   the work job execution. Through the parameter \a jobType the master instructs
-   the workers of what type of work they must conduct.
-
-   Pthreads build (_USE_PTHREADS): the job type is stored in the traversal
-   descriptor, \a jobCycle is toggled and \a threadJob is set to
-   (jobType << 16) + jobCycle. The master then runs its own share through
-   execFunction() with thread ID 0 and afterwards busy-waits until
-   barrierBuffer[1..n-1] are all set, resetting them to 0 once they are.
-
-   Other builds: the job type is stored in the traversal descriptor and
-   execFunction() is called with \a processes as the number of workers.
-
-   In both cases pllMasterPostBarrier() is called at the end. If
-   MEASURE_TIME_PARALLEL is defined, the elapsed times are accumulated in
-   \a timePerRegion.
-
-   @param tr
-     PLL instance
-
-   @param pr
-     List of partitions
-
-   @param jobType 
-     Type of job to be conducted
- */ 
-void pllMasterBarrier(pllInstance *tr, partitionList *pr, int jobType)
-{
-
-#ifdef MEASURE_TIME_PARALLEL
-  assert(jobType < NUM_PAR_JOBS); 
-  timePerRegion[NUM_PAR_JOBS]  += gettime()- masterTimePerPhase ; 
-  masterTimePerPhase = gettime();
-#endif
-
-#ifdef _USE_PTHREADS
-  const int 
-    n = tr->numberOfThreads;
-
-  tr->td[0].functionType = jobType;
-
-  jobCycle = !jobCycle;
-  threadJob = (jobType << 16) + jobCycle;
-
-  execFunction(tr, tr, pr, pr, 0, n);
-
-  int 
-    i, 
-    sum;
-
-  do
-    {
-      for(i = 1, sum = 1; i < n; i++)
-	sum += barrierBuffer[i];
-    }
-  while(sum < n);  
-
-  for(i = 1; i < n; i++)
-    barrierBuffer[i] = 0;
-#else 
-  tr->td[0].functionType = jobType; 
-  execFunction(tr,tr,pr,pr,0,processes);
-#endif
-
-  /* code executed by the master, once the barrier is crossed */
-  pllMasterPostBarrier(tr, pr, jobType);
-
-#ifdef MEASURE_TIME_PARALLEL
-  timePerRegion[jobType] += gettime() - masterTimePerPhase; 
-  masterTimePerPhase = gettime();
-#endif
-}
-
-
-#if (defined(_FINE_GRAIN_MPI) || defined(_USE_PTHREADS))
-
-/** @brief Initialize structures for worker processes/threads
- 
-    Allocate all memory structures required by worker threads/processes
-
-    Nine scalar settings (seven of \a tr plus numberOfPartitions and
-    perGeneBranchLengths of \a pr) are transferred to \a localTree / \a localPr
-    through ASSIGN_BUF. Every non-master caller then allocates its lhs array,
-    partition array, traversal descriptor arrays and patrat / patratStored
-    arrays. Afterwards nine fields per partition are transferred, partitionLH
-    is reset to 0.0, and the sum of the partition widths (upper - lower) is
-    asserted to equal originalCrunchedLength. Finally vectorRecomFraction is
-    transferred via ASSIGN_DBL.
-
-    @param tr 
-      PLL Instance
-
-    @param localTree 
-      A local PLL instance for the worker process/thread which is initialized in this function based on \a tr
-
-    @param pr
-      List of partitions
-
-    @param localPr
-      A local list of partitions for the worker process/thread which will be initialized based on \a pr 
-
-    @param tid
-      The worker process/thread ID; read in the Pthreads build, overwritten with \a processID in the MPI build
-
-    @note
-      This function should never be called by the master thread, but is called by master process in MPI implementation.
-
-    @note
-      NOTE(review): in the MPI build bufSize reserves 9 + 8 * numberOfPartitions
-      ints, while nine values are passed through ASSIGN_BUF per partition (the
-      ninth being ascBias). Whether this overruns \a buf depends on how
-      ASSIGN_BUF advances \a bufPtr and on sizeof(pllBoolean), both defined
-      elsewhere -- confirm.
-
-    @note
-      NOTE(review): partitionData, executeModel and parameterValues are
-      allocated with PLL_NUM_BRANCHES entries but indexed by partition number;
-      this presumably relies on numberOfPartitions never exceeding
-      PLL_NUM_BRANCHES -- confirm.
- */ 
-static void assignAndInitPart1(pllInstance *localTree, pllInstance *tr, partitionList *localPr, partitionList *pr, int *tid)
-{
-  size_t
-    model; 
-  int
-    totalLength = 0; 
-
-#ifdef _USE_PTHREADS
-  localTree->threadID = *tid; 
-  /* printf("my id is %d\n", *tid);  */
-  assert(localTree != tr);
-  localTree->numberOfThreads = tr->numberOfThreads;
-#else  /* => MPI */
-  *tid = processID; 
-  localTree->threadID = processID; 
-  tr->numberOfThreads = processes;
-
-  int bufSize = (9 + pr->numberOfPartitions* 8) * sizeof(int);
-  char buf[bufSize], 
-    *bufPtr = buf;  
-#endif
-
-  RECV_BUF(buf, bufSize, MPI_BYTE); 
-
-  ASSIGN_BUF( localTree->useRecom,                  tr->useRecom, int);
-  ASSIGN_BUF( localTree->rateHetModel,              tr->rateHetModel, int);
-  ASSIGN_BUF( localTree->useMedian,                 tr->useMedian, int); 
-  ASSIGN_BUF( localTree->saveMemory,                tr->saveMemory, int);
-  ASSIGN_BUF( localTree->maxCategories,             tr->maxCategories, int);
-  ASSIGN_BUF( localTree->originalCrunchedLength,    tr->originalCrunchedLength, int);
-  ASSIGN_BUF( localTree->mxtips,                    tr->mxtips, int);
-  ASSIGN_BUF( localPr->numberOfPartitions,          pr->numberOfPartitions, int);
-  ASSIGN_BUF( localPr->perGeneBranchLengths,        pr->perGeneBranchLengths, pllBoolean);
-
-  localTree->td[0].count = 0; 
-
-  if(NOT MASTER_P)
-    {
-      localTree->lhs                     = (double*)rax_calloc((size_t)localTree->originalCrunchedLength, sizeof(double));     
-      localPr->partitionData           = (pInfo**)rax_calloc(PLL_NUM_BRANCHES,sizeof(pInfo*));
-      for(model = 0; model < (size_t)localPr->numberOfPartitions; model++) {
-    	localPr->partitionData[model] = (pInfo*)rax_calloc(1,sizeof(pInfo));
-      }
-      localTree->td[0].ti              = (traversalInfo *)rax_malloc(sizeof(traversalInfo) * (size_t)localTree->mxtips);
-      localTree->td[0].executeModel    = (pllBoolean *)rax_malloc(sizeof(pllBoolean) * PLL_NUM_BRANCHES);
-      localTree->td[0].parameterValues = (double *)rax_malloc(sizeof(double) * PLL_NUM_BRANCHES);
-      localTree->patrat       = (double*)rax_malloc(sizeof(double) * (size_t)localTree->originalCrunchedLength);
-      localTree->patratStored = (double*)rax_malloc(sizeof(double) * (size_t)localTree->originalCrunchedLength);            
-    }
-  
-  for(model = 0; model < (size_t)localPr->numberOfPartitions; model++)
-    {
-      ASSIGN_BUF(localPr->partitionData[model]->numberOfCategories,     pr->partitionData[model]->numberOfCategories, int);
-      ASSIGN_BUF(localPr->partitionData[model]->states,                 pr->partitionData[model]->states, int);
-      ASSIGN_BUF(localPr->partitionData[model]->maxTipStates ,          pr->partitionData[model]->maxTipStates, int);
-      ASSIGN_BUF(localPr->partitionData[model]->dataType ,              pr->partitionData[model]->dataType, int);
-      ASSIGN_BUF(localPr->partitionData[model]->protModels ,            pr->partitionData[model]->protModels, int);
-      ASSIGN_BUF(localPr->partitionData[model]->protUseEmpiricalFreqs , pr->partitionData[model]->protUseEmpiricalFreqs, int);
-      ASSIGN_BUF(localPr->partitionData[model]->lower ,                 pr->partitionData[model]->lower, int);
-      ASSIGN_BUF(localPr->partitionData[model]->upper ,                 pr->partitionData[model]->upper, int);
-      ASSIGN_BUF(localPr->partitionData[model]->ascBias,                pr->partitionData[model]->ascBias, pllBoolean);
-
-      localPr->partitionData[model]->partitionLH = 0.0;      
-
-      totalLength += (localPr->partitionData[model]->upper -  localPr->partitionData[model]->lower);
-    }
-
-  SEND_BUF(buf, bufSize, MPI_BYTE); 
-
-  assert(totalLength == localTree->originalCrunchedLength);
-
-  ASSIGN_DBL(localTree->vectorRecomFraction, tr->vectorRecomFraction); 
-}
-#endif
-
-
-/** @brief Distribute y-vectors during initialization. 
-
-    Distribute the alignment data to the slave process/threads. Each slave
-    copies the data (alignment) from its assigned partition to its local 
-    partition structure.
-
-    @param tr 
-      PLL instance
-    
-    @param localTree 
-      Local library instance for the current thread
-
-    @param localPr
-      Local list of partitions structure for the current thread
- */ 
-static void distributeYVectors(pllInstance *localTree, pllInstance *tr, partitionList *localPr)
-{
-  size_t 
-    i,
-    n = localTree->numberOfThreads,
-    globalCounter = 0,
-    localCounter = 0,
-    model = 0, 
-    j; 
-  int tid = localTree->threadID; 
-  
-
-  /* distribute the y-vectors */
-  for(j = 1 ; j <= (size_t)localTree->mxtips; j++)	
-    {
-#ifdef _FINE_GRAIN_MPI
-      unsigned char yBuf[tr->originalCrunchedLength]; 	  
-      if(MASTER_P)
-	memcpy(yBuf, tr->yVector[j], tr->originalCrunchedLength * sizeof(unsigned char));
-      MPI_Bcast(  yBuf, tr->originalCrunchedLength, MPI_UNSIGNED_CHAR,0,MPI_COMM_WORLD); 
-#endif	  
-
-      for(model = 0, globalCounter = 0; model < (size_t)localPr->numberOfPartitions; model++)
-	{
-	  if(tr->manyPartitions)
-	    {
-	      if(isThisMyPartition(localPr, tid, model))
-		{
-		  assert(localPr->partitionData[model]->upper - localPr->partitionData[model]->lower == localPr->partitionData[model]->width);
-		  for(localCounter = 0, i = (size_t)localPr->partitionData[model]->lower;  i < (size_t)localPr->partitionData[model]->upper; i++, localCounter++, globalCounter++)
-#ifdef _USE_PTHREADS
-		    localPr->partitionData[model]->yVector[j][localCounter] = tr->yVector[j][globalCounter];
-#else 
-		  localPr->partitionData[model]->yVector[j][localCounter] = yBuf[globalCounter];
-#endif
-
-
-		}
-	      else
-		globalCounter += (localPr->partitionData[model]->upper - localPr->partitionData[model]->lower);
-	    }
-	  else 
-	    {
-	      for(localCounter = 0, i = (size_t)localPr->partitionData[model]->lower;  i < (size_t)localPr->partitionData[model]->upper; i++, globalCounter++)
-		{
-		  if(i % (size_t)n == (size_t)tid)
-		    {
-#ifdef _USE_PTHREADS
-		      localPr->partitionData[model]->yVector[j][localCounter] = tr->yVector[j][globalCounter];
-#else 
-		      localPr->partitionData[model]->yVector[j][localCounter] = yBuf[globalCounter];
-#endif
-		      ++localCounter; 
-		    }
-		}	   
-	    }
-	}
-    }
-}
-
-/** @brief Distribute the weights in the alignment of slave process/threads
-
-    Allocate space in the local tree structure for the alignment weights. Then
-    copy the weights vector from the master process/thread to the slaves.
-
-    @param tr 
-      PLL instance
-    
-    @param localTree 
-      Local library instance for the current process/thread
-
-    @param localPr
-      Local list of partitions for the current process/thread
-
-    @todo
-      The alignment weights should go to the partitions structure rather than the tree structure
- */ 
-static void distributeWeights(pllInstance *localTree, pllInstance *tr, partitionList *localPr)
-{
-  int tid = localTree->threadID; 
-  int n = localTree->numberOfThreads; 
-
-  size_t     
-    globalCounter = 0,
-    i,
-    localCounter  = 0,
-    model; 
-
-
-
-  /* distribute the weights  */
-#ifdef _FINE_GRAIN_MPI 		/* need to broadcast a few things first */
-  if(NOT MASTER_P)
-    tr->aliaswgt = rax_malloc(sizeof(int) * tr->originalCrunchedLength); 
-  MPI_Bcast(tr->aliaswgt, tr->originalCrunchedLength, MPI_INT, 0, MPI_COMM_WORLD);      
-#endif
-  for(model = 0, globalCounter = 0; model < (size_t)localPr->numberOfPartitions; model++)
-    { 
-      if(tr->manyPartitions)
-	{
-	  if(isThisMyPartition(localPr, tid, model))
-	    {
-	      assert(localPr->partitionData[model]->upper - localPr->partitionData[model]->lower == localPr->partitionData[model]->width);
-	      for(localCounter = 0, i = (size_t)localPr->partitionData[model]->lower;  i < (size_t)localPr->partitionData[model]->upper; i++, localCounter++, globalCounter++)
-		localPr->partitionData[model]->wgt[localCounter]          = tr->aliaswgt[globalCounter];
-	    }
-	  else
-	    globalCounter += (localPr->partitionData[model]->upper - localPr->partitionData[model]->lower);
-	}
-      else 
-	{ 
-	  for(localCounter = 0, i = (size_t)localPr->partitionData[model]->lower;  i < (size_t)localPr->partitionData[model]->upper; i++, globalCounter++)
-	    {
-	      if(i % (size_t)n == (size_t)tid)
-		localPr->partitionData[model]->wgt[localCounter++]       = tr->aliaswgt[globalCounter];
-	    }	   
-	}
-    }
-}
-
-
-/** @brief Initialize the partitioning scheme (master function) in parallel environment.
-    
-    Initialize the partition scheme in all processes/threads. This is a wrapper function
-    that calls all necessary functions for allocating the local structures for slave threads
-    and for distributing all necessary data from the master threads, such as alignment data,
-    and weight vectors.
-
-    @param tr 
-      PLL instance
-
-    @param localTree 
-      Local PLL instance for the slave process/thread
-
-    @param pr
-      List of partitions
-
-    @param localPr
-      Local partition structure for the slave process/thread
-
-    @param tid
-      Process/thread id
-
-    @param n 
-      Number of processes/threads
-*/ 
-static void initializePartitionsMaster(pllInstance *tr, pllInstance *localTree, partitionList *pr, partitionList *localPr, int tid, int n)
-{ 
-  size_t
-    model;
-
-  treeIsInitialized = PLL_TRUE; 
-
-  ASSIGN_INT(localTree->manyPartitions, tr->manyPartitions);
-  ASSIGN_INT(localTree->numberOfThreads, tr->numberOfThreads);
-  ASSIGN_INT(localPr->numberOfPartitions, pr->numberOfPartitions);
-
-#ifdef _USE_PTHREADS
-  if(MASTER_P)
-    globalResult = rax_calloc((size_t) tr->numberOfThreads * (size_t)pr->numberOfPartitions* 2 ,sizeof(double));
-  else 
-    assignAndInitPart1(localTree, tr, localPr, pr, &tid);
-#else 
-  globalResult = rax_calloc((size_t) tr->numberOfThreads * (size_t)pr->numberOfPartitions* 2 ,sizeof(double));
-  assignAndInitPart1(localTree, tr, localPr, pr, &tid);
-  defineTraversalInfoMPI();
-#endif
-
-  for(model = 0; model < (size_t)localPr->numberOfPartitions; model++)
-    localPr->partitionData[model]->width        = 0;
-
-  if(tr->manyPartitions)    
-    {
-      multiprocessorScheduling(localTree, localPr, tid);
-      computeFractionMany(localPr, tid);
-    }
-  else
-    computeFraction(localPr, tid, n);
-
-  initializePartitionData(localTree, localPr);
-
-  {
-    size_t 
-      model,  
-      i,      
-      countOffset,
-      myLength = 0;
-
-    for(model = 0; model < (size_t)localPr->numberOfPartitions; model++)
-      myLength += localPr->partitionData[model]->width;
-
-    /* assign local memory for storing sequence data */
-    
-    localTree->y_ptr = (unsigned char *)rax_malloc(myLength * (size_t)(localTree->mxtips) * sizeof(unsigned char));
-    assert(localTree->y_ptr != NULL);
-
-    for(i = 0; i < (size_t)localTree->mxtips; i++)
-      {
-	for(model = 0, countOffset = 0; model < (size_t)localPr->numberOfPartitions; model++)
-	  {	    
-	    localPr->partitionData[model]->yVector[i+1]   = &localTree->y_ptr[i * myLength + countOffset];
-	    countOffset +=  localPr->partitionData[model]->width;
-	  }
-	assert(countOffset == myLength);
-      }
-
-    /* figure in data */
-
-    distributeWeights(localTree, tr, localPr);
-
-    distributeYVectors(localTree, tr, localPr);
-
-  }
-
-  initMemorySavingAndRecom(localTree, localPr);
-}
diff --git a/pllrepo/src/genericParallelization.h b/pllrepo/src/genericParallelization.h
deleted file mode 100644
index 576f8e9..0000000
--- a/pllrepo/src/genericParallelization.h
+++ /dev/null
@@ -1,127 +0,0 @@
-/** 
- * PLL (version 1.0.0) a software library for phylogenetic inference
- * Copyright (C) 2013 Tomas Flouri and Alexandros Stamatakis
- *
- * Derived from 
- * RAxML-HPC, a program for sequential and parallel estimation of phylogenetic
- * trees by Alexandros Stamatakis
- *
- * This program is free software: you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation, either version 3 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- * 
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * For any other enquiries send an Email to Tomas Flouri
- * Tomas.Flouri at h-its.org
- *
- * When publishing work that uses PLL please cite PLL
- * 
- * @file genericParallelization.h
- */
-#ifndef _GENERIC_PARALL_H 
-#define _GENERIC_PARALL_H 
-
-
-extern double *globalResult; 
-
-
-/**********/
-/* CONFIG */
-/**********/
-
-/* #define MEASURE_TIME_PARALLEL */
-#define _PORTABLE_PTHREADS
-/* #define DEBUG_PARALLEL */ 
-/* #define DEBUG_MPI_EACH_SEND */
-/* #define _REPRODUCIBLE_MPI_OR_PTHREADS */
-#ifdef _USE_PTHREADS
-#ifndef _PORTABLE_PTHREADS
-void pinToCore(int tid);
-#endif
-#endif
-
-
-#define NOT ! 
-#define IS_PARALLEL (defined(_USE_PTHREADS) || defined(_FINE_GRAIN_MPI)) 
-
-
-
-#ifdef MEASURE_TIME_PARALLEL
-#define NUM_PAR_JOBS 16
-extern double masterTimePerPhase; 
-#endif
-
-
-/******************/
-/* MPI SPECIFIC   */
-/******************/
-#ifdef _FINE_GRAIN_MPI
-#include <mpi.h>
-#ifdef DEBUG_MPI_EACH_SEND
-#define DEBUG_PRINT(text, elem) printf(text, elem)
-#else 
-#define DEBUG_PRINT(text, elem) NULL
-#endif
-
-/* for the broadcast of traversal descriptor */
-#define TRAVERSAL_LENGTH 5
-#define traversalSize sizeof(traversalInfo)
-#define messageSize(x)   (3 * sizeof(int) +  x * (sizeof(int)+ sizeof(double)) + TRAVERSAL_LENGTH * traversalSize)
-
-#define VOLATILE_PAR 
-#define MASTER_P (processID == 0)
-#define POP_OR_PUT_BYTES(bufPtr, elem, type) (MASTER_P ? (bufPtr = addBytes((bufPtr), &(elem), sizeof(type))) : (bufPtr = popBytes((bufPtr), &(elem), sizeof(type))))
-
-#define ASSIGN_INT(x,y) (MPI_Bcast(&y,1,MPI_INT,0,MPI_COMM_WORLD),DEBUG_PRINT("\tSEND/RECV %d\n", y)) 
-#define ASSIGN_BUF(x,y,type) (POP_OR_PUT_BYTES(bufPtr, y,type))
-#define ASSIGN_BUF_DBL(x,y) (POP_OR_PUT_BYTES(bufPtrDbl,y, double))
-#define ASSIGN_DBL(x,y) (MPI_Bcast(&y,1,MPI_DOUBLE, 0, MPI_COMM_WORLD), DEBUG_PRINT("\tSEND/RECV %f\n", y)) 
-#define ASSIGN_DBLS(tar,src,length) MPI_Bcast(tar, length, MPI_DOUBLE, 0, MPI_COMM_WORLD)
-#define PLL_DOUBLE MPI_DOUBLE
-#define ASSIGN_GATHER(tar,src,length,type,tid) MPI_Gather(src,length,type,tar,length,type,0, MPI_COMM_WORLD)
-#define SEND_BUF(buf, bufSize,type) if(MASTER_P) MPI_Bcast(buf, bufSize, type, 0, MPI_COMM_WORLD) 
-#define RECV_BUF(buf, bufSize,type) if(NOT MASTER_P) MPI_Bcast(buf, bufSize, type, 0, MPI_COMM_WORLD) 
-#define BCAST_BUF(buf, bufSize,type,who)  MPI_Bcast(buf, bufSize, type, who,MPI_COMM_WORLD )
-
-
-
-extern int processes; 
-extern int processID; 
-#endif 
-
-/*********************/
-/* PTHREAD SPECIFIC  */
-/*********************/
-#ifdef _USE_PTHREADS
-#if defined (_MSC_VER)
-#include "pthread.h"
-#else
-#include <pthread.h>
-#endif
-#define _REPRODUCIBLE_MPI_OR_PTHREADS
-#define VOLATILE_PAR volatile 
-#define MASTER_P (tid == 0)
-#define ASSIGN_INT(x,y) (x = y)
-#define ASSIGN_BUF(x,y,type) (x = y)
-#define ASSIGN_BUF_DBL(x,y) (x = y)
-#define ASSIGN_DBL(x,y) (x = y)
-#define ASSIGN_DBLS(tar,src,length) memmove(tar, src, length * sizeof(double))
-#define PLL_DOUBLE double 	/* just rededining that to make the source code less confusing */
-#define ASSIGN_GATHER(tar,src,length,type,tid) (memmove((tar) + (tid) * (length) ,src, length * sizeof(type)))
-#define SEND_BUF(buf, bufSize, type) 
-#define RECV_BUF(buf, bufSize, type) 
-#define BCAST_BUF(buf, bufSize,type,who)  
-#define TRAVERSAL_LENGTH 5
-#define messageSize(x) 0
-#endif
-
-
-#endif	/* end include guard  */
diff --git a/pllrepo/src/globalVariables.h b/pllrepo/src/globalVariables.h
deleted file mode 100644
index 1c76da8..0000000
--- a/pllrepo/src/globalVariables.h
+++ /dev/null
@@ -1,170 +0,0 @@
-/*  RAxML-VI-HPC (version 2.2) a program for sequential and parallel estimation of phylogenetic trees 
- *  Copyright August 2006 by Alexandros Stamatakis
- *
- *  Partially derived from
- *  fastDNAml, a program for estimation of phylogenetic trees from sequences by Gary J. Olsen
- *  
- *  and 
- *
- *  Programs of the PHYLIP package by Joe Felsenstein.
- *
- *  This program is free software; you may redistribute it and/or modify its
- *  under the terms of the GNU General Public License as published by the Free
- *  Software Foundation; either version 2 of the License, or (at your option)
- *  any later version.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- * 
- *
- *  For any other enquiries send an Email to Alexandros Stamatakis
- *  Alexandros.Stamatakis at epfl.ch
- *
- *  When publishing work that is based on the results from RAxML-VI-HPC please cite:
- *
- *  Alexandros Stamatakis:"RAxML-VI-HPC: maximum likelihood-based phylogenetic analyses with thousands of taxa and mixed models". 
- *  Bioinformatics 2006; doi: 10.1093/bioinformatics/btl446
- */
-
-#ifdef GLOBAL_VARIABLES_DEFINITION
-
-
-const char *protModels[PLL_NUM_PROT_MODELS] = {"DAYHOFF", "DCMUT", "JTT", "MTREV", "WAG", "RTREV", "CPREV", "VT", "BLOSUM62", "MTMAM", "LG", "MTART", "MTZOA", "PMB", 
-					   "HIVB", "HIVW", "JTTDCMUT", "FLU", "AUTO", "LG4M", "LG4X", "GTR"};
-
-const char binaryStateNames[2]   = {'0', '1'};  
-
-const char dnaStateNames[4]      = {'A', 'C', 'G', 'T'};
-
-const char protStateNames[20]    = {'A','R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 
-				    'I', 'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 
-				    'Y', 'V'};
-
-const char inverseMeaningBINARY[4] = {'_', '0', '1', '-'};
-const char inverseMeaningDNA[16]   = {'_', 'A', 'C', 'M', 'G', 'R', 'S', 'V', 'T', 'W', 'Y', 'H', 'K', 'D', 'B', '-'};
-const char inverseMeaningPROT[23]  = {'A','R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S', 
-			       'T', 'W', 'Y', 'V', 'B', 'Z', '-'};
-const char inverseMeaningGeneric32[33] = {'0', '1', '2', '3', '4', '5', '6', '7', 
-				    '8', '9', 'A', 'B', 'C', 'D', 'E', 'F',
-				    'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
-				    'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
-				    '-'};
-const char inverseMeaningGeneric64[33] = {'0', '1', '2', '3', '4', '5', '6', '7', 
-				    '8', '9', 'A', 'B', 'C', 'D', 'E', 'F',
-				    'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
-				    'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
-				    '-'};
-
-const unsigned int bitVectorIdentity[256] = {0 ,1 ,2 ,3 ,4 ,5 ,6 ,7 ,8 ,9 ,10 ,11 ,12 ,13 ,14 ,15 ,16 ,17 ,18 ,19 ,20 ,21 ,22 ,23 ,24 ,25 ,26 ,
-					     27 ,28 ,29 ,30 ,31 ,32 ,33 ,34 ,35 ,36 ,37 ,38 ,39 ,40 ,41 ,42 ,43 ,44 ,45 ,46 ,47 ,48 ,49 ,50 ,51 ,
-					     52 ,53 ,54 ,55 ,56 ,57 ,58 ,59 ,60 ,61 ,62 ,63 ,64 ,65 ,66 ,67 ,68 ,69 ,70 ,71 ,72 ,73 ,74 ,75 ,76 ,
-					     77 ,78 ,79 ,80 ,81 ,82 ,83 ,84 ,85 ,86 ,87 ,88 ,89 ,90 ,91 ,92 ,93 ,94 ,95 ,96 ,97 ,98 ,99 ,100 ,101 ,
-					     102 ,103 ,104 ,105 ,106 ,107 ,108 ,109 ,110 ,111 ,112 ,113 ,114 ,115 ,116 ,117 ,118 ,119 ,120 ,121 ,122 ,
-					     123 ,124 ,125 ,126 ,127 ,128 ,129 ,130 ,131 ,132 ,133 ,134 ,135 ,136 ,137 ,138 ,139 ,140 ,141 ,142 ,143 ,
-					     144 ,145 ,146 ,147 ,148 ,149 ,150 ,151 ,152 ,153 ,154 ,155 ,156 ,157 ,158 ,159 ,160 ,161 ,162 ,163 ,164 ,
-					     165 ,166 ,167 ,168 ,169 ,170 ,171 ,172 ,173 ,174 ,175 ,176 ,177 ,178 ,179 ,180 ,181 ,182 ,183 ,184 ,185 ,
-					     186 ,187 ,188 ,189 ,190 ,191 ,192 ,193 ,194 ,195 ,196 ,197 ,198 ,199 ,200 ,201 ,202 ,203 ,204 ,205 ,206 ,
-					     207 ,208 ,209 ,210 ,211 ,212 ,213 ,214 ,215 ,216 ,217 ,218 ,219 ,220 ,221 ,222 ,223 ,224 ,225 ,226 ,227 ,
-					     228 ,229 ,230 ,231 ,232 ,233 ,234 ,235 ,236 ,237 ,238 ,239 ,240 ,241 ,242 ,243 ,244 ,245 ,246 ,247 ,248 ,
-					     249 ,250 ,251 ,252 ,253 ,254 ,255};
-
-
-
-const unsigned int bitVectorAA[23] = {1, 2, 4, 8, 16, 32, 64, 128, 
-				      256, 512, 1024, 2048, 4096, 
-				      8192, 16384, 32768, 65536, 131072, 262144, 
-				      524288, 12 /* N | D */, 96 /*Q | E*/, 1048575 /* - */};
-
-const unsigned int bitVectorSecondary[256] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
-					      10, 11, 12, 13, 14, 15, 0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 
-					      208, 224, 240, 0, 17, 34, 51, 68, 85, 102, 119, 136, 153, 170, 187, 204, 221, 238, 
-					      255, 0, 256, 512, 768, 1024, 1280, 1536, 1792, 2048, 2304, 2560, 2816, 3072, 3328, 
-					      3584, 3840, 0, 257, 514, 771, 1028, 1285, 1542, 1799, 2056, 2313, 2570, 2827, 3084, 
-					      3341, 3598, 3855, 0, 272, 544, 816, 1088, 1360, 1632, 1904, 2176, 2448, 2720, 2992, 
-					      3264, 3536, 3808, 4080, 0, 273, 546, 819, 1092, 1365, 1638, 1911, 2184, 2457, 2730, 
-					      3003, 3276, 3549, 3822, 4095, 0, 4096, 8192, 12288, 16384, 20480, 24576, 28672, 32768, 
-					      36864, 40960, 45056, 49152, 53248, 57344, 61440, 0, 4097, 8194, 12291, 16388, 20485, 24582, 
-					      28679, 32776, 36873, 40970, 45067, 49164, 53261, 57358, 61455, 0, 4112, 8224, 12336, 16448, 
-					      20560, 24672, 28784, 32896, 37008, 41120, 45232, 49344, 53456, 57568, 61680, 0, 4113, 8226, 
-					      12339, 16452, 20565, 24678, 28791, 32904, 37017, 41130, 45243, 49356, 53469, 57582, 61695, 
-					      0, 4352, 8704, 13056, 17408, 21760, 26112, 30464, 34816, 39168, 43520, 47872, 52224, 56576, 
-					      60928, 65280, 0, 4353, 8706, 13059, 17412, 21765, 26118, 30471, 34824, 39177, 43530, 47883, 
-					      52236, 56589, 60942, 65295, 0, 4368, 8736, 13104, 17472, 21840, 26208, 30576, 34944, 39312, 
-					      43680, 48048, 52416, 56784, 61152, 65520, 0, 4369, 8738, 13107, 17476, 21845, 26214, 30583, 
-					      34952, 39321, 43690, 48059, 52428, 56797, 61166, 65535};
-
-const unsigned int bitVector32[33] = {1,     2,    4,    8,   16,   32,    64,   128,
-                                      256, 512, 1024, 2048, 4096, 8192, 16384, 32768,
-                                      65536, 131072, 262144, 524288, 1048576, 2097152, 4194304, 8388608,
-                                      16777216, 33554432, 67108864, 134217728, 268435456, 536870912, 1073741824, 2147483648u, 
-				      4294967295u};
-
-/*const unsigned int bitVector64[65] = {};*/
-/** @brief Array for setting bits 0 .. 31 in a bit vector, used in saveMemory technique for the gapVector */
-const unsigned int mask32[32] = {1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 
-					262144, 524288, 1048576, 2097152, 4194304, 8388608, 16777216, 33554432, 67108864, 134217728, 
-					268435456, 536870912, 1073741824, 2147483648U};
-
-const char *secondaryModelList[21] = { "S6A (GTR)", "S6B", "S6C", "S6D", "S6E", "S7A (GTR)", "S7B", "S7C", "S7D", "S7E", "S7F", "S16 (GTR)", "S16A", "S16B", "S16C", 
-				       "S16D", "S16E", "S16F", "S16I", "S16J", "S16K"};
-
-const partitionLengths pLengths[PLL_MAX_MODEL] = {
-  
-  /* BINARY */
-  {4,   4,   2,  4,  4, 1, 2,  8, 2, 2, PLL_FALSE, PLL_FALSE, 3, inverseMeaningBINARY, 2, PLL_FALSE, bitVectorIdentity},
-  
-  /* DNA */
-  {16,  16,  4, 16, 16, 6, 4, 64, 6, 4, PLL_FALSE, PLL_FALSE, 15, inverseMeaningDNA, 4, PLL_FALSE, bitVectorIdentity},
-        
-  /* AA */
-  {400, 400, 20, 400, 400, 190, 20, 460, 190, 20, PLL_FALSE, PLL_FALSE, 22, inverseMeaningPROT, 20, PLL_TRUE, bitVectorAA},
-  
-  /* SECONDARY_DATA */
-
-  {256, 256, 16, 256, 256, 120, 16, 4096, 120, 16, PLL_FALSE, PLL_FALSE, 255, (char*)NULL, 16, PLL_TRUE, bitVectorSecondary},
-
-  
-  /* SECONDARY_DATA_6 */
-  {36, 36,  6, 36, 36, 15, 6, 384, 15, 6, PLL_FALSE, PLL_FALSE, 63, (char*)NULL, 6, PLL_TRUE, bitVectorIdentity},
-
-  
-  /* SECONDARY_DATA_7 */
-  {49,   49,    7,   49, 49,  21, 7, 896, 21, 7, PLL_FALSE, PLL_FALSE, 127, (char*)NULL, 7, PLL_TRUE, bitVectorIdentity},
-
-  /* 32 states */
-  {1024, 1024, 32, 1024, 1024, 496, 32, 1056, 496, 32, PLL_FALSE, PLL_FALSE, 32, inverseMeaningGeneric32, 32, PLL_TRUE, bitVector32},
-  
-  /* 64 states */
-  {4096, 4096, 64, 4096, 4096, 2016, 64, 4160, 64, 2016, PLL_FALSE, PLL_FALSE, 64, (char*)NULL, 64, PLL_TRUE, (unsigned int*)NULL}
-};
-
-
-#if (defined(_USE_PTHREADS) || defined(_FINE_GRAIN_MPI))
-double *globalResult;
-pllBoolean treeIsInitialized;
-#ifdef MEASURE_TIME_PARALLEL
-double masterTimePerPhase; 
-#endif
-#endif
-
-#ifdef _USE_PTHREADS
-volatile int             jobCycle = 0;
-volatile int             threadJob = 0;
-volatile char            *barrierBuffer;
-#endif
-
-#ifdef _FINE_GRAIN_MPI
-int processes;
-int processID; 
-MPI_Datatype TRAVERSAL_MPI; 
-#endif
-
-#else
-extern const partitionLengths pLengths[PLL_MAX_MODEL];
-extern const char * protModels[PLL_NUM_PROT_MODELS];
-extern char * secondaryModelList[21];
-//extern const unsigned int * mask32;
-
-#endif
diff --git a/pllrepo/src/hardware.c b/pllrepo/src/hardware.c
deleted file mode 100644
index 3607568..0000000
--- a/pllrepo/src/hardware.c
+++ /dev/null
@@ -1,165 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/stat.h>
-#ifndef WIN32
-#include <unistd.h>
-#endif
-
-#include <string.h>
-#include "hardware.h"
-
-#define PLL_FEAT_AVAIL(x,y) (((x) & (y)) == (y))
-#define PLL_SYS_CPU_DIR_PATH "/sys/devices/system/cpu/"
-
-//#ifdef _MSC_VER
-//#define inline __inline
-//#endif
-
-static __inline void cpuid(unsigned int op, int count,
-                         unsigned int *eax, unsigned int *ebx,
-                         unsigned int *ecx, unsigned int *edx)
-{
-#ifdef WIN32
-	__int32 regs[4];
-	__cpuid((int*)regs, (int)op);
-	*eax = regs[0];
-	*ebx = regs[1];
-	*ecx = regs[2];
-	*edx = regs[3];
-#else
-	*eax = op;
-  *ecx = count;
-  asm volatile("cpuid"
-        : "=a" (*eax),
-          "=b" (*ebx),
-          "=c" (*ecx),
-          "=d" (*edx)
-
-        : "0" (*eax), "2" (*ecx)
-        : "memory");
-#endif
-}
-
-
-void show_hardware_info(pllHardwareInfo * hw)
-{
-  printf ("MMX.........: %d\n"
-          "SSE.........: %d\n"
-          "SSE2........: %d\n"
-          "SSE3........: %d\n"
-          "SSSE3.......: %d\n"
-          "FMA.........: %d\n"
-          "SSE4.1......: %d\n"
-          "SSE4.2......: %d\n"
-          "AVX.........: %d\n"
-          "AVX2........: %d\n"
-          "SSE4A.......: %d\n"
-          "FMA4........: %d\n\n"
-          "Core(s).....: %d\n"
-          "CPU Sockets.: %d\n",
-
-          hw->has_mmx, hw->has_sse, hw->has_sse2, hw->has_sse3, hw->has_ssse3,
-          hw->has_fma, hw->has_sse41, hw->has_sse42, hw->has_avx, hw->has_avx2,
-          hw->has_sse4a, hw->has_fma4, hw->cores, hw->cpu_sockets);
-}
-
-static int pll_probe_cpu (pllHardwareInfo * hw)
-{
-  struct stat cpustat;
-  char cpu[30];
-  char cpupath[100];
-  int i, id, max_physical_id = -1;
-  char * physical_id_path = "/topology/physical_package_id";
-  FILE * fd;
-
-  /* check whether the sys cpu dir exists */
-  if (stat(PLL_SYS_CPU_DIR_PATH, &cpustat)) return (0);
-  
-  /* and also check whether it is a dir */
-  if (!S_ISDIR(cpustat.st_mode)) return (0);
-
-  /* detect number of processors */
-  for (i = 0; ; ++i)
-   {
-     sprintf(cpu, "cpu%d", i);
-     strcpy (cpupath, PLL_SYS_CPU_DIR_PATH);
-     strcat (cpupath, cpu);
-     if (stat(cpupath, &cpustat)) break;
-
-     strcat (cpupath, physical_id_path);
-     if (!stat(cpupath, &cpustat))
-      {
-        fd = fopen (cpupath,"r");
-        fscanf (fd, "%d", &id);
-        /* printf ("Detected processor %d belonging to package %d\n", i, id); */
-        if (id > max_physical_id) max_physical_id = id;
-        fclose (fd);
-      }
-   }
-  
-  hw->cores       = i;
-  hw->cpu_sockets = max_physical_id + 1;
-
-  return (1);
-}
-
-static void pll_probe_hardware (pllHardwareInfo * hw)
-{
-  unsigned int a, b, c, d;
-  c = 0;
-
-  cpuid(0,0,&a,&b,&c,&d);
-  *((unsigned int *)(hw->vendor)    ) = b;
-  *((unsigned int *)(hw->vendor + 4)) = d;
-  *((unsigned int *)(hw->vendor + 8)) = c;
-  hw->vendor[12] = 0;
-
-  printf ("%s\n", hw->vendor);
-
-  cpuid(1,0,&a,&b,&c,&d);
-
-  hw->has_mmx   = PLL_FEAT_AVAIL(d,PLL_HAS_MMX); 
-  hw->has_sse   = PLL_FEAT_AVAIL(d,PLL_HAS_SSE);
-  hw->has_sse2  = PLL_FEAT_AVAIL(d,PLL_HAS_SSE2);
-
-  hw->has_sse3  = PLL_FEAT_AVAIL(c,PLL_HAS_SSE3);
-  hw->has_ssse3 = PLL_FEAT_AVAIL(c,PLL_HAS_SSSE3);
-  hw->has_fma   = PLL_FEAT_AVAIL(c,PLL_HAS_FMA);
-  hw->has_sse41 = PLL_FEAT_AVAIL(c,PLL_HAS_SSE41);
-  hw->has_sse42 = PLL_FEAT_AVAIL(c,PLL_HAS_SSE42);
-  hw->has_avx   = PLL_FEAT_AVAIL(c,PLL_HAS_AVX);
-
-  cpuid(7,0,&a,&b,&c,&d);
-
-  hw->has_avx2  = PLL_FEAT_AVAIL(b,PLL_HAS_AVX2);
-
-  /* TODO: note, here we have to check whether leaf 0x80000001 exists */
-  cpuid(0x80000001,0,&a,&b,&c,&d);
-
-  hw->has_sse4a = PLL_FEAT_AVAIL(c,PLL_HAS_SSE4A);
-  hw->has_fma4  = PLL_FEAT_AVAIL(c,PLL_HAS_FMA4);
-}
-
-int pllGetHardwareInfo (pllHardwareInfo * hw)
-{
-  pll_probe_hardware (hw);
-  pll_probe_cpu (hw);
-
-  /* TODO: finish failure checks in probe_hardware and probe_cpu */
-  return (1);
-
-}
-
-/* TODO: Remove after testing */
-/* 
-int main (int argc, char * argv[])
-{ 
-  pllHardwareInfo hw;
-
-  pll_probe_hardware(&hw);
-  pll_probe_cpu(&hw);
-
-  show_hardware_info(&hw);
-  return (EXIT_SUCCESS);
-}
-*/
diff --git a/pllrepo/src/hardware.h b/pllrepo/src/hardware.h
deleted file mode 100644
index d1bfa33..0000000
--- a/pllrepo/src/hardware.h
+++ /dev/null
@@ -1,48 +0,0 @@
-#ifndef PLL_HARDWARE
-#define PLL_HARDWARE
-
-/* leaf 1 */
-/* edx */
-#define PLL_HAS_MMX             1 << 23
-#define PLL_HAS_SSE             1 << 25
-#define PLL_HAS_SSE2            1 << 26
-
-/* ecx */
-#define PLL_HAS_SSE3            1
-#define PLL_HAS_SSSE3           1 <<  9
-#define PLL_HAS_FMA             1 << 12
-#define PLL_HAS_SSE41           1 << 19
-#define PLL_HAS_SSE42           1 << 20
-#define PLL_HAS_AVX             1 << 28
-
-
-/* leaf 7 */
-/* ebx */
-#define PLL_HAS_AVX2            1 <<  5
-
-/* leaf 0x80000001 */
-/* ecx*/
-#define PLL_HAS_SSE4A           1 <<  6
-#define PLL_HAS_FMA4            1 << 16
-
-typedef struct
-{
-  int has_mmx;
-  int has_sse;
-  int has_sse2;
-  int has_sse3;
-  int has_ssse3;
-  int has_sse41;
-  int has_sse42;
-  int has_sse4a;
-  int has_avx;
-  int has_avx2;
-  int has_fma;
-  int has_fma4;
-  int cpu_sockets;
-  int cores;
-  char vendor[13];
-
-} pllHardwareInfo;
-
-#endif
diff --git a/pllrepo/src/hash.c b/pllrepo/src/hash.c
deleted file mode 100644
index 4a68225..0000000
--- a/pllrepo/src/hash.c
+++ /dev/null
@@ -1,219 +0,0 @@
-/** 
- * PLL (version 1.0.0) a software library for phylogenetic inference
- * Copyright (C) 2013 Tomas Flouri and Alexandros Stamatakis
- *
- * Derived from 
- * RAxML-HPC, a program for sequential and parallel estimation of phylogenetic
- * trees by Alexandros Stamatakis
- *
- * This program is free software: you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation, either version 3 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- * 
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * For any other enquiries send an Email to Tomas Flouri
- * Tomas.Flouri at h-its.org
- *
- * When publishing work that uses PLL please cite PLL
- * 
- * @file hash.c
- */
-#include <stdio.h>
-#include <string.h>
-#include "pll.h"
-#include "mem_alloc.h"
-
-static const unsigned int initTable[] = 
-  {
-    53,         97,         193,       389,       769,    
-    1543,       3079,       6151,      12289,     24593, 
-    49157,      98317,      196613,    393241,    786433, 
-    1572869,    3145739,    6291469,   12582917,  25165843, 
-    50331653,   100663319,  201326611, 402653189, 805306457, 
-    1610612741, 3221225473, 4294967291
-  };
-       
-/** @brief Generate the hash value for a string 
-
-    Generates the hash value of a string \a s.
-
-    @param s     The string to compute the hash for
-    @param size  Size of the hash table
-    @return      String hash \a s, i.e. index in hash table
-*/
-unsigned int pllHashString (const char * s, unsigned int size)
-{
-  unsigned int hash = 0;
-
-  for (; *s; ++s) hash = (hash << 5) - hash + (unsigned int )*s;
-
-  return (hash % size);
-}
-
-/** @brief Add a string and its data to a hashtable
-    
-    Add an \a item and possibly a string \a s to hashtable \a hTable at position
-    \a hash, where \a hash must be a value between 0 and \a hTable->size - 1. If
-    string \a s is given and another record with the same computed hash and the
-    same associated string exists in the hash table, then the new record will \b not be added and the
-    value \b PLL_FALSE is returned. Otherwise, the new item is added at the
-    beginning of the corresponding linked list and the value \b PLL_TRUE is
-    returned.
-
-    @param hTable Hashtable
-    @param hash   Position where to store in hash table
-    @param s      String
-    @param item   Data associated with \a s
-    @return       Returns \b PLL_TRUE if added with success, otherwise \b PLL_FALSE
-*/
-int pllHashAdd  (pllHashTable * hTable, unsigned int hash, const char * s, void * item)
-{
-  pllHashItem * hItem;
-
-  hItem = hTable->Items[hash];
-
-  /* If a string was given, check whether the record already exists */
-  if (s)
-   {
-     for (; hItem; hItem = hItem->next)
-      {
-        if (hItem->str && !strcmp (s, hItem->str)) return (PLL_FALSE);
-      }
-   }
-
-  hItem = (pllHashItem *) rax_malloc (sizeof (pllHashItem));
-
-  /* store the string together with the element if given */
-  if (s)
-   {
-     hItem->str = (char *) rax_malloc ((strlen(s) + 1) * sizeof (char));
-     strcpy (hItem->str, s);
-   }
-  else
-   hItem->str = NULL;
-
-  hItem->data = item;
-
-  hItem->next = hTable->Items[hash];
-  hTable->Items[hash] = hItem;
-  hTable->entries += 1;
-
-  return (PLL_TRUE);
-}
-
-       
-/** @brief Initialize hash table
-    
-    Create a hash table of size at least \a n. The size of the hash table will
-    be the first prime number higher or equal to \a n.
-
-    @param n  Minimum size of hash table
-    @return   In case of success, returns a pointer to the created hash table, otherwise returns \b NULL
-*/
-pllHashTable * pllHashInit (unsigned int n)
-{ 
-  pllHashTable * hTable;
-  unsigned int i;
-  unsigned int primeTableLength;
-       
-  hTable = (pllHashTable *) rax_malloc (sizeof (pllHashTable));
-  if (!hTable) return (NULL);
-  
-  primeTableLength = sizeof (initTable) / sizeof(initTable[0]);
-
-  i = 0;
- 
-  while (initTable[i] < n && i < primeTableLength) ++ i;
- 
-  n = initTable[i];  
- 
-  hTable->Items = (pllHashItem **) rax_calloc (n, sizeof (pllHashItem *));
-  if (!hTable->Items)
-   {
-     rax_free (hTable);
-     return (NULL);
-   }
-  hTable->size    = n;
-  hTable->entries = 0;
- 
-  return (hTable);
-}
-
-/** @brief Retrieve the data stored in hash table for a given string
-
-    Retrieve the data stored in hash table \a hTable under a given string \a s.
-    In case the string is found in the hash table, the associated data are
-    stored in \a item and the function returns \b PLL_TRUE. In the opposite
-    case, or if \a s is given as \b NULL then \b PLL_FALSE is returned.
-
-    @param hTable   Hash table to be searched
-    @param s        String to look for
-    @param item     Where to store the retrieved data
-    @return         Returns \b PLL_TRUE if the string was found, otherwise \b PLL_FALSE
-*/
-int pllHashSearch (pllHashTable * hTable, char * s, void ** item)
-{
-  unsigned int pos;
-  pllHashItem * hItem;
-
-  if (!s) return (PLL_FALSE);
-
-  pos   = pllHashString (s, hTable->size);
-  hItem = hTable->Items[pos];
-
-  for (; hItem; hItem = hItem->next)
-   {
-     if (hItem->str && !strcmp (s, hItem->str))
-      {
-        *item = hItem->data;
-        return (PLL_TRUE);
-      }
-   }
-
-  return (PLL_FALSE);
-}
-
-/** @brief Deallocate a hash table
-
-    Deallocates the hash table. A callback function may be specified as \a
-    cbDealloc which will be executed upon all \a data elements of the hash
-    table, for deallocating custom data. If no deallocation is required for the
-    custom data, then \a cbDealloc must be set to \b NULL. The strings
-    associated with each hash element are deallocated.
-
-    @param hTable    Hash table to be deallocated
-    @pram  cbDealloc Callback function to perform deallocation of each data element of the hash table
-    @notes
-      Deallocates the structure for the hash table. Note that the 
-      data associated with the indexed strings are not deallocated.
-*/
-void pllHashDestroy (pllHashTable ** hTable, void (*cbDealloc)(void *))
-{
-  unsigned int i;
-  pllHashItem * hItem;
-  pllHashItem * tmp;
-
-  for (i = 0; i < (*hTable)->size; ++ i)
-  {
-    hItem = (*hTable)->Items[i];
-    while (hItem)
-     {
-       tmp   = hItem;
-       hItem = hItem->next;
-       if (tmp->str)  rax_free (tmp->str);
-       if (cbDealloc) cbDealloc (tmp->data);
-       rax_free (tmp);
-     }
-  }
-  rax_free ((*hTable)->Items);
-  rax_free (*hTable);
-  *hTable = NULL;
-}
diff --git a/pllrepo/src/hash.h b/pllrepo/src/hash.h
deleted file mode 100644
index a550f38..0000000
--- a/pllrepo/src/hash.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/** 
- * PLL (version 1.0.0) a software library for phylogenetic inference
- * Copyright (C) 2013 Tomas Flouri and Alexandros Stamatakis
- *
- * Derived from 
- * RAxML-HPC, a program for sequential and parallel estimation of phylogenetic
- * trees by Alexandros Stamatakis
- *
- * This program is free software: you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation, either version 3 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- * 
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * For any other enquiries send an Email to Tomas Flouri
- * Tomas.Flouri at h-its.org
- *
- * When publishing work that uses PLL please cite PLL
- * 
- * @file hash.h
- */
-#ifndef __pll_HASH__
-#define __pll_HASH__
-
-/* One entry of a bucket chain: a string key, the user data stored under that
-   key and the link to the next entry of the same bucket. */
-struct pllHashItem
-{
-  void * data;
-  char * str;
-  struct pllHashItem * next;
-};
-
-/* The hash table: an array of `size` bucket chains. */
-struct pllHashTable
-{
-  unsigned int size;
-  struct pllHashItem ** Items;
-};
-
-unsigned int pllHashString (const char * s, unsigned int size);
-int pllHashAdd  (struct pllHashTable * hTable, const char * s, void * item);
-struct pllHashTable * pllHashInit (unsigned int n);
-int pllHashSearch (struct pllHashTable * hTable, char * s, void ** item);
-/* the second argument is the optional deallocation callback for the data
-   elements (NULL for none); it must match the definition in hash.c */
-void pllHashDestroy (struct pllHashTable ** hTable, void (*cbDealloc)(void *));
-#endif
diff --git a/pllrepo/src/lexer.c b/pllrepo/src/lexer.c
deleted file mode 100644
index 1cbf614..0000000
--- a/pllrepo/src/lexer.c
+++ /dev/null
@@ -1,299 +0,0 @@
-/** 
- * PLL (version 1.0.0) a software library for phylogenetic inference
- * Copyright (C) 2013 Tomas Flouri and Alexandros Stamatakis
- *
- * Derived from 
- * RAxML-HPC, a program for sequential and parallel estimation of phylogenetic
- * trees by Alexandros Stamatakis
- *
- * This program is free software: you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation, either version 3 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- * 
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * For any other enquiries send an Email to Tomas Flouri
- * Tomas.Flouri at h-its.org
- *
- * When publishing work that uses PLL please cite PLL
- * 
- * @file lexer.c
- */
-#include <stdio.h>
-#include "lexer.h"
-
-static const char * rawtext;   /* input buffer being scanned, set by init_lexan() */
-static long rawtext_size;      /* number of bytes in rawtext, set by init_lexan() */
-static long pos = 0;           /* index of the next byte get_next_byte() returns */
-/* Symbol class (PLL_SYM_*) of every byte value below PLL_ASCII_SIZE, indexed
-   by the byte value itself. The comment in front of each row lists the four
-   characters that the row describes (blank for the control characters).
-
-   Note the naming of the line-break classes: byte 10 ('\n') is classified as
-   PLL_SYM_CR and byte 13 ('\r') as PLL_SYM_LF; get_next_symbol() merges a
-   PLL_SYM_LF byte that is followed by '\n' into PLL_SYM_LFCR.
-
-   The table is deliberately writable: lex_table_amend_phylip() and
-   lex_table_amend_fasta() reclassify '-', '.' and '>' as PLL_SYM_CHAR, and
-   lex_table_restore() puts the entries below back in place. */
-int lex_table[PLL_ASCII_SIZE] = {
-/*      */ PLL_SYM_UNKNOWN, PLL_SYM_UNKNOWN, PLL_SYM_UNKNOWN,   PLL_SYM_UNKNOWN,
-/*      */ PLL_SYM_UNKNOWN, PLL_SYM_UNKNOWN, PLL_SYM_UNKNOWN,   PLL_SYM_UNKNOWN,
-/*      */ PLL_SYM_UNKNOWN,     PLL_SYM_TAB,      PLL_SYM_CR,   PLL_SYM_UNKNOWN,
-/*      */ PLL_SYM_UNKNOWN,      PLL_SYM_LF, PLL_SYM_UNKNOWN,   PLL_SYM_UNKNOWN,
-/*      */ PLL_SYM_UNKNOWN, PLL_SYM_UNKNOWN, PLL_SYM_UNKNOWN,   PLL_SYM_UNKNOWN,
-/*      */ PLL_SYM_UNKNOWN, PLL_SYM_UNKNOWN, PLL_SYM_UNKNOWN,   PLL_SYM_UNKNOWN,
-/*      */ PLL_SYM_UNKNOWN, PLL_SYM_UNKNOWN, PLL_SYM_UNKNOWN,   PLL_SYM_UNKNOWN,
-/*      */ PLL_SYM_UNKNOWN, PLL_SYM_UNKNOWN, PLL_SYM_UNKNOWN,   PLL_SYM_UNKNOWN,
-/*  !"# */   PLL_SYM_SPACE, PLL_SYM_UNKNOWN, PLL_SYM_UNKNOWN,   PLL_SYM_UNKNOWN,
-/* $%&' */ PLL_SYM_UNKNOWN, PLL_SYM_UNKNOWN, PLL_SYM_UNKNOWN,   PLL_SYM_UNKNOWN,
-/* ()*+ */  PLL_SYM_OPAREN,  PLL_SYM_CPAREN, PLL_SYM_UNKNOWN,      PLL_SYM_PLUS,
-/* ,-./ */   PLL_SYM_COMMA,    PLL_SYM_DASH,     PLL_SYM_DOT,     PLL_SYM_SLASH,
-/* 0123 */   PLL_SYM_DIGIT,   PLL_SYM_DIGIT,   PLL_SYM_DIGIT,     PLL_SYM_DIGIT,
-/* 4567 */   PLL_SYM_DIGIT,   PLL_SYM_DIGIT,   PLL_SYM_DIGIT,     PLL_SYM_DIGIT,
-/* 89:; */   PLL_SYM_DIGIT,   PLL_SYM_DIGIT,   PLL_SYM_COLON, PLL_SYM_SEMICOLON,
-/* <=>? */ PLL_SYM_UNKNOWN,   PLL_SYM_EQUAL, PLL_SYM_UNKNOWN,      PLL_SYM_CHAR,
-/* @ABC */ PLL_SYM_UNKNOWN,    PLL_SYM_CHAR,    PLL_SYM_CHAR,      PLL_SYM_CHAR,
-/* DEFG */    PLL_SYM_CHAR,    PLL_SYM_CHAR,    PLL_SYM_CHAR,      PLL_SYM_CHAR,
-/* HIJK */    PLL_SYM_CHAR,    PLL_SYM_CHAR,    PLL_SYM_CHAR,      PLL_SYM_CHAR,
-/* LMNO */    PLL_SYM_CHAR,    PLL_SYM_CHAR,    PLL_SYM_CHAR,      PLL_SYM_CHAR,
-/* PQRS */    PLL_SYM_CHAR,    PLL_SYM_CHAR,    PLL_SYM_CHAR,      PLL_SYM_CHAR,
-/* TUVW */    PLL_SYM_CHAR,    PLL_SYM_CHAR,    PLL_SYM_CHAR,      PLL_SYM_CHAR,
-/* XYZ[ */    PLL_SYM_CHAR,    PLL_SYM_CHAR,    PLL_SYM_CHAR,   PLL_SYM_UNKNOWN,
-/* \]^_ */ PLL_SYM_UNKNOWN, PLL_SYM_UNKNOWN, PLL_SYM_UNKNOWN,      PLL_SYM_CHAR,
-/* `abc */ PLL_SYM_UNKNOWN,    PLL_SYM_CHAR,    PLL_SYM_CHAR,      PLL_SYM_CHAR,
-/* defg */    PLL_SYM_CHAR,    PLL_SYM_CHAR,    PLL_SYM_CHAR,      PLL_SYM_CHAR,
-/* hijk */    PLL_SYM_CHAR,    PLL_SYM_CHAR,    PLL_SYM_CHAR,      PLL_SYM_CHAR,
-/* lmno */    PLL_SYM_CHAR,    PLL_SYM_CHAR,    PLL_SYM_CHAR,      PLL_SYM_CHAR,
-/* pqrs */    PLL_SYM_CHAR,    PLL_SYM_CHAR,    PLL_SYM_CHAR,      PLL_SYM_CHAR,
-/* tuvw */    PLL_SYM_CHAR,    PLL_SYM_CHAR,    PLL_SYM_CHAR,      PLL_SYM_CHAR,
-/* xyz{ */    PLL_SYM_CHAR,    PLL_SYM_CHAR,    PLL_SYM_CHAR,   PLL_SYM_UNKNOWN,
-/* |}~  */    PLL_SYM_CHAR, PLL_SYM_UNKNOWN, PLL_SYM_UNKNOWN,   PLL_SYM_UNKNOWN
- };
-
-/* Returns the next byte of the input as a value in the range 0..255, or
-   PLL_EOS once the input is exhausted. The read position is advanced in
-   both cases, so a caller can undo a read with --pos (get_next_symbol()
-   does so after peeking at the byte that follows a line break). */
-int 
-get_next_byte (void)
-{
-  /* >= instead of ==: a call made after PLL_EOS was already returned must
-     keep reporting PLL_EOS rather than read past the end of the buffer */
-  if (pos >= rawtext_size) 
-   {
-     ++pos;
-     return (PLL_EOS);
-   }
-
-  /* go through unsigned char: where plain char is signed, bytes >= 0x80
-     would otherwise come back negative and index lex_table out of bounds */
-  return ((unsigned char) rawtext[pos++]);
-}
-
-/* Reads the next byte and returns its symbol class (PLL_SYM_*):
-   PLL_SYM_EOF at the end of the input, PLL_SYM_UNKNOWN for any byte outside
-   the range covered by lex_table, otherwise the class stored in lex_table.
-   A PLL_SYM_LF byte immediately followed by '\n' is consumed together with
-   that '\n' and reported as the single symbol PLL_SYM_LFCR. */
-int
-get_next_symbol (void)
-{
-  int ch, sym;
-
-  ch = get_next_byte ();
-
-  if (ch == PLL_EOS) return (PLL_SYM_EOF);
-  /* the lower bound matters when get_next_byte() yields a negative value
-     for a byte >= 0x80 (plain char being signed) */
-  if (ch < 0 || ch >= PLL_ASCII_SIZE) return (PLL_SYM_UNKNOWN);
-
-  sym = lex_table[ch];
-
-  if (sym == PLL_SYM_LF)
-   {
-     /* peek at the following byte; get_next_byte() advances pos even at the
-        end of the input, so the peek can always be undone with --pos */
-     if (get_next_byte() == '\n')
-      {
-        sym = PLL_SYM_LFCR;
-      }
-     else
-      {
-        --pos;
-      }
-   }
-
-  return sym;
-}
-
-/* Builds the token that starts with the symbol held in *input and, for every
-   token kind except EOF and UNKNOWN, advances *input to the first symbol
-   after the token. For EOF and UNKNOWN *input is left unchanged, so the
-   caller has to move on by itself.
-
-   token.lexeme points at the first character of the token inside the input
-   buffer (it is not NUL-terminated). token.len is the number of characters
-   for NUMBER, FLOAT, STRING and WHITESPACE tokens, 1 for the single-character
-   punctuation tokens and 0 for NEWLINE, EOF and UNKNOWN tokens. */
-pllLexToken
-get_token (int * input)
-{
-  pllLexToken token;
-  int
-    start_pos,
-    isFloating = 0;
-
-  /* pos is already one past the character that produced *input */
-  token.lexeme = rawtext + pos - 1;
-  /* never hand back an indeterminate length: several cases below do not
-     compute one */
-  token.len    = 0;
-  start_pos    = pos;
-
-  switch (*input)
-   {
-     case PLL_SYM_SLASH:
-       token.tokenType = PLL_TOKEN_SLASH;
-       token.len = 1;
-       *input = get_next_symbol();
-       break;
-
-     case PLL_SYM_DASH:
-       token.tokenType = PLL_TOKEN_DASH;
-       token.len = 1;
-       *input = get_next_symbol();
-       break;
-
-     case PLL_SYM_EQUAL:
-       token.tokenType = PLL_TOKEN_EQUAL;
-       token.len = 1;
-       *input = get_next_symbol();
-       break;
-
-     case PLL_SYM_SEMICOLON:
-       token.tokenType = PLL_TOKEN_SEMICOLON;
-       token.len = 1;
-       *input = get_next_symbol();
-       break;
-
-     case PLL_SYM_COMMA:
-       token.tokenType = PLL_TOKEN_COMMA;
-       token.len = 1;
-       *input = get_next_symbol();
-       break;
-
-     case PLL_SYM_COLON:
-       token.tokenType = PLL_TOKEN_COLON;
-       token.len = 1;
-       *input = get_next_symbol();
-       break;
-
-     case PLL_SYM_OPAREN:
-       token.tokenType = PLL_TOKEN_OPAREN;
-       token.len = 1;
-       *input = get_next_symbol();
-       break;
-
-     case PLL_SYM_CPAREN:
-       token.tokenType = PLL_TOKEN_CPAREN;
-       token.len = 1;
-       *input = get_next_symbol();
-       break;
-
-     case PLL_SYM_SPACE:
-     case PLL_SYM_TAB:
-       do
-        {
-          *input = get_next_symbol();
-        } while (*input == PLL_SYM_SPACE || *input == PLL_SYM_TAB);
-       token.len   = pos - start_pos;
-       token.tokenType = PLL_TOKEN_WHITESPACE; 
-       /* an LFCR lookahead consumed two bytes instead of one */
-       if (*input == PLL_SYM_LFCR) --token.len;
-       break;
-       
-     case PLL_SYM_DIGIT:
-       /* integer part */
-       do
-        {
-          *input = get_next_symbol();   
-        } while (*input == PLL_SYM_DIGIT);
-
-       /* optional fractional part */
-       if (*input == PLL_SYM_DOT)
-        {
-          isFloating = 1;
-          do
-           {
-             *input = get_next_symbol ();
-           } while (*input == PLL_SYM_DIGIT);
-        }
-
-       if (*input != PLL_SYM_CHAR)
-        {
-          token.len   = pos - start_pos;
-          if (!isFloating)
-            token.tokenType = PLL_TOKEN_NUMBER;
-          else
-            token.tokenType = PLL_TOKEN_FLOAT;
-        }
-       else
-        {
-          /* check for E notation */
-          if (rawtext[pos - 1] == 'E' || rawtext[pos - 1] == 'e')
-           {
-             *input = get_next_symbol ();
-
-             if (*input == PLL_SYM_PLUS || *input == PLL_SYM_DASH || *input == PLL_SYM_DIGIT)
-              {
-                do
-                 {
-                   *input = get_next_symbol ();
-                 } while (*input == PLL_SYM_DIGIT);
-
-                if (*input != PLL_SYM_CHAR)
-                 {
-                   token.len = pos - start_pos;
-                   token.tokenType = PLL_TOKEN_FLOAT;
-                 }
-              }
-             else
-              {
-                token.len = pos - start_pos;
-                token.tokenType = PLL_TOKEN_STRING;
-              }
-           }
-
-          /* digits followed by further characters form a string */
-          if (*input == PLL_SYM_CHAR)
-           {
-             do {
-               *input = get_next_symbol();
-             } while (*input == PLL_SYM_CHAR || *input == PLL_SYM_DIGIT || *input == PLL_SYM_DOT);
-             token.len   = pos - start_pos;
-             token.tokenType = PLL_TOKEN_STRING;
-           }
-        }
-
-       if (*input == PLL_SYM_LFCR) --token.len;
-       break;
-
-     case PLL_SYM_CHAR:
-       do
-        {
-          *input = get_next_symbol();
-        } 
-       while (*input == PLL_SYM_CHAR  || 
-              *input == PLL_SYM_DIGIT || 
-              *input == PLL_SYM_DASH  ||
-              *input == PLL_SYM_DOT);
-       token.len   = pos - start_pos;
-       token.tokenType = PLL_TOKEN_STRING;
-       if (*input == PLL_SYM_LFCR) --token.len;
-       break;
-       
-     case PLL_SYM_EOF:
-       token.tokenType = PLL_TOKEN_EOF;
-       break;
-
-     case PLL_SYM_CR:
-     case PLL_SYM_LF:
-     case PLL_SYM_LFCR:
-       /* a run of consecutive line breaks yields one NEWLINE token */
-       do
-        {
-          *input = get_next_symbol();
-        } while (*input == PLL_SYM_CR || *input == PLL_SYM_LFCR || *input == PLL_SYM_LF);
-       token.tokenType = PLL_TOKEN_NEWLINE;
-       break;
-     case PLL_SYM_UNKNOWN:
-     default:
-       token.tokenType = PLL_TOKEN_UNKNOWN;
-       break;
-   }
-
-  return (token);
-}
-
-/* Reclassifies '-' and '.' as ordinary characters (PLL_SYM_CHAR) in lex_table. */
-void
-lex_table_amend_phylip (void)
-{
-  lex_table['.'] = PLL_SYM_CHAR;
-  lex_table['-'] = PLL_SYM_CHAR;
-}
-
-/* Reclassifies '-', '.' and '>' as ordinary characters (PLL_SYM_CHAR) in lex_table. */
-void
-lex_table_amend_fasta (void)
-{
-  lex_table['>'] = PLL_SYM_CHAR;
-  lex_table['.'] = PLL_SYM_CHAR;
-  lex_table['-'] = PLL_SYM_CHAR;
-}
-
-/* Undoes lex_table_amend_phylip() / lex_table_amend_fasta(): '-', '.' and '>'
-   get back the classes they have in the initializer of lex_table. */
-void
-lex_table_restore (void)
-{
-  lex_table['>'] = PLL_SYM_UNKNOWN;
-  lex_table['.'] = PLL_SYM_DOT;
-  lex_table['-'] = PLL_SYM_DASH;
-}
-
-/* Points the lexer at a new input buffer of n bytes and rewinds the read
-   position to its start. The buffer is referenced, not copied. */
-void
-init_lexan (const char * text, long n)
-{
-  pos          = 0;
-  rawtext_size = n;
-  rawtext      = text;
-}
diff --git a/pllrepo/src/lexer.h b/pllrepo/src/lexer.h
deleted file mode 100644
index 6924259..0000000
--- a/pllrepo/src/lexer.h
+++ /dev/null
@@ -1,88 +0,0 @@
-/** 
- * PLL (version 1.0.0) a software library for phylogenetic inference
- * Copyright (C) 2013 Tomas Flouri and Alexandros Stamatakis
- *
- * Derived from 
- * RAxML-HPC, a program for sequential and parallel estimation of phylogenetic
- * trees by Alexandros Stamatakis
- *
- * This program is free software: you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation, either version 3 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- * 
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * For any other enquiries send an Email to Tomas Flouri
- * Tomas.Flouri at h-its.org
- *
- * When publishing work that uses PLL please cite PLL
- * 
- * @file lexer.h
- */
-#ifndef __pll_LEXER__
-#define __pll_LEXER__
-
-/* number of entries of lex_table: one per 7-bit byte value */
-#define  PLL_ASCII_SIZE                128
-/* end-of-input marker of get_next_byte(); lies outside the byte range */
-#define  PLL_EOS                       0x00000200
-
-/* Symbol classes returned by get_next_symbol(). Every replacement list is
-   parenthesized so that the macros keep their value next to operators that
-   bind tighter than <<, e.g. PLL_SYM_DIGIT * 2 or ~PLL_SYM_CHAR. */
-#define  PLL_SYM_CR                    (1 << 0)
-#define  PLL_SYM_LF                    (1 << 1)
-#define  PLL_SYM_LFCR                  (1 << 2)
-#define  PLL_SYM_DIGIT                 (1 << 3)
-#define  PLL_SYM_CHAR                  (1 << 4)
-#define  PLL_SYM_SPACE                 (1 << 5)
-#define  PLL_SYM_TAB                   (1 << 6)
-#define  PLL_SYM_EOF                   (1 << 7)
-#define  PLL_SYM_UNKNOWN               (1 << 8)
-#define  PLL_SYM_DOT                   (1 << 9)
-#define  PLL_SYM_COLON                 (1 << 10)
-#define  PLL_SYM_OPAREN                (1 << 11)
-#define  PLL_SYM_CPAREN                (1 << 12)
-#define  PLL_SYM_COMMA                 (1 << 13)
-#define  PLL_SYM_SEMICOLON             (1 << 14)
-#define  PLL_SYM_EQUAL                 (1 << 15)
-#define  PLL_SYM_DASH                  (1 << 16)
-#define  PLL_SYM_SLASH                 (1 << 17)
-#define  PLL_SYM_PLUS                  (1 << 18)
-
-/* Token types stored in pllLexToken.tokenType; distinct bits, so that several
-   types can be or-ed together, e.g. as the argument of CONSUME. */
-#define  PLL_TOKEN_NUMBER              (1 << 0)
-#define  PLL_TOKEN_STRING              (1 << 1)
-#define  PLL_TOKEN_EOF                 (1 << 2)
-#define  PLL_TOKEN_WHITESPACE          (1 << 3)
-#define  PLL_TOKEN_NEWLINE             (1 << 4)
-#define  PLL_TOKEN_UNKNOWN             (1 << 5)
-#define  PLL_TOKEN_COLON               (1 << 6)
-#define  PLL_TOKEN_OPAREN              (1 << 7)
-#define  PLL_TOKEN_CPAREN              (1 << 8)
-#define  PLL_TOKEN_FLOAT               (1 << 9)
-#define  PLL_TOKEN_COMMA               (1 << 10)
-#define  PLL_TOKEN_SEMICOLON           (1 << 11)
-#define  PLL_TOKEN_EQUAL               (1 << 12)
-#define  PLL_TOKEN_DASH                (1 << 13)
-#define  PLL_TOKEN_SLASH               (1 << 14)
-
-/* Both helpers expect variables named `token` (pllLexToken) and `input` (int)
-   in the calling scope and already end in a semicolon. */
-#define CONSUME(x)         while (token.tokenType & (x)) token = get_token (&input);
-#define NEXT_TOKEN         token = get_token (&input);
-
-/* A token: its type (PLL_TOKEN_*), a pointer to its first character inside
-   the input buffer and its length in characters. */
-typedef struct
- {
-   int          tokenType;
-   const char * lexeme;
-   int          len;
- } pllLexToken;
-
-int get_next_byte (void);
-int get_next_symbol (void);
-pllLexToken get_token (int * input);
-void init_lexan (const char * text, long n);
-void lex_table_amend_phylip (void);
-void lex_table_amend_fasta (void);
-void lex_table_restore (void);
-#endif
diff --git a/pllrepo/src/makenewzGenericSpecial.c b/pllrepo/src/makenewzGenericSpecial.c
deleted file mode 100644
index b2b114a..0000000
--- a/pllrepo/src/makenewzGenericSpecial.c
+++ /dev/null
@@ -1,3145 +0,0 @@
-/** 
- * PLL (version 1.0.0) a software library for phylogenetic inference
- * Copyright (C) 2013 Tomas Flouri and Alexandros Stamatakis
- *
- * Derived from 
- * RAxML-HPC, a program for sequential and parallel estimation of phylogenetic
- * trees by Alexandros Stamatakis
- *
- * This program is free software: you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation, either version 3 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- * 
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * For any other enquiries send an Email to Tomas Flouri
- * Tomas.Flouri at h-its.org
- *
- * When publishing work that uses PLL please cite PLL
- * 
- * @file makenewzGenericSpecial.c
- */
-#include "mem_alloc.h"
-
-#ifndef WIN32
-#include <unistd.h>
-#endif
-
-#include <math.h>
-#include <time.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <ctype.h>
-#include <string.h>
-#include <assert.h>
-
-#include "pll.h"
-#include "pllInternal.h"
-
-#ifdef __SSE3
-#include <xmmintrin.h>
-#include <pmmintrin.h>
-/*#include <tmmintrin.h>*/
-#endif
-
-#ifdef __MIC_NATIVE
-#include "mic_native.h"
-#endif
-
-
-/** @file makenewzGenericSpecial.c
- *  
- *  @brief Branch length optimization
- */
-
-
-
-/* pointers to reduction buffers for storing and gathering the first and second derivative 
-   of the likelihood in Pthreads and MPI */
-
-#if IS_PARALLEL
-void branchLength_parallelReduce(pllInstance *tr, double *dlnLdlz,  double *d2lnLdlz2, int numBranches ) ;
-//extern double *globalResult;
-#endif
-
-
-extern const unsigned int mask32[32];
-
-#if (defined(__SSE3) || defined(__AVX))
-static void sumGAMMA_BINARY(int tipCase, double *sumtable, double *x1_start, double *x2_start, double *tipVector,
-                            unsigned char *tipX1, unsigned char *tipX2, int n);
-static void coreGTRGAMMA_BINARY(const int upper, double *sumtable,
-                                volatile double *d1,   volatile double *d2, double *EIGN, double *gammaRates, double lz, int *wrptr);
-static void coreGTRCAT_BINARY(int upper, int numberOfCategories, double *sum,
-                              volatile double *d1, volatile double *d2, 
-                              double *rptr, double *EIGN, int *cptr, double lz, int *wgt);
-static void sumCAT_BINARY(int tipCase, double *sum, double *x1_start, double *x2_start, double *tipVector,
-                          unsigned char *tipX1, unsigned char *tipX2, int n);
-#endif
-
-/*******************/
-
-
-/* generic function to get the required pointers to the data associated with the left and right node that define a branch
-
-   The branch is the one described by the first traversal entry
-   tr->td[0].ti[0], i.e. the nodes pNumber and qNumber. For partition `model`
-   the function reports through its output parameters:
-
-     tipCase          PLL_TIP_TIP, PLL_TIP_INNER or PLL_INNER_INNER
-     tipX1, tipX2     yVector entries of the tip nodes, NULL where unused
-     x1_start,
-     x2_start         xVector entries of the inner nodes, NULL where unused
-     x1_start_asc,
-     x2_start_asc     positions inside ascVector; NULL unless ascBias is set
-                      for the partition (and, in the _FINE_GRAIN_MPI and
-                      _USE_PTHREADS builds, unless tr->threadID is 0)
-     x1_gap, x2_gap,
-     x1_gapColumn,
-     x2_gapColumn     positions inside gapVector / gapColumn; written only
-                      when tr->saveMemory is set, otherwise left untouched
-
-   In the PLL_TIP_INNER case the tip is always reported through tipX1 and the
-   inner node through the x2_* outputs, whichever of p and q is the tip.
-
-   NOTE(review): ascVector and gapColumn are indexed with
-   (node number - tr->mxtips - 1) even when tr->useRecom selects other
-   xVector slots - confirm that this is intended. */
-
-static void getVects(pllInstance *tr, 
-                     partitionList *pr, 
-                     unsigned char **tipX1, unsigned char **tipX2, 
-                     double **x1_start, double **x2_start, 
-                     int *tipCase, 
-                     int model, 
-                     double **x1_gapColumn, double **x2_gapColumn, 
-                     unsigned int **x1_gap, unsigned int **x2_gap,
-                     double ** x1_start_asc,
-                     double ** x2_start_asc)
-{
-  int    
-    rateHet = (int)discreteRateCategories(tr->rateHetModel),
-            states = pr->partitionData[model]->states,
-            pNumber, 
-            qNumber; 
-
-  /* get the left and right node number of the nodes defining the branch we want to optimize */
-
-  pNumber = tr->td[0].ti[0].pNumber;
-  qNumber = tr->td[0].ti[0].qNumber;
-
-  /* get the index where the ancestral vector is expected to be found:
-     the slots recorded in the traversal entry when tr->useRecom is set,
-     otherwise the node number shifted by the number of tips */
-  int p_slot, q_slot;
-  if(tr->useRecom)
-  {
-    p_slot = tr->td[0].ti[0].slot_p; 
-    q_slot = tr->td[0].ti[0].slot_q;
-  }
-  else
-  {
-    p_slot = pNumber - tr->mxtips - 1;
-    q_slot = qNumber - tr->mxtips - 1;
-  }
-   
-
-  /* initialize to NULL (the gap outputs are not reset here) */
-
-  *x1_start = (double*)NULL,
-  *x2_start = (double*)NULL;
-  
-  *tipX1 = (unsigned char*)NULL,
-  *tipX2 = (unsigned char*)NULL;
-
-  *x1_start_asc = NULL;
-  *x2_start_asc = NULL;
-
-  /* switch over the different tip cases again here */
-
-  if(isTip(pNumber, tr->mxtips) || isTip(qNumber, tr->mxtips))
-  {      
-    if(!( isTip(pNumber, tr->mxtips) && isTip(qNumber, tr->mxtips)) )
-    {
-      *tipCase = PLL_TIP_INNER;
-      if(isTip(qNumber, tr->mxtips))
-      {
-        *tipX1 = pr->partitionData[model]->yVector[qNumber];   /* q is the tip, p the inner node */
-        *x2_start = pr->partitionData[model]->xVector[p_slot];
-
-#if (defined(_FINE_GRAIN_MPI) || defined(_USE_PTHREADS))
-        if(pr->partitionData[model]->ascBias && tr->threadID == 0)
-#else
-          if(pr->partitionData[model]->ascBias)
-#endif
-          {
-            *x2_start_asc = &pr->partitionData[model]->ascVector[(pNumber - tr->mxtips - 1) * pr->partitionData[model]->ascOffset];
-          }
-
-        if(tr->saveMemory)
-        {
-          *x2_gap = &(pr->partitionData[model]->gapVector[pNumber * pr->partitionData[model]->gapVectorLength]);
-          *x2_gapColumn   = &pr->partitionData[model]->gapColumn[(pNumber - tr->mxtips - 1) * states * rateHet];
-        }
-      }
-      else
-      {
-        *tipX1 = pr->partitionData[model]->yVector[pNumber];   /* p is the tip, q the inner node */
-        *x2_start = pr->partitionData[model]->xVector[q_slot];
-
-#if (defined(_FINE_GRAIN_MPI) || defined(_USE_PTHREADS))
-        if(pr->partitionData[model]->ascBias && tr->threadID == 0)
-#else
-          if(pr->partitionData[model]->ascBias)
-#endif  
-          {
-            *x2_start_asc = &pr->partitionData[model]->ascVector[(qNumber - tr->mxtips - 1) * pr->partitionData[model]->ascOffset];
-          }
-
-        if(tr->saveMemory)
-        {
-          *x2_gap = &(pr->partitionData[model]->gapVector[qNumber * pr->partitionData[model]->gapVectorLength]);
-          *x2_gapColumn   = &pr->partitionData[model]->gapColumn[(qNumber - tr->mxtips - 1) * states * rateHet];
-        }
-      }
-    }
-    else
-    {
-      /* note that tip tip should normally not occur since this means that we are trying to optimize 
-         a branch in a two-taxon tree. However, this case has been inherited from a RAxML function 
-         that optimized pair-wise distances between all taxa in a tree */
-
-      *tipCase = PLL_TIP_TIP;
-      *tipX1 = pr->partitionData[model]->yVector[pNumber];
-      *tipX2 = pr->partitionData[model]->yVector[qNumber];
-    }
-  }
-  else
-  {
-    *tipCase = PLL_INNER_INNER;
-
-    *x1_start = pr->partitionData[model]->xVector[p_slot];
-    *x2_start = pr->partitionData[model]->xVector[q_slot];
-
-#if (defined(_FINE_GRAIN_MPI) || defined(_USE_PTHREADS))
-      if(pr->partitionData[model]->ascBias && tr->threadID == 0)
-#else
-        if(pr->partitionData[model]->ascBias)
-#endif
-        {
-          *x1_start_asc = &pr->partitionData[model]->ascVector[(pNumber - tr->mxtips - 1) * pr->partitionData[model]->ascOffset];
-          *x2_start_asc = &pr->partitionData[model]->ascVector[(qNumber - tr->mxtips - 1) * pr->partitionData[model]->ascOffset];
-        }           
-    if(tr->saveMemory)
-    {
-      *x1_gap = &(pr->partitionData[model]->gapVector[pNumber * pr->partitionData[model]->gapVectorLength]);
-      *x1_gapColumn   = &pr->partitionData[model]->gapColumn[(pNumber - tr->mxtips - 1) * states * rateHet];
-
-      *x2_gap = &(pr->partitionData[model]->gapVector[qNumber * pr->partitionData[model]->gapVectorLength]);
-      *x2_gapColumn   = &pr->partitionData[model]->gapColumn[(qNumber - tr->mxtips - 1) * states * rateHet];
-    }
-  }
-
-}
-
-
-/* this is actually a pre-computation and storage of values that remain constant while we change the value of the branch length 
-   we want to adapt. the target pointer sumtable is a single pre-allocated array that has the same 
-   size as a conditional likelihood vector at an inner node.
-
-   So if we want to do a Newton-Raphson optimization we only execute this function once in the beginning for each new branch we are considering !
-   */
-
-#if (!defined(__SSE3) && !defined(__AVX))
-/* Fills sumtable with the per-site, per-state product of the two vectors at
-   the ends of the branch (n sites with `states` entries each). Which inputs
-   are read depends on tipCase:
-     PLL_TIP_TIP      tipVector rows selected by tipX1[i] and tipX2[i]
-     PLL_TIP_INNER    tipVector row selected by tipX1[i], and site i of x2
-     PLL_INNER_INNER  site i of x1 and site i of x2
-   Scaling vectors are not involved here (see the PLL_TIP_INNER branch). */
-static void sumCAT_FLEX(int tipCase, double *sumtable, double *x1, double *x2, double *tipVector,
-    unsigned char *tipX1, unsigned char *tipX2, int n, const int states)
-{
-  int 
-    site, 
-    k;
-
-  double 
-    *out = sumtable, 
-    *lhs, 
-    *rhs;
-
-  switch(tipCase)
-  {
-    case PLL_TIP_TIP:
-      /* both ends are tips: look both operands up in the tip table */
-      for (site = 0; site < n; site++, out += states)
-      {
-        lhs = tipVector + states * tipX1[site];
-        rhs = tipVector + states * tipX2[site];
-
-        /* this product stays constant while the branch length changes; it is
-           later combined with the branch-length dependent P matrix
-           exponentiations */
-        for(k = 0; k < states; k++)
-          out[k] = lhs[k] * rhs[k];
-      }
-      break;
-
-    case PLL_TIP_INNER:
-      /* one tip table row and one inner vector; scaling factors are not
-         needed because their logarithm is an additive term that vanishes in
-         the first and second derivative of the likelihood */
-      rhs = x2;
-      for (site = 0; site < n; site++, out += states, rhs += states)
-      {
-        lhs = tipVector + states * tipX1[site];
-
-        for(k = 0; k < states; k++)
-          out[k] = lhs[k] * rhs[k];
-      }
-      break;
-
-    case PLL_INNER_INNER:
-      /* two inner vectors, walked in lock step */
-      lhs = x1;
-      rhs = x2;
-      for (site = 0; site < n; site++, out += states, lhs += states, rhs += states)
-      {
-        for(k = 0; k < states; k++)
-          out[k] = lhs[k] * rhs[k];
-      }
-      break;
-
-    default:
-      assert(0);
-  }
-}
-#endif
-
-
-
-#if (!defined(__SSE3) && !defined(__AVX))
-
-/* same thing for GAMMA models. The only noteworthy thing here is that we have an additional inner loop over the 
-   number of discrete gamma rates. The data access pattern is also different since for tip vector accesses through our 
-   lookup table, we do not distinguish between rates 
-
-   Note the different access pattern in PLL_TIP_INNER:
-
-   left = &(tipVector[states * tipX1[i]]);        
-   right = &(x2[span * i + l * states]);
-
-*/
-
-/* GAMMA counterpart of the CAT sum table: for each of the n sites and each
-   of the 4 rate categories, stores the per-state product of the two vectors
-   at the ends of the branch in sumtable (4 * states entries per site).
-   Tip operands come from tipVector and are the same for all 4 categories;
-   inner operands (x1, x2) hold one block of `states` values per category. */
-static void sumGAMMA_FLEX(int tipCase, double *sumtable, double *x1, double *x2, double *tipVector,
-    unsigned char *tipX1, unsigned char *tipX2, int n, const int states)
-{
-  int 
-    site, 
-    rate, 
-    s;
-
-  /* sumtable, x1 and x2 share the layout site * (4 * states) + rate * states,
-     so consecutive (site, rate) blocks are contiguous and can be walked with
-     running pointers */
-  double 
-    *out = sumtable, 
-    *lhs, 
-    *rhs;
-
-  switch(tipCase)
-  {
-    case PLL_TIP_TIP:
-      for(site = 0; site < n; site++)
-      {
-        lhs = tipVector + states * tipX1[site];
-        rhs = tipVector + states * tipX2[site];
-
-        for(rate = 0; rate < 4; rate++, out += states)
-        {
-          for(s = 0; s < states; s++)
-            out[s] = lhs[s] * rhs[s];
-        }
-      }
-      break;
-
-    case PLL_TIP_INNER:
-      rhs = x2;
-      for(site = 0; site < n; site++)
-      {
-        lhs = tipVector + states * tipX1[site];
-
-        for(rate = 0; rate < 4; rate++, out += states, rhs += states)
-        {
-          for(s = 0; s < states; s++)
-            out[s] = lhs[s] * rhs[s];
-        }
-      }
-      break;
-
-    case PLL_INNER_INNER:
-      lhs = x1;
-      rhs = x2;
-      for(site = 0; site < n; site++)
-      {
-        for(rate = 0; rate < 4; rate++, out += states, lhs += states, rhs += states)
-        {
-          for(s = 0; s < states; s++)
-            out[s] = lhs[s] * rhs[s];
-        }
-      }
-      break;
-
-    default:
-      assert(0);
-  }
-}
-#endif
-
-/* optimized functions for branch length optimization */
-
-
-#if (defined(__SSE3) || defined(__AVX))
-
-static void sumCAT_SAVE(int tipCase, double *sum, double *x1_start, double *x2_start, double *tipVector,
-    unsigned char *tipX1, unsigned char *tipX2, int n, double *x1_gapColumn, double *x2_gapColumn, unsigned int *x1_gap, unsigned int *x2_gap);
-
-static void sumGAMMA_GAPPED_SAVE(int tipCase, double *sumtable, double *x1_start, double *x2_start, double *tipVector,
-    unsigned char *tipX1, unsigned char *tipX2, int n, 
-    double *x1_gapColumn, double *x2_gapColumn, unsigned int *x1_gap, unsigned int *x2_gap);
-
-static void sumGAMMA(int tipCase, double *sumtable, double *x1_start, double *x2_start, double *tipVector,
-    unsigned char *tipX1, unsigned char *tipX2, int n);
-
-static void sumCAT(int tipCase, double *sum, double *x1_start, double *x2_start, double *tipVector,
-    unsigned char *tipX1, unsigned char *tipX2, int n);
-
-static void sumGAMMAPROT_GAPPED_SAVE(int tipCase, double *sumtable, double *x1, double *x2, double *tipVector,
-    unsigned char *tipX1, unsigned char *tipX2, int n, 
-    double *x1_gapColumn, double *x2_gapColumn, unsigned int *x1_gap, unsigned int *x2_gap);
-
-static void sumGAMMAPROT_LG4(int tipCase, double *sumtable, double *x1, double *x2, double *tipVector[4],
-                             unsigned char *tipX1, unsigned char *tipX2, int n);
-
-static void sumGAMMAPROT(int tipCase, double *sumtable, double *x1, double *x2, double *tipVector,
-    unsigned char *tipX1, unsigned char *tipX2, int n);
-
-static void sumGTRCATPROT(int tipCase, double *sumtable, double *x1, double *x2, double *tipVector,
-    unsigned char *tipX1, unsigned char *tipX2, int n);
-
-static void sumGTRCATPROT_SAVE(int tipCase, double *sumtable, double *x1, double *x2, double *tipVector,
-    unsigned char *tipX1, unsigned char *tipX2, int n, 
-    double *x1_gapColumn, double *x2_gapColumn, unsigned int *x1_gap, unsigned int *x2_gap);
-
-static void coreGTRGAMMAPROT_LG4(double *gammaRates, double *EIGN[4], double *sumtable, int upper, int *wrptr,
-                                 volatile double *ext_dlnLdlz,  volatile double *ext_d2lnLdlz2, double lz,
-                                 double * lg4_weights);
-
-static void coreGTRGAMMA(const int upper, double *sumtable,
-    volatile double *ext_dlnLdlz,  volatile double *ext_d2lnLdlz2, double *EIGN, double *gammaRates, double lz, int *wrptr);
-
-static void coreGTRCAT(int upper, int numberOfCategories, double *sum,
-    volatile double *d1, volatile double *d2, int *wgt, 
-    double *rptr, double *EIGN, int *cptr, double lz);
-
-
-static void coreGTRGAMMAPROT(double *gammaRates, double *EIGN, double *sumtable, int upper, int *wrptr,
-    volatile double *ext_dlnLdlz,  volatile double *ext_d2lnLdlz2, double lz);
-
-static void coreGTRCATPROT(double *EIGN, double lz, int numberOfCategories, double *rptr, int *cptr, int upper,
-    int *wgt, volatile double *ext_dlnLdlz,  volatile double *ext_d2lnLdlz2, double *sumtable);
-
-#endif
-
-
-/* now this is the core function of the newton-Raphson based branch length optimization that actually computes 
-   the first and second derivative of the likelihood given a new proposed branch length lz */
-
-/* Writes one tip code per state into tip[0 .. numStates-1]:
-   for 2 and 4 states the codes are the single-bit values 1, 2 (, 4, 8),
-   for every other number of states the code is the state index itself.
-   numStates must lie in the range 2..32. */
-static void ascertainmentBiasSequence(unsigned char tip[32], int numStates)
-{ 
-  int 
-    state;
-
-  assert(numStates <= 32 && numStates > 1);
-
-  if(numStates == 2 || numStates == 4)
-    {
-      /* one bit per state */
-      for(state = 0; state < numStates; state++)
-        tip[state] = (unsigned char)(1 << state);
-    }
-  else
-    {
-      /* plain state index */
-      for(state = 0; state < numStates; state++)
-        tip[state] = (unsigned char)state;
-    }
-}
-
-/* Sums, over the `upper` entries of sumtable (numStates values each), the
-   likelihood term and its first and second derivative with respect to the
-   branch length lz, each weighted by ascScaler[i], using a rate of 1.
-
-   With lh the weighted likelihood sum and d1, d2 the weighted derivative
-   sums, the function stores
-     *ext_dlnLdlz   = d1 / (lh - 1)
-     *ext_d2lnLdlz2 = ((lh - 1) * d2 - d1 * d1) / ((lh - 1) * (lh - 1))
-   and returns lh. */
-static double coreCatAsc(double *EIGN, double *sumtable, int upper,
-			 volatile double *ext_dlnLdlz,  volatile double *ext_d2lnLdlz2, double lz, const int numStates,
-			 double *ascScaler)
-{
-  /* per-state factors; entry 0 is never used because state 0 enters the sum
-     without any factor */
-  double  
-    expTerm[256], 
-    d1Coeff[256], 
-    d2Coeff[256], 
-    lh = 0.0,
-    d1Total = 0.0,
-    d2Total = 0.0,
-    denom;
-
-  int     
-    site,     
-    s;  
-
-  for(s = 1; s < numStates; s++)
-    {
-      const double 
-	lambda = EIGN[s - 1];
-
-      expTerm[s] = exp(lambda * lz);
-      d1Coeff[s] = lambda;
-      d2Coeff[s] = lambda * lambda;
-    }
-
-  for(site = 0; site < upper; site++)
-    {
-      const double
-	*sum = sumtable + site * numStates;
-
-      double
-	term,
-	siteL  = 0.0,
-	siteD1 = 0.0,
-	siteD2 = 0.0;
-
-      siteL += sum[0];
-
-      for(s = 1; s < numStates; s++)
-	{
-	  term    = expTerm[s] * sum[s];
-	  siteL  += term;
-	  siteD1 += term * d1Coeff[s];
-	  siteD2 += term * d2Coeff[s];
-	}
-
-      /* only the magnitude of the site term is accumulated */
-      siteL = fabs(siteL);
-
-      lh      += siteL  * ascScaler[site];
-      d1Total += siteD1 * ascScaler[site];
-      d2Total += siteD2 * ascScaler[site];
-    }
-
-  denom = lh - 1.0;
-
-  *ext_dlnLdlz   = (d1Total / denom);
-  *ext_d2lnLdlz2 = ((denom * d2Total - (d1Total * d1Total)) / (denom * denom));
-
-  return lh;
-}
-
-/* Ascertainment-bias term for the GAMMA case with 4 discrete rates.
- *
- * sumtable holds 4 * numStates doubles per entry (one numStates-sized slice
- * per rate category).  For each of the `upper` entries this evaluates
- *     L_i = 0.25 * | SUM_{j<4} ( sum[j*numStates]
- *                     + SUM_{l>=1} exp(EIGN[l-1] * gammaRates[j] * lz)
- *                                  * sum[j*numStates + l] ) |
- * plus the matching first- and second-derivative sums (also scaled by 0.25,
- * taken without the absolute value), and accumulates all three, weighted by
- * ascScaler[i], into lh, dlnLdlz and d2lnLdlz2.
- *
- * Outputs:
- *   *ext_dlnLdlz   = dlnLdlz / (lh - 1)
- *   *ext_d2lnLdlz2 = ((lh - 1) * d2lnLdlz2 - dlnLdlz^2) / (lh - 1)^2
- * The return value is lh.
- *
- * There is no guard against lh == 1.0 (division by zero).  diagptable is a
- * fixed 1024-entry stack array written up to index 16 * numStates - 2.
- *
- * NOTE(review): EIGN is read at [l-1] here but at [l] in coreCAT_FLEX and
- * coreGAMMA_FLEX -- confirm which layout the caller's EIGN array uses. */
-static double coreGammaAsc(double *gammaRates, double *EIGN, double *sumtable, int upper,
-			   volatile double *ext_dlnLdlz,  volatile double *ext_d2lnLdlz2, double lz, const int numStates,
-			   double *ascScaler)
-{
-  double  
-    diagptable[1024], /* row i (stride gammaStates) per rate; within a row, state l uses slots 4l, 4l+1, 4l+2 */
-    lh = 0.0,
-    dlnLdlz = 0.0,
-    d2lnLdlz2 = 0.0,
-    ki, 
-    kisqr;
-
-  int     
-    i, 
-    j, 
-    l;  
-
-  const int 
-    gammaStates = 4 * numStates;
-
-  for(i = 0; i < 4; i++)
-    {
-      ki = gammaRates[i];
-      kisqr = ki * ki;
-
-      for(l = 1; l < numStates; l++)
-	{
-	  diagptable[i * gammaStates + l * 4]     = exp(EIGN[l-1] * ki * lz);
-	  diagptable[i * gammaStates + l * 4 + 1] = EIGN[l-1] * ki;
-	  diagptable[i * gammaStates + l * 4 + 2] = EIGN[l-1] * EIGN[l-1] * kisqr;
-	}
-    }
-
-  for (i = 0; i < upper; i++)
-    {
-      double
-	*sum = &sumtable[i * gammaStates],
-	tmp,
-	inv_Li   = 0.0,
-	dlnLidlz = 0.0,
-	d2lnLidlz2 = 0.0;
-
-      for(j = 0; j < 4; j++)
-	{
-	  inv_Li += sum[j * numStates]; /* state 0 of category j enters without an exponential factor */
-
-	  for(l = 1; l < numStates; l++)
-	    {
-	      inv_Li     += (tmp = diagptable[j * gammaStates + l * 4] * sum[j * numStates + l]);
-	      dlnLidlz   += tmp * diagptable[j * gammaStates + l * 4 + 1];
-	      d2lnLidlz2 += tmp * diagptable[j * gammaStates + l * 4 + 2];
-	    }	  
-	}    
-            
-      inv_Li = 0.25 * fabs(inv_Li);         /* 0.25: equal weight for each of the 4 categories */
-      dlnLidlz *= 0.25;
-      d2lnLidlz2 *= 0.25;
-       
-      lh        += inv_Li * ascScaler[i];
-      dlnLdlz   += dlnLidlz * ascScaler[i];
-      d2lnLdlz2 += d2lnLidlz2 * ascScaler[i];
-    } 
-
-  *ext_dlnLdlz   = (dlnLdlz / (lh - 1.0));
-  *ext_d2lnLdlz2 = (((lh - 1.0) * (d2lnLdlz2) - (dlnLdlz * dlnLdlz)) / ((lh - 1.0) * (lh - 1.0)));  
-
-  return lh;
-}
-/* Build the ascertainment-bias sumtable for the single-rate (CAT) case:
- * for every site i in 0..n-1,
- *     sumtable[i * numStates + k] = left[k] * right[k],  k in 0..numStates-1.
- *
- * tipCase selects where the two vectors come from:
- *   PLL_TIP_TIP      both are the same tipVector row (tip[i]), so each entry
- *                    is that row's element squared; x1 and x2 are not read
- *   PLL_TIP_INNER    left is tipVector row tip[i], right is x2; x1 is not read
- *   PLL_INNER_INNER  left is x1, right is x2; tipVector is not read
- * Any other tipCase hits assert(0).
- *
- * tip[] has 32 entries of which ascertainmentBiasSequence() sets only the
- * first numStates, so the two tip cases rely on n <= numStates. */
-static void sumCatAsc(int tipCase, double *sumtable, double *x1, double *x2, double *tipVector,
-			int n, const int numStates)
-{
-  int i, k;
-  double *left, *right, *sum;
-
-  unsigned char 
-    tip[32];
-
-  ascertainmentBiasSequence(tip, numStates);
-
-  switch(tipCase)
-    {
-    case PLL_TIP_TIP:
-      for(i = 0; i < n; i++)
-	{
-	  left  = &(tipVector[numStates * tip[i]]);
-	  right = &(tipVector[numStates * tip[i]]); /* same row as left */
-
-	  
-	  sum = &sumtable[i * numStates];
-	  
-	  for(k = 0; k < numStates; k++)
-	    sum[k] = left[k] * right[k];	  
-	}
-      break;
-    case PLL_TIP_INNER:
-      for(i = 0; i < n; i++)
-	{
-	  left = &(tipVector[numStates * tip[i]]);
-
-	  
-	  right = &(x2[i * numStates]);
-	  sum = &sumtable[i * numStates];
-
-	  for(k = 0; k < numStates; k++)
-	    sum[k] = left[k] * right[k];	 
-	}
-      break;
-    case PLL_INNER_INNER:
-      for(i = 0; i < n; i++)
-	{
-	  left  = &(x1[i * numStates]);
-	  right = &(x2[i * numStates]);
-	  sum   = &(sumtable[i * numStates]);
-
-	  for(k = 0; k < numStates; k++)
-	    sum[k] = left[k] * right[k];	 
-	}
-      break;
-    default:
-      assert(0);
-    }
-}
-/* Build the ascertainment-bias sumtable for the GAMMA case with 4 discrete
- * rates: for every site i in 0..n-1 and rate category l in 0..3,
- *     sumtable[i * 4 * numStates + l * numStates + k] = left[k] * right[k].
- *
- * tipCase selects where the two vectors come from:
- *   PLL_TIP_TIP      both are the same tipVector row (tip[i]), identical for
- *                    all 4 categories; x1 and x2 are not read
- *   PLL_TIP_INNER    left is tipVector row tip[i] (shared by all categories),
- *                    right is the category-l slice of x2; x1 is not read
- *   PLL_INNER_INNER  left and right are the category-l slices of x1 and x2;
- *                    tipVector is not read
- * Any other tipCase hits assert(0).
- *
- * tip[] has 32 entries of which ascertainmentBiasSequence() sets only the
- * first numStates, so the two tip cases rely on n <= numStates. */
-static void sumGammaAsc(int tipCase, double *sumtable, double *x1, double *x2, double *tipVector,
-			int n, const int numStates)
-{
-  int i, l, k;
-  double *left, *right, *sum;
-
-  const int gammaStates = numStates * 4;
-
-  unsigned char 
-    tip[32];
-
-  ascertainmentBiasSequence(tip, numStates);
-
-  switch(tipCase)
-    {
-    case PLL_TIP_TIP:
-      for(i = 0; i < n; i++)
-	{
-	  left  = &(tipVector[numStates * tip[i]]);
-	  right = &(tipVector[numStates * tip[i]]); /* same row as left */
-
-	  for(l = 0; l < 4; l++)
-	    {
-	      sum = &sumtable[i * gammaStates + l * numStates];
-	      for(k = 0; k < numStates; k++)
-		sum[k] = left[k] * right[k];
-	    }
-	}
-      break;
-    case PLL_TIP_INNER:
-      for(i = 0; i < n; i++)
-	{
-	  left = &(tipVector[numStates * tip[i]]);
-
-	  for(l = 0; l < 4; l++)
-	    {
-	      right = &(x2[gammaStates * i + l * numStates]);
-	      sum = &sumtable[i * gammaStates + l * numStates];
-
-	      for(k = 0; k < numStates; k++)
-		sum[k] = left[k] * right[k];
-	    }
-	}
-      break;
-    case PLL_INNER_INNER:
-      for(i = 0; i < n; i++)
-	{
-	  for(l = 0; l < 4; l++)
-	    {
-	      left  = &(x1[gammaStates * i + l * numStates]);
-	      right = &(x2[gammaStates * i + l * numStates]);
-	      sum   = &(sumtable[i * gammaStates + l * numStates]);
-
-	      for(k = 0; k < numStates; k++)
-		sum[k] = left[k] * right[k];
-	    }
-	}
-      break;
-    default:
-      assert(0);
-    }
-}
-
-
-
-
-#if (!defined(__AVX) && !defined(__SSE3))
-static void coreCAT_FLEX(int upper, int numberOfCategories, double *sum,
-    volatile double *d1, volatile double *d2, int *wgt,
-    double *rptr, double *EIGN, int *cptr, double lz, const int states)
-    /* rptr perSiteRates pointer, cptr rateCategory pointer
-     *
-     * Generic (any number of states) first and second derivative of the
-     * log likelihood with respect to lz for the per-site rate category (CAT)
-     * case.
-     *
-     * For every category c < numberOfCategories a table
-     *     d_start[states * c + l] = exp(EIGN[l] * lz * rptr[c])   (l >= 1)
-     *     d_start[states * c]     = 1.0
-     * is built.  For every site i < upper, with c = cptr[i]:
-     *     L_i = sum[states*i] + SUM_{l>=1} d_start[states*c + l] * sum[states*i + l]
-     *     D1  = (SUM_{l>=1} term_l * EIGN[l])   / |L_i|
-     *     D2  = (SUM_{l>=1} term_l * EIGN[l]^2) / |L_i|
-     * and the totals are
-     *     *d1 = SUM_i wgt[i] * rptr[c]   * D1
-     *     *d2 = SUM_i wgt[i] * rptr[c]^2 * (D2 - D1 * D1)
-     *
-     * The four scratch arrays come from rax_posix_memalign and are released
-     * with rax_free before returning; the allocation results are not checked.
-     */
-{
-  int 
-    i, 
-    l;
-
-  double 
-    *d, 
-
-    /* arrays to store stuff we can pre-compute */
-    *d_start = NULL,
-    *e = NULL,
-    *s = NULL,
-    *dd = NULL,
-    inv_Li, 
-    dlnLidlz, 
-    d2lnLidlz2,
-    dlnLdlz = 0.0,
-    d2lnLdlz2 = 0.0;
-
-  rax_posix_memalign ((void **) &d_start, PLL_BYTE_ALIGNMENT, numberOfCategories * states * sizeof(double));
-  rax_posix_memalign ((void **) &e,       PLL_BYTE_ALIGNMENT, (states * sizeof(double)));
-  rax_posix_memalign ((void **) &s,       PLL_BYTE_ALIGNMENT, states * sizeof(double));
-  rax_posix_memalign ((void **) &dd,      PLL_BYTE_ALIGNMENT, states * sizeof(double)), /* NOTE(review): ends in ',' not ';' -- the comma operator chains this call with the assignment below; same effect, probably a typo */
-  d = d_start;
-
-  e[0] = 0.0;
-  s[0] = 0.0; 
-  dd[0] = 0.0;
-
-
-  /* we are pre-computing values for computing the first and second derivative of P(lz)
-     since this requires an exponential that is the only thing we really have to derive here */
-
-  for(l = 1; l < states; l++)
-  { 
-    s[l]  = EIGN[l];
-    e[l]  = EIGN[l] * EIGN[l];     
-    dd[l] = s[l] * lz;
-  }
-
-  /* compute the P matrices and their derivatives for 
-     all per-site rate categories */
-
-  for(i = 0; i < numberOfCategories; i++)
-  {      
-    d[states * i] = 1.0;
-    for(l = 1; l < states; l++)
-      d[states * i + l] = exp(dd[l] * rptr[i]);
-  }
-
-
-  /* now loop over the sites in this partition to obtain the per-site 1st and 2nd derivatives */
-
-  for (i = 0; i < upper; i++)
-  {    
-    /* get the correct p matrix for the rate at the current site i */
-
-    d = &d_start[states * cptr[i]];      
-
-    /* this is the likelihood at site i, NOT the log likelihood, we don't need the log 
-       likelihood to compute derivatives ! */
-
-    inv_Li     = sum[states * i]; 
-
-    /* those are for storing the first and second derivative of the Likelihood at site i */
-
-    dlnLidlz   = 0.0;
-    d2lnLidlz2 = 0.0;
-
-    /* now multiply the likelihood and the first and second derivative with the 
-       appropriate derivatives of P(lz) */
-
-    for(l = 1; l < states; l++)
-    {
-      double
-        tmpv = d[l] * sum[states * i + l];
-
-      inv_Li     += tmpv;                 
-      dlnLidlz   += tmpv * s[l];       
-      d2lnLidlz2 += tmpv * e[l];
-    }     
-
-    /* below we are implementing the other mathematical operations that are required 
-       to obtain the derivatives */
-
-    inv_Li = 1.0 / fabs (inv_Li); /* from here on inv_Li really is the reciprocal of the site likelihood */
-
-    dlnLidlz   *= inv_Li;
-    d2lnLidlz2 *= inv_Li;
-
-    /* compute the accumulated first and second derivatives of this site */
-
-    dlnLdlz  += wgt[i] * rptr[cptr[i]] * dlnLidlz;
-    d2lnLdlz2 += wgt[i] * rptr[cptr[i]] * rptr[cptr[i]] * (d2lnLidlz2 - dlnLidlz * dlnLidlz);
-  }
-
-  /* 
-     set the result values, i.e., the sum of the per-site first and second derivatives of the likelihood function 
-     for this partition. 
-     */
-
-  *d1  = dlnLdlz;
-  *d2 = d2lnLdlz2;
-
-  /* free the temporary arrays */
-
-  rax_free(d_start);
-  rax_free(e);
-  rax_free(s);
-  rax_free(dd);
-}
-/* Generic (any number of states) first and second derivative of the log
- * likelihood with respect to lz for the GAMMA case with 4 discrete rates.
- *
- * sumtable holds 4 * states doubles per site (one states-sized slice per rate
- * category).  For every site i < upper:
- *     L_i = SUM_{j<4} ( sum[j*states]
- *             + SUM_{l>=1} exp(EIGN[l] * gammaRates[j] * lz) * sum[j*states + l] )
- *     D1  = (SUM term_{j,l} * EIGN[l]   * gammaRates[j])   / |L_i|
- *     D2  = (SUM term_{j,l} * EIGN[l]^2 * gammaRates[j]^2) / |L_i|
- * and the totals written to the outputs are
- *     *ext_dlnLdlz   = SUM_i wrptr[i] * D1
- *     *ext_d2lnLdlz2 = SUM_i wrptr[i] * (D2 - D1 * D1)
- *
- * diagptable is a fixed 1024-entry stack array; the highest index written is
- * 16 * states - 2. */
-static void coreGAMMA_FLEX(int upper, double *sumtable, volatile double *ext_dlnLdlz,  volatile double *ext_d2lnLdlz2, 
-    double *EIGN, double *gammaRates, double lz, int *wrptr, const int states)
-{
-  double  
-    *sum, 
-    diagptable[1024], /* TODO make this dynamic */
-    dlnLdlz = 0.0,
-    d2lnLdlz2 = 0.0,
-    ki, 
-    kisqr,
-    tmp,
-    inv_Li, 
-    dlnLidlz, 
-    d2lnLidlz2;
-
-  int     
-    i, 
-    j, 
-    l;  
-
-  const int 
-    gammaStates = 4 * states;
-
-  /* pre-compute the derivatives of the P matrix for all discrete GAMMA rates */
-
-  for(i = 0; i < 4; i++)
-  {
-    ki = gammaRates[i];
-    kisqr = ki * ki;
-
-    for(l = 1; l < states; l++)
-    {
-      diagptable[i * gammaStates + l * 4]     = exp(EIGN[l] * ki * lz);
-      diagptable[i * gammaStates + l * 4 + 1] = EIGN[l] * ki;
-      diagptable[i * gammaStates + l * 4 + 2] = EIGN[l] * EIGN[l] * kisqr;
-    }
-  }
-
-  /* loop over sites in this partition */
-
-  for (i = 0; i < upper; i++)
-  {
-    /* access the array with pre-computed values */
-    sum = &sumtable[i * gammaStates];
-
-    /* initial per-site likelihood and 1st and 2nd derivatives */
-
-    inv_Li   = 0.0;
-    dlnLidlz = 0.0;
-    d2lnLidlz2 = 0.0;
-
-    /* loop over discrete GAMMA rates */
-
-    for(j = 0; j < 4; j++)
-    {
-      inv_Li += sum[j * states];
-
-      for(l = 1; l < states; l++)
-      {
-        inv_Li     += (tmp = diagptable[j * gammaStates + l * 4] * sum[j * states + l]);
-        dlnLidlz   +=  tmp * diagptable[j * gammaStates + l * 4 + 1];
-        d2lnLidlz2 +=  tmp * diagptable[j * gammaStates + l * 4 + 2];
-      }
-    }
-
-    /* finalize derivative computation */
-    /* note that wrptr[] here unlike in CAT above is the 
-       integer weight vector of the current site 
-
-       The operations:
-
-       EIGN[l] * ki;
-       EIGN[l] * EIGN[l] * kisqr;
-
-       that are hidden in CAT in wrptr (at least the * ki and * ki * ki part of them)
-       are done explicitly here 
-
-*/
-
-    inv_Li = 1.0 / fabs (inv_Li); /* from here on inv_Li really is the reciprocal of the site likelihood */
-
-    dlnLidlz   *= inv_Li;
-    d2lnLidlz2 *= inv_Li;
-
-    dlnLdlz   += wrptr[i] * dlnLidlz;
-    d2lnLdlz2 += wrptr[i] * (d2lnLidlz2 - dlnLidlz * dlnLidlz);
-  }
-
-  *ext_dlnLdlz   = dlnLdlz;
-  *ext_d2lnLdlz2 = d2lnLdlz2;
-
-}
-#endif
-
-//void sumGAMMA_FLEX_reorder(int tipCase, double *sumtable, double *x1, double *x2, double *tipVector,
-//    unsigned char *tipX1, unsigned char *tipX2, int n, const int states);
-
-/** @brief Precompute values (sumtable) from the 2 likelihood vectors of a given branch
- *
- * @warning These precomputations are stored in \a pr->partitionData[model]->sumBuffer, which is used by function \a execCore
- *
- * @param tr
- *   Library instance
- *
- * @param pr
- *   List of partitions
- *
- * @warning the given branch is implicitly defined in \a tr by these nodes:
- * pNumber = tr->td[0].ti[0].pNumber;
- * qNumber = tr->td[0].ti[0].qNumber;
- *
- *
- * @note This function should be called only once at the very beginning of each Newton-Raphson procedure for optimizing branch lengths. It initially invokes an iterative newview call to get a consistent pair of vectors at the left and the right end of the branch and thereafter invokes the one-time only precomputation of values (sumtable) that can be re-used in each Newton-Raphson iteration. Once this function has been called we can execute the actual NR procedure
- *
- * @note Only partitions with tr->td[0].executeModel[model] set and width > 0 are processed. For partitions with ascBias set, the function additionally fills ascScaler and ascSumBuffer of that partition (on thread 0 only when built with _FINE_GRAIN_MPI or _USE_PTHREADS).
- *
- */
-void makenewzIterative(pllInstance *tr, partitionList * pr)
-{
-  int 
-    model, 
-    tipCase;
-
-  double
-    *x1_start     = NULL,
-    *x2_start     = NULL,
-    *x1_start_asc = NULL,
-    *x2_start_asc = NULL;
-
-
-  unsigned char
-    *tipX1,
-    *tipX2;
-
-  double
-    *x1_gapColumn = (double*)NULL,
-    *x2_gapColumn = (double*)NULL;
-
-  unsigned int
-    *x1_gap = (unsigned int*)NULL,
-    *x2_gap = (unsigned int*)NULL;                            
-
-  /* call pllNewviewIterative to get the likelihood arrays to the left and right of the branch */
-
-  pllNewviewIterative(tr, pr, 1);
-
-
-  /* 
-     loop over all partitions to do the precomputation of the sumTable buffer 
-     This is analogous to the pllNewviewIterative() and pllEvaluateIterative() 
-     implementations.
-     */
-
-  for(model = 0; model < pr->numberOfPartitions; model++)
-  { 
-    int 
-      width = pr->partitionData[model]->width;
-
-    if(tr->td[0].executeModel[model] && width > 0)
-    {
-      int          
-        states = pr->partitionData[model]->states;
-
-
-      getVects(tr, pr, &tipX1, &tipX2, &x1_start, &x2_start, &tipCase, model, &x1_gapColumn, &x2_gapColumn, &x1_gap, &x2_gap, &x1_start_asc, &x2_start_asc);
-
-#if (!defined(__SSE3) && !defined(__AVX) && !defined(__MIC_NATIVE))
-      assert(!tr->saveMemory);
-      if(tr->rateHetModel == PLL_CAT)
-        sumCAT_FLEX(tipCase, pr->partitionData[model]->sumBuffer, x1_start, x2_start, pr->partitionData[model]->tipVector, tipX1, tipX2,
-            width, states);
-      else
-        //sumGAMMA_FLEX_reorder(tipCase, pr->partitionData[model]->sumBuffer, x1_start, x2_start, pr->partitionData[model]->tipVector, tipX1, tipX2,
-          sumGAMMA_FLEX(tipCase, pr->partitionData[model]->sumBuffer, x1_start, x2_start, pr->partitionData[model]->tipVector, tipX1, tipX2,
-            width, states);
-#else
-      switch(states)
-      {
-      case 2: /* BINARY */
-          assert(!tr->saveMemory);
-          if (tr->rateHetModel == PLL_CAT)
-            sumCAT_BINARY(tipCase, pr->partitionData[model]->sumBuffer, x1_start, x2_start, pr->partitionData[model]->tipVector, tipX1, tipX2,
-                          width);
-          else
-            sumGAMMA_BINARY(tipCase, pr->partitionData[model]->sumBuffer, x1_start, x2_start, pr->partitionData[model]->tipVector, tipX1, tipX2,
-                            width);
-          break;
-      case 4: /* DNA */
-#ifdef __MIC_NATIVE
-      assert(!tr->saveMemory);
-      assert(tr->rateHetModel == PLL_GAMMA);
-
-      sumGTRGAMMA_MIC(tipCase, pr->partitionData[model]->sumBuffer, x1_start, x2_start, pr->partitionData[model]->tipVector, tipX1, tipX2,
-          width);
-#else
-          if(tr->rateHetModel == PLL_CAT)
-          {
-            if(tr->saveMemory)
-              sumCAT_SAVE(tipCase, pr->partitionData[model]->sumBuffer, x1_start, x2_start, pr->partitionData[model]->tipVector, tipX1, tipX2,
-                  width, x1_gapColumn, x2_gapColumn, x1_gap, x2_gap);
-            else
-              sumCAT(tipCase, pr->partitionData[model]->sumBuffer, x1_start, x2_start, pr->partitionData[model]->tipVector, tipX1, tipX2,
-                  width);
-          }
-          else
-          {
-            if(tr->saveMemory)
-              sumGAMMA_GAPPED_SAVE(tipCase, pr->partitionData[model]->sumBuffer, x1_start, x2_start, pr->partitionData[model]->tipVector, tipX1, tipX2,
-                  width, x1_gapColumn, x2_gapColumn, x1_gap, x2_gap);
-            else
-              sumGAMMA(tipCase, pr->partitionData[model]->sumBuffer, x1_start, x2_start, pr->partitionData[model]->tipVector, tipX1, tipX2,
-                  width);
-          }
-#endif
-          break;                
-        case 20: /* proteins */
-#ifdef __MIC_NATIVE
-          assert(!tr->saveMemory);
-          assert(tr->rateHetModel == PLL_GAMMA);
-
-              if(pr->partitionData[model]->protModels == PLL_LG4M || pr->partitionData[model]->protModels == PLL_LG4X)
-                          sumGTRGAMMAPROT_LG4_MIC(tipCase, pr->partitionData[model]->sumBuffer, x1_start, x2_start, pr->partitionData[model]->tipVector_LG4, tipX1, tipX2,
-                                  width);
-              else
-                          sumGTRGAMMAPROT_MIC(tipCase, pr->partitionData[model]->sumBuffer, x1_start, x2_start, pr->partitionData[model]->tipVector, tipX1, tipX2,
-                                  width);
-#else
-
-            if(tr->rateHetModel == PLL_CAT)
-          {
-            if(tr->saveMemory)
-              sumGTRCATPROT_SAVE(tipCase, pr->partitionData[model]->sumBuffer, x1_start, x2_start, pr->partitionData[model]->tipVector,
-                  tipX1, tipX2, width, x1_gapColumn, x2_gapColumn, x1_gap, x2_gap);
-            else                      
-              sumGTRCATPROT(tipCase, pr->partitionData[model]->sumBuffer, x1_start, x2_start, pr->partitionData[model]->tipVector,
-                  tipX1, tipX2, width);
-          }
-          else
-          {
-
-            if(tr->saveMemory)
-              sumGAMMAPROT_GAPPED_SAVE(tipCase, pr->partitionData[model]->sumBuffer, x1_start, x2_start, pr->partitionData[model]->tipVector, tipX1, tipX2,
-                  width, x1_gapColumn, x2_gapColumn, x1_gap, x2_gap);
-              else
-                    {
-                      if(pr->partitionData[model]->protModels == PLL_LG4M || pr->partitionData[model]->protModels == PLL_LG4X)
-                        sumGAMMAPROT_LG4(tipCase,  pr->partitionData[model]->sumBuffer, x1_start, x2_start, pr->partitionData[model]->tipVector_LG4,
-                                         tipX1, tipX2, width);
-            else
-              sumGAMMAPROT(tipCase, pr->partitionData[model]->sumBuffer, x1_start, x2_start, pr->partitionData[model]->tipVector,
-                  tipX1, tipX2, width);
-                    }
-          }
-#endif
-          break;                
-        default:
-          assert(0); /* the specialised build only handles 2, 4 and 20 states */
-      }
-#endif
-#if (defined(_FINE_GRAIN_MPI) || defined(_USE_PTHREADS))
-      if (pr->partitionData[model]->ascBias && tr->threadID == 0)
-#else
-      if (pr->partitionData[model]->ascBias)
-#endif
-       {
-            int pNumber = tr->td[0].ti[0].pNumber, qNumber =
-                    tr->td[0].ti[0].qNumber, i, *ex1_asc =
-                    &pr->partitionData[model]->ascExpVector[(pNumber
-                            - tr->mxtips - 1) * states], *ex2_asc =
-                    &pr->partitionData[model]->ascExpVector[(qNumber
-                            - tr->mxtips - 1) * states]; /* NOTE(review): both pointers are formed regardless of tipCase; presumably the offset is negative for a tip node -- only the inner node's pointer is dereferenced below, confirm */
-            switch (tipCase)
-            {
-            case PLL_TIP_TIP:
-                assert(0); /* a tip-tip branch is rejected here */
-                break;
-            case PLL_TIP_INNER:
-                if (isTip(pNumber, tr->mxtips))
-                {
-                    for (i = 0; i < states; i++)
-                        pr->partitionData[model]->ascScaler[i] = pow(
-                                PLL_MINLIKELIHOOD, (double) ex2_asc[i]);
-                }
-                else
-                {
-                    for (i = 0; i < states; i++)
-                        pr->partitionData[model]->ascScaler[i] = pow(
-                                PLL_MINLIKELIHOOD, (double) ex1_asc[i]);
-                }
-                break;
-            case PLL_INNER_INNER:
-                for (i = 0; i < states; i++)
-                    pr->partitionData[model]->ascScaler[i] = pow(
-                            PLL_MINLIKELIHOOD,
-                            (double) (ex1_asc[i] + ex2_asc[i]));
-                break;
-            default:
-                assert(0);
-            }
-         if (tr->rateHetModel == PLL_CAT)
-           sumCatAsc  (tipCase, pr->partitionData[model]->ascSumBuffer, x1_start_asc, x2_start_asc, pr->partitionData[model]->tipVector, states, states); /* n = numStates = states */
-         else
-           sumGammaAsc(tipCase, pr->partitionData[model]->ascSumBuffer, x1_start_asc, x2_start_asc, pr->partitionData[model]->tipVector, states, states); /* n = numStates = states */
-       }
-    }
-  }
-}
-
-
-/** @brief Compute first and second derivatives of the likelihood with respect to a given branch length 
- *
- * @param tr
- *   library instance
- *
- * @param pr
- *   List of partitions
- *
- * @param _dlnLdlz 
- *   First derivative dl/dlz
- *
- * @param _d2lnLdlz2
- *   Second derivative d(dl/dlz)/dlz
- *
- * @warning \a makenewzIterative should have been called to precompute \a pr->partitionData[model]->sumBuffer at the given branch
- *
- * @note  this function actually computes the first and second derivatives of the likelihood for a given branch stored in tr->coreLZ[model] Note that in the parallel case coreLZ must always be broadcasted together with the traversal descriptor, at least for optimizing branch lengths 
- *
- * @note  Inside this function the branch length lz is read from tr->td[0].parameterValues[] (index model with per-partition branch lengths, index 0 otherwise). The output arrays are indexed the same way; with a single joint branch length the contributions of all partitions are added into element 0.
- *
- */
-void execCore(pllInstance *tr, partitionList *pr, volatile double *_dlnLdlz, volatile double *_d2lnLdlz2)
-{
-  int model, branchIndex;
-  int numBranches = pr->perGeneBranchLengths?pr->numberOfPartitions:1;
-
-  double lz;
-
-  _dlnLdlz[0]   = 0.0;
-  _d2lnLdlz2[0] = 0.0;
-
-  /* loop over partitions */
-
-  for(model = 0; model < pr->numberOfPartitions; model++)
-  {
-    int 
-      width = pr->partitionData[model]->width;
-
-    /* check if we (the present thread for instance) needs to compute something at 
-       all for the present partition */
-
-    if(tr->td[0].executeModel[model] && width > 0)
-    {
-      int           
-        states = pr->partitionData[model]->states;
-
-      double 
-        *sumBuffer       = (double*)NULL;
-
-
-      volatile double
-        dlnLdlz   = 0.0,
-                  d2lnLdlz2 = 0.0;
-
-      /* set a pointer to the part of the pre-computed sumBuffer we are going to access */
-
-      sumBuffer = pr->partitionData[model]->sumBuffer;
-
-      /* figure out if we are optimizing branch lengths individually per partition or jointly across 
-         all partitions. If we do this on a per partition basis, we also need to compute and store 
-         the per-partition derivatives of the likelihood separately, otherwise not */
-
-      if(numBranches > 1)
-      {
-        branchIndex = model;          
-        lz = tr->td[0].parameterValues[model];
-        _dlnLdlz[model]   = 0.0;
-        _d2lnLdlz2[model] = 0.0;
-      }
-      else
-      {
-        branchIndex = 0;              
-        lz = tr->td[0].parameterValues[0];
-      }
-
-#if (!defined(__SSE3) && !defined(__AVX) && !defined(__MIC_NATIVE))
-      /* compute first and second derivatives with the slow generic functions */
-
-      if(tr->rateHetModel == PLL_CAT)
-        coreCAT_FLEX(width, pr->partitionData[model]->numberOfCategories, sumBuffer,
-            &dlnLdlz, &d2lnLdlz2, pr->partitionData[model]->wgt,
-            pr->partitionData[model]->perSiteRates, pr->partitionData[model]->EIGN,  pr->partitionData[model]->rateCategory, lz, states);
-      else
-        coreGAMMA_FLEX(width, sumBuffer,
-            &dlnLdlz, &d2lnLdlz2, pr->partitionData[model]->EIGN, pr->partitionData[model]->gammaRates, lz,
-            pr->partitionData[model]->wgt, states);
-#else
-      switch(states)
-       {    
-         case 2: /* BINARY */
-           if (tr->rateHetModel == PLL_CAT)
-              coreGTRCAT_BINARY(width, 
-                                pr->partitionData[model]->numberOfCategories, 
-                                sumBuffer,
-                                &dlnLdlz, 
-                                &d2lnLdlz2, 
-                                pr->partitionData[model]->perSiteRates, 
-                                pr->partitionData[model]->EIGN,  
-                                pr->partitionData[model]->rateCategory, 
-                                lz, 
-                                pr->partitionData[model]->wgt);
-           else
-              coreGTRGAMMA_BINARY(width, 
-                                   sumBuffer,
-                                   &dlnLdlz, 
-                                   &d2lnLdlz2, 
-                                   pr->partitionData[model]->EIGN,
-                                   pr->partitionData[model]->gammaRates, 
-                                   lz,
-                                   pr->partitionData[model]->wgt);
-           break;
-         case 4: /* DNA */
-#ifdef __MIC_NATIVE
-           assert(tr->rateHetModel == PLL_GAMMA);
-
-           coreGTRGAMMA_MIC(width, 
-                            sumBuffer,
-                            &dlnLdlz, 
-                            &d2lnLdlz2, 
-                            pr->partitionData[model]->EIGN, 
-                            pr->partitionData[model]->gammaRates, 
-                            lz,
-                            pr->partitionData[model]->wgt);
-#else
-          if(tr->rateHetModel == PLL_CAT)
-            coreGTRCAT(width, pr->partitionData[model]->numberOfCategories, sumBuffer,
-                &dlnLdlz, &d2lnLdlz2, pr->partitionData[model]->wgt,
-                pr->partitionData[model]->perSiteRates, pr->partitionData[model]->EIGN,  pr->partitionData[model]->rateCategory, lz);
-          else 
-            coreGTRGAMMA(width, sumBuffer,
-                &dlnLdlz, &d2lnLdlz2, pr->partitionData[model]->EIGN, pr->partitionData[model]->gammaRates, lz,
-                pr->partitionData[model]->wgt);
-
-#endif
-          break;                    
-        case 20: /* proteins */
-
-#ifdef __MIC_NATIVE
-      assert(tr->rateHetModel == PLL_GAMMA);
-
-          if(pr->partitionData[model]->protModels == PLL_LG4M || pr->partitionData[model]->protModels == PLL_LG4X)
-                  coreGTRGAMMAPROT_LG4_MIC(width, sumBuffer,
-                          &dlnLdlz, &d2lnLdlz2, pr->partitionData[model]->EIGN_LG4, pr->partitionData[model]->gammaRates, lz,
-                          pr->partitionData[model]->wgt, pr->partitionData[model]->lg4x_weights);
-          else
-                  coreGTRGAMMAPROT_MIC(width, sumBuffer,
-                          &dlnLdlz, &d2lnLdlz2, pr->partitionData[model]->EIGN, pr->partitionData[model]->gammaRates, lz,
-                          pr->partitionData[model]->wgt);
-#else
-
-          if(tr->rateHetModel == PLL_CAT)
-            coreGTRCATPROT(pr->partitionData[model]->EIGN, lz, pr->partitionData[model]->numberOfCategories,  pr->partitionData[model]->perSiteRates,
-                pr->partitionData[model]->rateCategory, width,
-                pr->partitionData[model]->wgt,
-                &dlnLdlz, &d2lnLdlz2,
-                sumBuffer);
-            else
-                { 
-                  if(pr->partitionData[model]->protModels == PLL_LG4M || pr->partitionData[model]->protModels == PLL_LG4X)
-                    coreGTRGAMMAPROT_LG4(pr->partitionData[model]->gammaRates, pr->partitionData[model]->EIGN_LG4,
-                                         sumBuffer, width, pr->partitionData[model]->wgt,
-                                         &dlnLdlz, &d2lnLdlz2, lz, pr->partitionData[model]->lg4x_weights);
-          else
-
-            coreGTRGAMMAPROT(pr->partitionData[model]->gammaRates, pr->partitionData[model]->EIGN,
-                sumBuffer, width, pr->partitionData[model]->wgt,
-                &dlnLdlz, &d2lnLdlz2, lz);
-            
-                }
-#endif
-          break;                   
-        default:
-          assert(0); /* the specialised build only handles 2, 4 and 20 states */
-      }
-#endif
-
-      /* store first and second derivative */
-#if (defined(_FINE_GRAIN_MPI) || defined(_USE_PTHREADS))
-     if(pr->partitionData[model]->ascBias && tr->threadID == 0)
-#else
-     if(pr->partitionData[model]->ascBias)
-#endif  
-       {
-         size_t
-           i;
-
-         double 
-           correction;
-
-         int            
-           w = 0;
-         
-         volatile double 
-           d1 = 0.0,
-           d2 = 0.0;                   
-         
-         for(i = (size_t)pr->partitionData[model]->lower; i < (size_t)pr->partitionData[model]->upper; i++)
-           w += tr->aliaswgt[i];     /* w = sum of aliaswgt over lower .. upper-1 of this partition */
-         
-          switch(tr->rateHetModel)
-            {
-            case PLL_CAT:
-              correction = coreCatAsc(pr->partitionData[model]->EIGN, pr->partitionData[model]->ascSumBuffer, states,
-                                        &d1,  &d2, lz, states, pr->partitionData[model]->ascScaler);
-              break;
-            case PLL_GAMMA:
-              correction = coreGammaAsc(pr->partitionData[model]->gammaRates, pr->partitionData[model]->EIGN, pr->partitionData[model]->ascSumBuffer, states,
-                                        &d1,  &d2, lz, states, pr->partitionData[model]->ascScaler);
-              break;
-            default:
-              assert(0);
-            }
-        
-         correction = 1.0 - correction; /* NOTE(review): correction is not read again after this line */
-     
-         /* Lewis correction */
-         _dlnLdlz[branchIndex]   =  _dlnLdlz[branchIndex] + dlnLdlz - (double)w * d1;
-         _d2lnLdlz2[branchIndex] =  _d2lnLdlz2[branchIndex] + d2lnLdlz2-  (double)w * d2;
-           
-       }  
-      else
-       {
-         _dlnLdlz[branchIndex]   = _dlnLdlz[branchIndex]   + dlnLdlz;
-         _d2lnLdlz2[branchIndex] = _d2lnLdlz2[branchIndex] + d2lnLdlz2;
-       }
-    }
-    else
-    {
-      /* set to 0 to make the reduction operation consistent */
-
-      if(width == 0 && (numBranches > 1))
-      {
-        _dlnLdlz[model]   = 0.0;
-        _d2lnLdlz2[model] = 0.0;
-      }                                    
-    }
-  }
-
-}
-
-
-/* the function below actually implements the iterative Newton-Raphson procedure.
-   It is particularly messy and hard to read because for the case of per-partition branch length 
-   estimates it needs to keep track of whether the Newton-Raphson procedure has 
-   converged for each partition individually. 
-
-   The rationale for doing it like this is also provided in:
-
-
-   A. Stamatakis, M. Ott: "Load Balance in the Phylogenetic Likelihood Kernel". Proceedings of ICPP 2009,
-
-*/
-
-static void topLevelMakenewz(pllInstance *tr, partitionList * pr, double *z0, int _maxiter, double *result)
-{
-  double   z[PLL_NUM_BRANCHES], zprev[PLL_NUM_BRANCHES], zstep[PLL_NUM_BRANCHES];
-  volatile double  dlnLdlz[PLL_NUM_BRANCHES], d2lnLdlz2[PLL_NUM_BRANCHES];
-  int i, maxiter[PLL_NUM_BRANCHES], model;
-  int numBranches = pr->perGeneBranchLengths?pr->numberOfPartitions:1;
-  pllBoolean firstIteration = PLL_TRUE;
-  pllBoolean outerConverged[PLL_NUM_BRANCHES];
-  pllBoolean loopConverged;
-
-
-  /* figure out if this is on a per partition basis or jointly across all partitions */
-
-
-
-  /* initialize loop convergence variables etc. 
-     maxiter is the maximum number of NR iterations we are going to do before giving up */
-
-  for(i = 0; i < numBranches; i++)
-  {
-    z[i] = z0[i];
-    maxiter[i] = _maxiter;
-    outerConverged[i] = PLL_FALSE;
-    tr->curvatOK[i]       = PLL_TRUE;
-  }
-
-
-  /* nested do while loops of Newton-Raphson */
-
-  do
-  {
-
-    /* check if we ar done for partition i or if we need to adapt the branch length again */
-
-    for(i = 0; i < numBranches; i++)
-    {
-      if(outerConverged[i] == PLL_FALSE && tr->curvatOK[i] == PLL_TRUE)
-      {
-        tr->curvatOK[i] = PLL_FALSE;
-
-        zprev[i] = z[i];
-
-        zstep[i] = (1.0 - PLL_ZMAX) * z[i] + PLL_ZMIN;
-      }
-    }
-
-    for(i = 0; i < numBranches; i++)
-    {
-      /* other case, the outer loop hasn't converged but we are trying to approach 
-         the maximum from the wrong side */
-
-      if(outerConverged[i] == PLL_FALSE && tr->curvatOK[i] == PLL_FALSE)
-      {
-        double lz;
-
-        if (z[i] < PLL_ZMIN) z[i] = PLL_ZMIN;
-        else if (z[i] > PLL_ZMAX) z[i] = PLL_ZMAX;
-        lz    = log(z[i]);
-
-        tr->coreLZ[i] = lz;
-      }
-    }
-
-
-    /* set the execution mask */
-
-    if(numBranches > 1)
-    {
-      for(model = 0; model < pr->numberOfPartitions; model++)
-      {
-        if(pr->partitionData[model]->executeModel)
-          pr->partitionData[model]->executeModel = !tr->curvatOK[model];
-
-      }
-    }
-    else
-    {
-      for(model = 0; model < pr->numberOfPartitions; model++)
-        pr->partitionData[model]->executeModel = !tr->curvatOK[0];
-    }
-
-
-    /* store it in traversal descriptor */
-
-    storeExecuteMaskInTraversalDescriptor(tr, pr);
-
-    /* store the new branch length values to be tested in traversal descriptor */
-
-    storeValuesInTraversalDescriptor(tr, pr, &(tr->coreLZ[0]));
-
-#if (defined(_FINE_GRAIN_MPI) || defined(_USE_PTHREADS))
-
-    /* if this is the first iteration of NR we will need to first do this one-time call 
-       of maknewzIterative() Note that, only this call requires broadcasting the traversal descriptor,
-       subsequent calls to pllMasterBarrier(PLL_THREAD_MAKENEWZ, tr); will not require this
-       */
-
-    if(firstIteration)
-      {
-        tr->td[0].traversalHasChanged = PLL_TRUE; 
-        pllMasterBarrier (tr, pr, PLL_THREAD_MAKENEWZ_FIRST);
-        firstIteration = PLL_FALSE; 
-        tr->td[0].traversalHasChanged = PLL_FALSE; 
-      }
-    else 
-      pllMasterBarrier(tr, pr, PLL_THREAD_MAKENEWZ);
-    branchLength_parallelReduce(tr, (double*)dlnLdlz, (double*)d2lnLdlz2, numBranches);
-#else 
-    /* sequential part, if this is the first newton-raphson implementation,
-       do the precomputations as well, otherwise just execute the computation
-       of the derivatives */
-    if(firstIteration)
-      {
-        makenewzIterative(tr, pr);
-        firstIteration = PLL_FALSE;
-      }
-    execCore(tr, pr, dlnLdlz, d2lnLdlz2);
-#endif
-
-    /* do a NR step, if we are on the correct side of the maximum that's okay, otherwise 
-       shorten branch */
-
-    for(i = 0; i < numBranches; i++)
-    {
-      if(outerConverged[i] == PLL_FALSE && tr->curvatOK[i] == PLL_FALSE)
-      {
-        if ((d2lnLdlz2[i] >= 0.0) && (z[i] < PLL_ZMAX))
-          zprev[i] = z[i] = 0.37 * z[i] + 0.63;  /*  Bad curvature, shorten branch */
-        else
-          tr->curvatOK[i] = PLL_TRUE;
-      }
-    }
-
-    /* do the standard NR step to obrain the next value, depending on the state for eahc partition */
-
-    for(i = 0; i < numBranches; i++)
-    {
-      if(tr->curvatOK[i] == PLL_TRUE && outerConverged[i] == PLL_FALSE)
-      {
-        if (d2lnLdlz2[i] < 0.0)
-        {
-          double tantmp = -dlnLdlz[i] / d2lnLdlz2[i];
-          if (tantmp < 100)
-          {
-            z[i] *= exp(tantmp);
-            if (z[i] < PLL_ZMIN)
-              z[i] = PLL_ZMIN;
-
-            if (z[i] > 0.25 * zprev[i] + 0.75)
-              z[i] = 0.25 * zprev[i] + 0.75;
-          }
-          else
-            z[i] = 0.25 * zprev[i] + 0.75;
-        }
-        if (z[i] > PLL_ZMAX) z[i] = PLL_ZMAX;
-
-        /* decrement the maximum number of itarations */
-
-        maxiter[i] = maxiter[i] - 1;
-
-        /* check if the outer loop has converged */
-
-        //old code below commented out, integrated new PRELIMINARY BUG FIX !
-        //this needs further work at some point!
-
-        /*
-        if(maxiter[i] > 0 && (PLL_ABS(z[i] - zprev[i]) > zstep[i]))
-          outerConverged[i] = PLL_FALSE;
-        else
-          outerConverged[i] = PLL_TRUE;
-        */
-
-        if((PLL_ABS(z[i] - zprev[i]) > zstep[i]))
-         {
-           /* We should make a more informed decision here,
-              based on the log like improvement */
-
-           if(maxiter[i] < -20)
-            {
-              z[i] = z0[i];
-              outerConverged[i] = PLL_TRUE;
-            }
-           else
-             outerConverged[i] = PLL_FALSE;
-         }
-        else
-          outerConverged[i] = PLL_TRUE;
-      }
-    }
-
-    /* check if the loop has converged for all partitions */
-
-    loopConverged = PLL_TRUE;
-    for(i = 0; i < numBranches; i++)
-      loopConverged = loopConverged && outerConverged[i];
-  }
-  while (!loopConverged);
-
-
-  /* reset  partition execution mask */
-
-  for(model = 0; model < pr->numberOfPartitions; model++)
-    pr->partitionData[model]->executeModel = PLL_TRUE;
-
-  /* copy the new branches in the result array of branches.
-     if we don't do a per partition estimate of 
-     branches this will only set result[0]
-     */
-
-  for(i = 0; i < numBranches; i++)
-    result[i] = z[i];
-}
-
-
-/** @brief Optimize branch length value(s) of a given branch with the Newton-Raphtson procedure 
- *
- * @warning A given branch may have one or several branch length values (up to PLL_NUM_BRANCHES), usually the later refers to partition-specific branch length values. Thus z0 and result represent collections rather than double values. The number of branch length values is given by \a tr->numBranches 
- *
- * @param tr
- *   Library instance
- *
- * @param p
- *   One node that defines the branch (p->z)
- *
- * @param q
- *   The other node side of the branch (usually p->back), but the branch length can be estimated even if p and q are
- *   not connected, e.g. before the insertion of a subtree.
- *
- * @param z0 
- *   Initial branch length value(s) for the given branch \a p->z 
- *
- * @param maxiter 
- *   Maximum number of iterations in the Newton-Raphson procedure 
- *
- * @param result 
- *   Resulting branch length value(s) for the given branch \a p->z 
- *
- * @param mask 
- *   Specifies if a mask to track partition convergence (\a tr->partitionConverged) is being used.
- *
- * @sa typical values for \a maxiter are constants \a iterations and \a PLL_NEWZPERCYCLE
- * @note Requirement: q->z == p->z
- */
-void makenewzGeneric(pllInstance *tr, partitionList * pr, nodeptr p, nodeptr q, double *z0, int maxiter, double *result, pllBoolean mask)
-{
-  int i;
-  //boolean originalExecute[PLL_NUM_BRANCHES];
-  int numBranches = pr->perGeneBranchLengths?pr->numberOfPartitions:1;
-
-  pllBoolean 
-    p_recom = PLL_FALSE, /* if one of was missing, we will need to force recomputation */
-    q_recom = PLL_FALSE;
-
-  /* the first entry of the traversal descriptor stores the node pair that defines 
-     the branch */
-
-  tr->td[0].ti[0].pNumber = p->number;
-  tr->td[0].ti[0].qNumber = q->number;
-
-  for(i = 0; i < numBranches; i++)
-  {
-    //originalExecute[i] =  pr->partitionData[i]->executeModel;
-    tr->td[0].ti[0].qz[i] =  z0[i];
-    if(mask)
-    {
-      if (tr->partitionConverged[i])
-        pr->partitionData[i]->executeModel = PLL_FALSE;
-      else
-        pr->partitionData[i]->executeModel = PLL_TRUE;
-    }
-  }
-  if (tr->useRecom)
-  {
-    int
-      slot = -1;
-      //count = 0;
-
-    /* Ensure p and q get a unpinnable slot in physical memory */
-    if(!isTip(q->number, tr->mxtips))
-    {
-      q_recom = getxVector(tr->rvec, q->number, &slot, tr->mxtips);
-      tr->td[0].ti[0].slot_q = slot;
-    }
-    if(!isTip(p->number, tr->mxtips))
-    {
-      p_recom = getxVector(tr->rvec, p->number, &slot, tr->mxtips);
-      tr->td[0].ti[0].slot_p = slot;
-    }
-  }
-
-
-  /* compute the traversal descriptor of the likelihood vectors  that need to be re-computed 
-     first in makenewzIterative */
-
-  tr->td[0].count = 1;
-
-  if(p_recom || needsRecomp(tr->useRecom, tr->rvec, p, tr->mxtips))
-    computeTraversal(tr, p, PLL_TRUE, numBranches);
-
-  if(q_recom || needsRecomp(tr->useRecom, tr->rvec, q, tr->mxtips))
-    computeTraversal(tr, q, PLL_TRUE, numBranches);
-
-  /* call the Newton-Raphson procedure */
-
-  topLevelMakenewz(tr, pr, z0, maxiter, result);
-
-  /* Mark node as unpinnable */
-  if(tr->useRecom)
-  {
-    unpinNode(tr->rvec, p->number, tr->mxtips);
-    unpinNode(tr->rvec, q->number, tr->mxtips);
-  }
-
-  /* fix eceuteModel this seems to be a bit redundant with topLevelMakenewz */ 
-
-  for(i = 0; i < numBranches; i++)
-    pr->partitionData[i]->executeModel = PLL_TRUE;
-}
-
-
-/* below are, once again the optimized functions */
-
-#if (defined(__SSE3) || defined(__AVX))
-
-
-static void sumCAT_BINARY(int tipCase, double *sum, double *x1_start, double *x2_start, double *tipVector,
-                          unsigned char *tipX1, unsigned char *tipX2, int n)
-
-{
-  int i;
-  
-#if (!defined(__SSE3) && !defined(__AVX))
-  int j;
-#endif
-  double *x1, *x2;
-
-  switch(tipCase)
-    {
-    case PLL_TIP_TIP:
-      for (i = 0; i < n; i++)
-        {
-          x1 = &(tipVector[2 * tipX1[i]]);
-          x2 = &(tipVector[2 * tipX2[i]]);
-
-#if (!defined(__SSE3) && !defined(__AVX))
-          for(j = 0; j < 2; j++)
-            sum[i * 2 + j]     = x1[j] * x2[j];
-#else
-          _mm_store_pd(&sum[i * 2], _mm_mul_pd( _mm_load_pd(x1), _mm_load_pd(x2)));
-#endif
-        }
-      break;
-    case PLL_TIP_INNER:
-      for (i = 0; i < n; i++)
-        {
-          x1 = &(tipVector[2 * tipX1[i]]);
-          x2 = &x2_start[2 * i];
-
-#if (!defined(__SSE3) && !defined(__AVX))
-          for(j = 0; j < 2; j++)
-            sum[i * 2 + j]     = x1[j] * x2[j];
-#else
-          _mm_store_pd(&sum[i * 2], _mm_mul_pd( _mm_load_pd(x1), _mm_load_pd(x2)));  
-#endif
-        }
-      break;
-    case PLL_INNER_INNER:
-      for (i = 0; i < n; i++)
-        {
-          x1 = &x1_start[2 * i];
-          x2 = &x2_start[2 * i];
-#if (!defined(__SSE3) && !defined(__AVX))
-          for(j = 0; j < 2; j++)
-            sum[i * 2 + j]     = x1[j] * x2[j];
-#else
-          _mm_store_pd(&sum[i * 2], _mm_mul_pd( _mm_load_pd(x1), _mm_load_pd(x2)));   
-#endif
-        }
-      break;
-    default:
-      assert(0);
-    }
-}
-
-
-static void sumCAT_SAVE(int tipCase, double *sum, double *x1_start, double *x2_start, double *tipVector,
-    unsigned char *tipX1, unsigned char *tipX2, int n, double *x1_gapColumn, double *x2_gapColumn, unsigned int *x1_gap, unsigned int *x2_gap)
-{
-  int i;
-  double 
-    *x1, 
-    *x2,    
-    *x1_ptr = x1_start,
-    *x2_ptr = x2_start;
-
-  switch(tipCase)
-  {
-    case PLL_TIP_TIP:
-      for (i = 0; i < n; i++)
-      {
-        x1 = &(tipVector[4 * tipX1[i]]);
-        x2 = &(tipVector[4 * tipX2[i]]);
-
-        _mm_store_pd( &sum[i*4 + 0], _mm_mul_pd( _mm_load_pd( &x1[0] ), _mm_load_pd( &x2[0] )));
-        _mm_store_pd( &sum[i*4 + 2], _mm_mul_pd( _mm_load_pd( &x1[2] ), _mm_load_pd( &x2[2] )));
-      }
-      break;
-    case PLL_TIP_INNER:
-      for (i = 0; i < n; i++)
-      {
-        x1 = &(tipVector[4 * tipX1[i]]);
-        if(isGap(x2_gap, i))
-          x2 = x2_gapColumn;
-        else
-        {
-          x2 = x2_ptr;
-          x2_ptr += 4;
-        }
-
-        _mm_store_pd( &sum[i*4 + 0], _mm_mul_pd( _mm_load_pd( &x1[0] ), _mm_load_pd( &x2[0] )));
-        _mm_store_pd( &sum[i*4 + 2], _mm_mul_pd( _mm_load_pd( &x1[2] ), _mm_load_pd( &x2[2] )));
-      }
-      break;
-    case PLL_INNER_INNER:
-      for (i = 0; i < n; i++)
-      {
-        if(isGap(x1_gap, i))
-          x1 = x1_gapColumn;
-        else
-        {
-          x1 = x1_ptr;
-          x1_ptr += 4;
-        }
-
-        if(isGap(x2_gap, i))
-          x2 = x2_gapColumn;
-        else
-        {
-          x2 = x2_ptr;
-          x2_ptr += 4;
-        }
-
-        _mm_store_pd( &sum[i*4 + 0], _mm_mul_pd( _mm_load_pd( &x1[0] ), _mm_load_pd( &x2[0] )));
-        _mm_store_pd( &sum[i*4 + 2], _mm_mul_pd( _mm_load_pd( &x1[2] ), _mm_load_pd( &x2[2] )));
-
-      }    
-      break;
-    default:
-      assert(0);
-  }
-}
-
-static void sumGAMMA_BINARY(int tipCase, double *sumtable, double *x1_start, double *x2_start, double *tipVector,
-                            unsigned char *tipX1, unsigned char *tipX2, int n)
-{
-  double *x1, *x2, *sum;
-  int i, j;
-#if (!defined(_USE_PTHREADS) && !defined(_FINE_GRAIN_MPI))
-  int k;
-#endif
-
-  /* C-OPT once again switch over possible configurations at inner node */
-
-  switch(tipCase)
-    {
-    case PLL_TIP_TIP:
-      /* C-OPT main for loop overt alignment length */
-      for (i = 0; i < n; i++)
-        {
-          x1 = &(tipVector[2 * tipX1[i]]);
-          x2 = &(tipVector[2 * tipX2[i]]);
-          sum = &sumtable[i * 8];
-#if (!defined(_USE_PTHREADS) && !defined(_FINE_GRAIN_MPI))
-          for(j = 0; j < 4; j++)
-            for(k = 0; k < 2; k++)
-              sum[j * 2 + k] = x1[k] * x2[k];
-#else
-          for(j = 0; j < 4; j++)
-            _mm_store_pd( &sum[j*2], _mm_mul_pd( _mm_load_pd( &x1[0] ), _mm_load_pd( &x2[0] )));         
-#endif
-        }
-      break;
-    case PLL_TIP_INNER:
-      for (i = 0; i < n; i++)
-        {
-          x1  = &(tipVector[2 * tipX1[i]]);
-          x2  = &x2_start[8 * i];
-          sum = &sumtable[8 * i];
-
-#if (!defined(_USE_PTHREADS) && !defined(_FINE_GRAIN_MPI))
-          for(j = 0; j < 4; j++)
-            for(k = 0; k < 2; k++)
-              sum[j * 2 + k] = x1[k] * x2[j * 2 + k];
-#else
-          for(j = 0; j < 4; j++)
-            _mm_store_pd( &sum[j*2], _mm_mul_pd( _mm_load_pd( &x1[0] ), _mm_load_pd( &x2[j * 2] )));
-#endif
-        }
-      break;
-    case PLL_INNER_INNER:
-      for (i = 0; i < n; i++)
-        {
-          x1  = &x1_start[8 * i];
-          x2  = &x2_start[8 * i];
-          sum = &sumtable[8 * i];
-#if (!defined(_USE_PTHREADS) && !defined(_FINE_GRAIN_MPI))
-          for(j = 0; j < 4; j++)
-            for(k = 0; k < 2; k++)
-              sum[j * 2 + k] = x1[j * 2 + k] * x2[j * 2 + k];
-#else
-          for(j = 0; j < 4; j++)
-            _mm_store_pd( &sum[j*2], _mm_mul_pd( _mm_load_pd( &x1[j * 2] ), _mm_load_pd( &x2[j * 2] )));
-#endif
-        }
-      break;
-    default:
-      assert(0);
-    }
-}
-
-
-static void sumGAMMA_GAPPED_SAVE(int tipCase, double *sumtable, double *x1_start, double *x2_start, double *tipVector,
-    unsigned char *tipX1, unsigned char *tipX2, int n, 
-    double *x1_gapColumn, double *x2_gapColumn, unsigned int *x1_gap, unsigned int *x2_gap)
-{
-  double 
-    *x1, 
-    *x2, 
-    *sum,
-    *x1_ptr = x1_start,
-    *x2_ptr = x2_start;
-
-  int i, j, k; 
-
-  switch(tipCase)
-  {
-    case PLL_TIP_TIP:     
-      for (i = 0; i < n; i++)
-      {
-        x1 = &(tipVector[4 * tipX1[i]]);
-        x2 = &(tipVector[4 * tipX2[i]]);
-        sum = &sumtable[i * 16];
-
-        for(j = 0; j < 4; j++)      
-          for(k = 0; k < 4; k+=2)
-            _mm_store_pd( &sum[j*4 + k], _mm_mul_pd( _mm_load_pd( &x1[k] ), _mm_load_pd( &x2[k] )));
-      }
-      break;
-    case PLL_TIP_INNER:
-      for (i = 0; i < n; i++)
-      {
-        x1  = &(tipVector[4 * tipX1[i]]);
-
-        if(x2_gap[i / 32] & mask32[i % 32])
-          x2 = x2_gapColumn;
-        else
-        {
-          x2  = x2_ptr;
-          x2_ptr += 16;
-        }
-
-        sum = &sumtable[16 * i];
-
-        for(j = 0; j < 4; j++)      
-          for(k = 0; k < 4; k+=2)
-            _mm_store_pd( &sum[j*4 + k], _mm_mul_pd( _mm_load_pd( &x1[k] ), _mm_load_pd( &x2[j * 4 + k] )));
-      }
-      break;
-    case PLL_INNER_INNER:
-      for (i = 0; i < n; i++)
-      {
-        if(x1_gap[i / 32] & mask32[i % 32])
-          x1 = x1_gapColumn;
-        else
-        {
-          x1  = x1_ptr;
-          x1_ptr += 16;
-        }
-
-        if(x2_gap[i / 32] & mask32[i % 32])
-          x2 = x2_gapColumn;
-        else
-        {
-          x2  = x2_ptr;
-          x2_ptr += 16;
-        }
-
-        sum = &sumtable[16 * i];
-
-
-        for(j = 0; j < 4; j++)      
-          for(k = 0; k < 4; k+=2)
-            _mm_store_pd( &sum[j*4 + k], _mm_mul_pd( _mm_load_pd( &x1[j * 4 + k] ), _mm_load_pd( &x2[j * 4 + k] )));
-      }
-      break;
-    default:
-      assert(0);
-  }
-}
-
-
-
-
-static void sumGAMMA(int tipCase, double *sumtable, double *x1_start, double *x2_start, double *tipVector,
-    unsigned char *tipX1, unsigned char *tipX2, int n)
-{
-  double *x1, *x2, *sum;
-  int i, j, k;
-
-  /* C-OPT once again switch over possible configurations at inner node */
-
-  switch(tipCase)
-  {
-    case PLL_TIP_TIP:
-      /* C-OPT main for loop overt alignment length */
-      for (i = 0; i < n; i++)
-      {
-        x1 = &(tipVector[4 * tipX1[i]]);
-        x2 = &(tipVector[4 * tipX2[i]]);
-        sum = &sumtable[i * 16];
-
-        for(j = 0; j < 4; j++)      
-          for(k = 0; k < 4; k+=2)
-            _mm_store_pd( &sum[j*4 + k], _mm_mul_pd( _mm_load_pd( &x1[k] ), _mm_load_pd( &x2[k] )));
-      }
-      break;
-    case PLL_TIP_INNER:
-      for (i = 0; i < n; i++)
-      {
-        x1  = &(tipVector[4 * tipX1[i]]);
-        x2  = &x2_start[16 * i];
-        sum = &sumtable[16 * i];
-
-        for(j = 0; j < 4; j++)      
-          for(k = 0; k < 4; k+=2)
-            _mm_store_pd( &sum[j*4 + k], _mm_mul_pd( _mm_load_pd( &x1[k] ), _mm_load_pd( &x2[j * 4 + k] )));
-      }
-      break;
-    case PLL_INNER_INNER:
-      for (i = 0; i < n; i++)
-      {
-        x1  = &x1_start[16 * i];
-        x2  = &x2_start[16 * i];
-        sum = &sumtable[16 * i];
-
-        for(j = 0; j < 4; j++)      
-          for(k = 0; k < 4; k+=2)
-            _mm_store_pd( &sum[j*4 + k], _mm_mul_pd( _mm_load_pd( &x1[j * 4 + k] ), _mm_load_pd( &x2[j * 4 + k] )));
-      }
-      break;
-    default:
-      assert(0);
-  }
-}
-
-
-static void sumCAT(int tipCase, double *sum, double *x1_start, double *x2_start, double *tipVector,
-    unsigned char *tipX1, unsigned char *tipX2, int n)
-{
-  int i;
-  double 
-    *x1, 
-    *x2;
-
-  switch(tipCase)
-  {
-    case PLL_TIP_TIP:
-      for (i = 0; i < n; i++)
-      {
-        x1 = &(tipVector[4 * tipX1[i]]);
-        x2 = &(tipVector[4 * tipX2[i]]);
-
-        _mm_store_pd( &sum[i*4 + 0], _mm_mul_pd( _mm_load_pd( &x1[0] ), _mm_load_pd( &x2[0] )));
-        _mm_store_pd( &sum[i*4 + 2], _mm_mul_pd( _mm_load_pd( &x1[2] ), _mm_load_pd( &x2[2] )));
-      }
-      break;
-    case PLL_TIP_INNER:
-      for (i = 0; i < n; i++)
-      {
-        x1 = &(tipVector[4 * tipX1[i]]);
-        x2 = &x2_start[4 * i];
-
-        _mm_store_pd( &sum[i*4 + 0], _mm_mul_pd( _mm_load_pd( &x1[0] ), _mm_load_pd( &x2[0] )));
-        _mm_store_pd( &sum[i*4 + 2], _mm_mul_pd( _mm_load_pd( &x1[2] ), _mm_load_pd( &x2[2] )));
-      }
-      break;
-    case PLL_INNER_INNER:
-      for (i = 0; i < n; i++)
-      {
-        x1 = &x1_start[4 * i];
-        x2 = &x2_start[4 * i];
-
-        _mm_store_pd( &sum[i*4 + 0], _mm_mul_pd( _mm_load_pd( &x1[0] ), _mm_load_pd( &x2[0] )));
-        _mm_store_pd( &sum[i*4 + 2], _mm_mul_pd( _mm_load_pd( &x1[2] ), _mm_load_pd( &x2[2] )));
-
-      }    
-      break;
-    default:
-      assert(0);
-  }
-}
-static void sumGAMMAPROT_GAPPED_SAVE(int tipCase, double *sumtable, double *x1, double *x2, double *tipVector,
-    unsigned char *tipX1, unsigned char *tipX2, int n, 
-    double *x1_gapColumn, double *x2_gapColumn, unsigned int *x1_gap, unsigned int *x2_gap)
-{
-  int i, l, k;
-  double 
-    *left, 
-    *right, 
-    *sum,
-    *x1_ptr = x1,
-    *x2_ptr = x2,
-    *x1v,
-    *x2v;
-
-  switch(tipCase)
-  {
-    case PLL_TIP_TIP:
-      for(i = 0; i < n; i++)
-      {
-        left  = &(tipVector[20 * tipX1[i]]);
-        right = &(tipVector[20 * tipX2[i]]);
-
-        for(l = 0; l < 4; l++)
-        {
-          sum = &sumtable[i * 80 + l * 20];
-
-          for(k = 0; k < 20; k+=2)
-          {
-            __m128d sumv = _mm_mul_pd(_mm_load_pd(&left[k]), _mm_load_pd(&right[k]));
-
-            _mm_store_pd(&sum[k], sumv);                 
-          }
-
-        }
-      }
-      break;
-    case PLL_TIP_INNER:
-      for(i = 0; i < n; i++)
-      {
-        left = &(tipVector[20 * tipX1[i]]);
-
-        if(x2_gap[i / 32] & mask32[i % 32])
-          x2v = x2_gapColumn;
-        else
-        {
-          x2v = x2_ptr;
-          x2_ptr += 80;
-        }
-
-        for(l = 0; l < 4; l++)
-        {
-          right = &(x2v[l * 20]);
-          sum = &sumtable[i * 80 + l * 20];
-
-          for(k = 0; k < 20; k+=2)
-          {
-            __m128d sumv = _mm_mul_pd(_mm_load_pd(&left[k]), _mm_load_pd(&right[k]));
-
-            _mm_store_pd(&sum[k], sumv);                 
-          }
-        }
-      }
-      break;
-    case PLL_INNER_INNER:
-      for(i = 0; i < n; i++)
-      {
-        if(x1_gap[i / 32] & mask32[i % 32])
-          x1v = x1_gapColumn;
-        else
-        {
-          x1v  = x1_ptr;
-          x1_ptr += 80;
-        }
-
-        if(x2_gap[i / 32] & mask32[i % 32])
-          x2v = x2_gapColumn;
-        else
-        {
-          x2v  = x2_ptr;
-          x2_ptr += 80;
-        }
-
-        for(l = 0; l < 4; l++)
-        {
-          left  = &(x1v[l * 20]);
-          right = &(x2v[l * 20]);
-          sum   = &(sumtable[i * 80 + l * 20]);
-
-          for(k = 0; k < 20; k+=2)
-          {
-            __m128d sumv = _mm_mul_pd(_mm_load_pd(&left[k]), _mm_load_pd(&right[k]));
-
-            _mm_store_pd(&sum[k], sumv);                 
-          }
-        }
-      }
-      break;
-    default:
-      assert(0);
-  }
-}
-
-
-static void sumGAMMAPROT_LG4(int tipCase, double *sumtable, double *x1, double *x2, double *tipVector[4],
-                             unsigned char *tipX1, unsigned char *tipX2, int n)
-{
-  int i, l, k;
-  double *left, *right, *sum;
-
-  switch(tipCase)
-    {
-    case PLL_TIP_TIP:
-      for(i = 0; i < n; i++)
-        {         
-          for(l = 0; l < 4; l++)
-            {
-              left  = &(tipVector[l][20 * tipX1[i]]);
-              right = &(tipVector[l][20 * tipX2[i]]);
-
-              sum = &sumtable[i * 80 + l * 20];
-#ifdef __SSE3
-              for(k = 0; k < 20; k+=2)
-                {
-                  __m128d sumv = _mm_mul_pd(_mm_load_pd(&left[k]), _mm_load_pd(&right[k]));
-                  
-                  _mm_store_pd(&sum[k], sumv);           
-                }
-#else
-              for(k = 0; k < 20; k++)
-                sum[k] = left[k] * right[k];
-#endif
-            }
-        }
-      break;
-    case PLL_TIP_INNER:
-      for(i = 0; i < n; i++)
-        {
-         
-
-          for(l = 0; l < 4; l++)
-            { 
-              left = &(tipVector[l][20 * tipX1[i]]);
-              right = &(x2[80 * i + l * 20]);
-              sum = &sumtable[i * 80 + l * 20];
-#ifdef __SSE3
-              for(k = 0; k < 20; k+=2)
-                {
-                  __m128d sumv = _mm_mul_pd(_mm_load_pd(&left[k]), _mm_load_pd(&right[k]));
-                  
-                  _mm_store_pd(&sum[k], sumv);           
-                }
-#else
-              for(k = 0; k < 20; k++)
-                sum[k] = left[k] * right[k];
-#endif
-            }
-        }
-      break;
-    case PLL_INNER_INNER:
-      for(i = 0; i < n; i++)
-        {
-          for(l = 0; l < 4; l++)
-            {
-              left  = &(x1[80 * i + l * 20]);
-              right = &(x2[80 * i + l * 20]);
-              sum   = &(sumtable[i * 80 + l * 20]);
-
-#ifdef __SSE3
-              for(k = 0; k < 20; k+=2)
-                {
-                  __m128d sumv = _mm_mul_pd(_mm_load_pd(&left[k]), _mm_load_pd(&right[k]));
-                  
-                  _mm_store_pd(&sum[k], sumv);           
-                }
-#else
-              for(k = 0; k < 20; k++)
-                sum[k] = left[k] * right[k];
-#endif
-            }
-        }
-      break;
-    default:
-      assert(0);
-    }
-}
-
-
-static void sumGAMMAPROT(int tipCase, double *sumtable, double *x1, double *x2, double *tipVector,
-    unsigned char *tipX1, unsigned char *tipX2, int n)
-{
-  int i, l, k;
-  double *left, *right, *sum;
-
-  switch(tipCase)
-  {
-    case PLL_TIP_TIP:
-      for(i = 0; i < n; i++)
-      {
-        left  = &(tipVector[20 * tipX1[i]]);
-        right = &(tipVector[20 * tipX2[i]]);
-
-        for(l = 0; l < 4; l++)
-        {
-          sum = &sumtable[i * 80 + l * 20];
-
-          for(k = 0; k < 20; k+=2)
-          {
-            __m128d sumv = _mm_mul_pd(_mm_load_pd(&left[k]), _mm_load_pd(&right[k]));
-
-            _mm_store_pd(&sum[k], sumv);                 
-          }
-
-        }
-      }
-      break;
-    case PLL_TIP_INNER:
-      for(i = 0; i < n; i++)
-      {
-        left = &(tipVector[20 * tipX1[i]]);
-
-        for(l = 0; l < 4; l++)
-        {
-          right = &(x2[80 * i + l * 20]);
-          sum = &sumtable[i * 80 + l * 20];
-
-          for(k = 0; k < 20; k+=2)
-          {
-            __m128d sumv = _mm_mul_pd(_mm_load_pd(&left[k]), _mm_load_pd(&right[k]));
-
-            _mm_store_pd(&sum[k], sumv);                 
-          }
-
-        }
-      }
-      break;
-    case PLL_INNER_INNER:
-      for(i = 0; i < n; i++)
-      {
-        for(l = 0; l < 4; l++)
-        {
-          left  = &(x1[80 * i + l * 20]);
-          right = &(x2[80 * i + l * 20]);
-          sum   = &(sumtable[i * 80 + l * 20]);
-
-
-          for(k = 0; k < 20; k+=2)
-          {
-            __m128d sumv = _mm_mul_pd(_mm_load_pd(&left[k]), _mm_load_pd(&right[k]));
-
-            _mm_store_pd(&sum[k], sumv);                 
-          }
-        }
-      }
-      break;
-    default:
-      assert(0);
-  }
-}
-
-
-static void sumGTRCATPROT(int tipCase, double *sumtable, double *x1, double *x2, double *tipVector,
-    unsigned char *tipX1, unsigned char *tipX2, int n)
-{
-  int i, l;
-  double *sum, *left, *right;
-
-  switch(tipCase)
-  {
-    case PLL_TIP_TIP:
-      for (i = 0; i < n; i++)
-      {
-        left  = &(tipVector[20 * tipX1[i]]);
-        right = &(tipVector[20 * tipX2[i]]);
-        sum = &sumtable[20 * i];
-
-        for(l = 0; l < 20; l+=2)
-        {
-          __m128d sumv = _mm_mul_pd(_mm_load_pd(&left[l]), _mm_load_pd(&right[l]));
-
-          _mm_store_pd(&sum[l], sumv);           
-        }
-
-      }
-      break;
-    case PLL_TIP_INNER:
-      for (i = 0; i < n; i++)
-      {
-        left = &(tipVector[20 * tipX1[i]]);
-        right = &x2[20 * i];
-        sum = &sumtable[20 * i];
-
-        for(l = 0; l < 20; l+=2)
-        {
-          __m128d sumv = _mm_mul_pd(_mm_load_pd(&left[l]), _mm_load_pd(&right[l]));
-
-          _mm_store_pd(&sum[l], sumv);           
-        }
-
-      }
-      break;
-    case PLL_INNER_INNER:
-      for (i = 0; i < n; i++)
-      {
-        left  = &x1[20 * i];
-        right = &x2[20 * i];
-        sum = &sumtable[20 * i];
-
-        for(l = 0; l < 20; l+=2)
-        {
-          __m128d sumv = _mm_mul_pd(_mm_load_pd(&left[l]), _mm_load_pd(&right[l]));
-
-          _mm_store_pd(&sum[l], sumv);           
-        }
-
-      }
-      break;
-    default:
-      assert(0);
-  }
-}
-
-
-static void sumGTRCATPROT_SAVE(int tipCase, double *sumtable, double *x1, double *x2, double *tipVector,
-    unsigned char *tipX1, unsigned char *tipX2, int n, 
-    double *x1_gapColumn, double *x2_gapColumn, unsigned int *x1_gap, unsigned int *x2_gap)
-{
-  int 
-    i, 
-    l;
-
-  double 
-    *sum, 
-    *left, 
-    *right,
-    *left_ptr = x1,
-    *right_ptr = x2;
-
-  switch(tipCase)
-  {
-    case PLL_TIP_TIP:
-      for (i = 0; i < n; i++)
-      {
-        left  = &(tipVector[20 * tipX1[i]]);
-        right = &(tipVector[20 * tipX2[i]]);
-        sum = &sumtable[20 * i];
-
-        for(l = 0; l < 20; l+=2)
-        {
-          __m128d sumv = _mm_mul_pd(_mm_load_pd(&left[l]), _mm_load_pd(&right[l]));
-
-          _mm_store_pd(&sum[l], sumv);           
-        }
-
-      }
-      break;
-    case PLL_TIP_INNER:
-      for (i = 0; i < n; i++)
-      {
-        left = &(tipVector[20 * tipX1[i]]);       
-
-        if(isGap(x2_gap, i))
-          right = x2_gapColumn;
-        else
-        {
-          right = right_ptr;
-          right_ptr += 20;
-        }
-
-        sum = &sumtable[20 * i];
-
-        for(l = 0; l < 20; l+=2)
-        {
-          __m128d sumv = _mm_mul_pd(_mm_load_pd(&left[l]), _mm_load_pd(&right[l]));
-
-          _mm_store_pd(&sum[l], sumv);           
-        }
-
-      }
-      break;
-    case PLL_INNER_INNER:
-      for (i = 0; i < n; i++)
-      {  
-        if(isGap(x1_gap, i))
-          left = x1_gapColumn;
-        else
-        {
-          left = left_ptr;
-          left_ptr += 20;
-        }
-
-        if(isGap(x2_gap, i))
-          right = x2_gapColumn;
-        else
-        {
-          right = right_ptr;
-          right_ptr += 20;
-        }
-
-        sum = &sumtable[20 * i];
-
-        for(l = 0; l < 20; l+=2)
-        {
-          __m128d sumv = _mm_mul_pd(_mm_load_pd(&left[l]), _mm_load_pd(&right[l]));
-
-          _mm_store_pd(&sum[l], sumv);           
-        }
-      }
-      break;
-    default:
-      assert(0);
-  }
-}
-
-static void coreGTRGAMMA(const int upper, double *sumtable,
-    volatile double *ext_dlnLdlz,  volatile double *ext_d2lnLdlz2, double *EIGN, double *gammaRates, double lz, int *wrptr)
-{
-  double 
-    dlnLdlz = 0.0,
-            d2lnLdlz2 = 0.0,
-            ki, 
-            kisqr,  
-            inv_Li, 
-            dlnLidlz, 
-            d2lnLidlz2,  
-		*sum;
-	PLL_ALIGN_BEGIN double
-            diagptable0[16] PLL_ALIGN_END,
-            diagptable1[16] PLL_ALIGN_END,
-            diagptable2[16] PLL_ALIGN_END;
-
-  int     
-    i, 
-    j, 
-    l;
-
-  for(i = 0; i < 4; i++)
-  {
-    ki = gammaRates[i];
-    kisqr = ki * ki;
-
-    diagptable0[i * 4] = 1.0;
-    diagptable1[i * 4] = 0.0;
-    diagptable2[i * 4] = 0.0;
-
-    for(l = 1; l < 4; l++)
-    {
-      diagptable0[i * 4 + l] = exp(EIGN[l] * ki * lz);
-      diagptable1[i * 4 + l] = EIGN[l] * ki;
-      diagptable2[i * 4 + l] = EIGN[l] * EIGN[l] * kisqr;
-    }
-  }
-
-  for (i = 0; i < upper; i++)
-  { 
-    __m128d a0 = _mm_setzero_pd();
-    __m128d a1 = _mm_setzero_pd();
-    __m128d a2 = _mm_setzero_pd();
-
-    sum = &sumtable[i * 16];         
-
-    for(j = 0; j < 4; j++)
-    {                   
-      double       
-        *d0 = &diagptable0[j * 4],
-        *d1 = &diagptable1[j * 4],
-        *d2 = &diagptable2[j * 4];
-
-      for(l = 0; l < 4; l+=2)
-      {
-        __m128d tmpv = _mm_mul_pd(_mm_load_pd(&d0[l]), _mm_load_pd(&sum[j * 4 + l]));
-        a0 = _mm_add_pd(a0, tmpv);
-        a1 = _mm_add_pd(a1, _mm_mul_pd(tmpv, _mm_load_pd(&d1[l])));
-        a2 = _mm_add_pd(a2, _mm_mul_pd(tmpv, _mm_load_pd(&d2[l])));
-      }           
-    }
-
-    a0 = _mm_hadd_pd(a0, a0);
-    a1 = _mm_hadd_pd(a1, a1);
-    a2 = _mm_hadd_pd(a2, a2);
-
-    _mm_storel_pd(&inv_Li, a0);     
-    _mm_storel_pd(&dlnLidlz, a1);
-    _mm_storel_pd(&d2lnLidlz2, a2); 
-
-    inv_Li = 1.0 / fabs (inv_Li);
-
-    dlnLidlz   *= inv_Li;
-    d2lnLidlz2 *= inv_Li;     
-
-    dlnLdlz   += wrptr[i] * dlnLidlz;
-    d2lnLdlz2 += wrptr[i] * (d2lnLidlz2 - dlnLidlz * dlnLidlz);
-  }
-
-
-  *ext_dlnLdlz   = dlnLdlz;
-  *ext_d2lnLdlz2 = d2lnLdlz2; 
-}
-
-static void coreGTRCAT_BINARY(int upper, int numberOfCategories, double *sum,
-                              volatile double *d1, volatile double *d2, 
-                              double *rptr, double *EIGN, int *cptr, double lz, int *wgt)
-{
-  int i;
-  double
-    *d, *d_start = NULL,
-    tmp_0, inv_Li, dlnLidlz, d2lnLidlz2,
-    dlnLdlz = 0.0,
-    d2lnLdlz2 = 0.0;
-  double e[2];
-  double dd1;
-
-  e[0] = EIGN[0];
-  e[1] = EIGN[0] * EIGN[0];
-
-
-  d = d_start = (double *)rax_malloc(numberOfCategories * sizeof(double));
-
-  dd1 = e[0] * lz;
-
-  for(i = 0; i < numberOfCategories; i++)
-    d[i] = exp(dd1 * rptr[i]);
-
-  for (i = 0; i < upper; i++)
-    {
-      double
-        r = rptr[cptr[i]],
-        wr1 = r * wgt[i],
-        wr2 = r * r * wgt[i];
-      
-      d = &d_start[cptr[i]];
-
-      inv_Li = sum[2 * i];
-      inv_Li += (tmp_0 = d[0] * sum[2 * i + 1]);
-
-      inv_Li = 1.0/fabs(inv_Li);
-
-      dlnLidlz   = tmp_0 * e[0];
-      d2lnLidlz2 = tmp_0 * e[1];
-
-      dlnLidlz   *= inv_Li;
-      d2lnLidlz2 *= inv_Li;
-
-      dlnLdlz   += wr1 * dlnLidlz;
-      d2lnLdlz2 += wr2 * (d2lnLidlz2 - dlnLidlz * dlnLidlz);
-    }
-
-  *d1 = dlnLdlz;
-  *d2 = d2lnLdlz2;
-
-  rax_free(d_start);
-}
-
-
-static void coreGTRCAT(int upper, int numberOfCategories, double *sum,
-    volatile double *d1, volatile double *d2, int *wgt,
-    double *rptr, double *EIGN, int *cptr, double lz)
-{
-  int i;
-  double
-    *d, *d_start = NULL,
-    inv_Li, dlnLidlz, d2lnLidlz2,
-    dlnLdlz = 0.0,
-    d2lnLdlz2 = 0.0;
-
-  PLL_ALIGN_BEGIN double e1[4] PLL_ALIGN_END;
-  PLL_ALIGN_BEGIN double e2[4] PLL_ALIGN_END;
-  double dd1, dd2, dd3;
-
-  __m128d
-    e1v[2],
-    e2v[2];
-
-  e1[0] = 0.0;
-  e2[0] = 0.0;
-  e1[1] = EIGN[1];
-  e2[1] = EIGN[1] * EIGN[1];
-  e1[2] = EIGN[2];
-  e2[2] = EIGN[2] * EIGN[2];
-  e1[3] = EIGN[3];
-  e2[3] = EIGN[3] * EIGN[3];
-
-  e1v[0]= _mm_load_pd(&e1[0]);
-  e1v[1]= _mm_load_pd(&e1[2]);
-
-  e2v[0]= _mm_load_pd(&e2[0]);
-  e2v[1]= _mm_load_pd(&e2[2]);
-
-  rax_posix_memalign ((void **) &d_start, PLL_BYTE_ALIGNMENT, numberOfCategories * 4 * sizeof(double));
-  d = d_start;
-
-  dd1 = EIGN[1] * lz;
-  dd2 = EIGN[2] * lz;
-  dd3 = EIGN[3] * lz;
-
-  for(i = 0; i < numberOfCategories; i++)
-  {
-    d[i * 4 + 0] = 1.0;
-    d[i * 4 + 1] = exp(dd1 * rptr[i]);
-    d[i * 4 + 2] = exp(dd2 * rptr[i]);
-    d[i * 4 + 3] = exp(dd3 * rptr[i]);
-  }
-
-  for (i = 0; i < upper; i++)
-  {
-    double *s = &sum[4 * i];
-    d = &d_start[4 * cptr[i]];  
-
-    __m128d tmp_0v =_mm_mul_pd(_mm_load_pd(&d[0]),_mm_load_pd(&s[0]));
-    __m128d tmp_1v =_mm_mul_pd(_mm_load_pd(&d[2]),_mm_load_pd(&s[2]));
-
-    __m128d inv_Liv    = _mm_add_pd(tmp_0v, tmp_1v);      
-
-    __m128d dlnLidlzv   = _mm_add_pd(_mm_mul_pd(tmp_0v, e1v[0]), _mm_mul_pd(tmp_1v, e1v[1]));     
-    __m128d d2lnLidlz2v = _mm_add_pd(_mm_mul_pd(tmp_0v, e2v[0]), _mm_mul_pd(tmp_1v, e2v[1]));
-
-
-    inv_Liv   = _mm_hadd_pd(inv_Liv, inv_Liv);
-    dlnLidlzv = _mm_hadd_pd(dlnLidlzv, dlnLidlzv);
-    d2lnLidlz2v = _mm_hadd_pd(d2lnLidlz2v, d2lnLidlz2v);                 
-
-    _mm_storel_pd(&inv_Li, inv_Liv);     
-    _mm_storel_pd(&dlnLidlz, dlnLidlzv);                 
-    _mm_storel_pd(&d2lnLidlz2, d2lnLidlz2v);      
-
-    inv_Li = 1.0 / fabs (inv_Li);
-
-    dlnLidlz   *= inv_Li;
-    d2lnLidlz2 *= inv_Li;
-
-    dlnLdlz  += wgt[i] * rptr[cptr[i]] * dlnLidlz;
-    d2lnLdlz2 += wgt[i] * rptr[cptr[i]] * rptr[cptr[i]] * (d2lnLidlz2 - dlnLidlz * dlnLidlz);
-  }
-
-  *d1 = dlnLdlz;
-  *d2 = d2lnLdlz2;
-
-  rax_free(d_start);
-}
-
-#if (!defined(__SSE3) && !defined(__AVX))
-static void coreGTRGAMMA_BINARY(const int upper, double *sumtable,
-                                volatile double *d1,   volatile double *d2, double *EIGN, double *gammaRates, double lz, int *wrptr)
-{
-  int i, j;
-  double
-    *diagptable, *diagptable_start, *sum,
-    tmp_1, inv_Li, dlnLidlz, d2lnLidlz2, ki, kisqr,
-    dlnLdlz = 0.0,
-    d2lnLdlz2 = 0.0;
-
-  diagptable = diagptable_start = (double *)rax_malloc(sizeof(double) * 12);
-
-  for(i = 0; i < 4; i++)
-    {
-      ki = gammaRates[i];
-      kisqr = ki * ki;
-
-      diagptable[i * 3]     = exp (EIGN[1] * ki * lz);
-      diagptable[i * 3 + 1] = EIGN[1] * ki;
-      diagptable[i * 3 + 2] = EIGN[1] * EIGN[1] * kisqr;
-    }
-
-  for (i = 0; i < upper; i++)
-    {
-      diagptable = diagptable_start;
-      sum = &(sumtable[i * 8]);
-
-      inv_Li      = 0.0;
-      dlnLidlz    = 0.0;
-      d2lnLidlz2  = 0.0;
-
-      for(j = 0; j < 4; j++)
-        {
-          inv_Li += sum[2 * j];
-
-          tmp_1      =  diagptable[3 * j] * sum[2 * j + 1];
-          inv_Li     += tmp_1;
-          dlnLidlz   += tmp_1 * diagptable[3 * j + 1];
-          d2lnLidlz2 += tmp_1 * diagptable[3 * j + 2];
-        }
-
-      inv_Li = 1.0 / fabs(inv_Li);
-
-      dlnLidlz   *= inv_Li;
-      d2lnLidlz2 *= inv_Li;
-
-
-      dlnLdlz  += wrptr[i] * dlnLidlz;
-      d2lnLdlz2 += wrptr[i] * (d2lnLidlz2 - dlnLidlz * dlnLidlz);
-    }
-
-  *d1 = dlnLdlz;
-  *d2 = d2lnLdlz2;
-
-  rax_free(diagptable_start);
-}
-#else
-static void coreGTRGAMMA_BINARY(const int upper, double *sumtable,
-                                volatile double *d1,   volatile double *d2, double *EIGN, double *gammaRates, double lz, int *wrptr)
-{
-	double
-		dlnLdlz = 0.0,
-		d2lnLdlz2 = 0.0,
-		ki,
-		kisqr,
-		inv_Li,
-		dlnLidlz,
-		d2lnLidlz2,
-		*sum;
-	PLL_ALIGN_BEGIN double
-		diagptable0[8] PLL_ALIGN_END,
-		diagptable1[8] PLL_ALIGN_END,
-		diagptable2[8] PLL_ALIGN_END;
-    
-  int     
-    i, 
-    j;
-  
-  for(i = 0; i < 4; i++)
-    {
-      ki = gammaRates[i];
-      kisqr = ki * ki;
-      
-      diagptable0[i * 2] = 1.0;
-      diagptable1[i * 2] = 0.0;
-      diagptable2[i * 2] = 0.0;
-     
-      diagptable0[i * 2 + 1] = exp(EIGN[0] * ki * lz);
-      diagptable1[i * 2 + 1] = EIGN[0] * ki;
-      diagptable2[i * 2 + 1] = EIGN[0] * EIGN[0] * kisqr;    
-    }
-
-  for (i = 0; i < upper; i++)
-    { 
-      __m128d a0 = _mm_setzero_pd();
-      __m128d a1 = _mm_setzero_pd();
-      __m128d a2 = _mm_setzero_pd();
-
-      sum = &sumtable[i * 8];         
-
-      for(j = 0; j < 4; j++)
-        {                       
-          double           
-            *d0 = &diagptable0[j * 2],
-            *d1 = &diagptable1[j * 2],
-            *d2 = &diagptable2[j * 2];
-                         
-          __m128d tmpv = _mm_mul_pd(_mm_load_pd(d0), _mm_load_pd(&sum[j * 2]));
-          a0 = _mm_add_pd(a0, tmpv);
-          a1 = _mm_add_pd(a1, _mm_mul_pd(tmpv, _mm_load_pd(d1)));
-          a2 = _mm_add_pd(a2, _mm_mul_pd(tmpv, _mm_load_pd(d2)));
-                          
-        }
-
-      a0 = _mm_hadd_pd(a0, a0);
-      a1 = _mm_hadd_pd(a1, a1);
-      a2 = _mm_hadd_pd(a2, a2);
-
-      _mm_storel_pd(&inv_Li, a0);     
-      _mm_storel_pd(&dlnLidlz, a1);
-      _mm_storel_pd(&d2lnLidlz2, a2); 
-
-      inv_Li = 1.0 / fabs(inv_Li);
-     
-      dlnLidlz   *= inv_Li;
-      d2lnLidlz2 *= inv_Li;     
-
-      dlnLdlz   += wrptr[i] * dlnLidlz;
-      d2lnLdlz2 += wrptr[i] * (d2lnLidlz2 - dlnLidlz * dlnLidlz);
-    }
-
- 
-  *d1   = dlnLdlz;
-  *d2 = d2lnLdlz2; 
-}
-
-
-#endif
-
-static void coreGTRGAMMAPROT_LG4(double *gammaRates, double *EIGN[4], double *sumtable, int upper, int *wrptr,
-                                 volatile double *ext_dlnLdlz,  volatile double *ext_d2lnLdlz2, double lz,
-                                 double * lg4_weights)
-{
-	double  *sum;
-	PLL_ALIGN_BEGIN double
-    diagptable0[80] PLL_ALIGN_END,
-    diagptable1[80] PLL_ALIGN_END,
-    diagptable2[80] PLL_ALIGN_END;    
-  int     i, j, l;
-  double  dlnLdlz = 0;
-  double d2lnLdlz2 = 0;
-  double ki, kisqr; 
-
-  for(i = 0; i < 4; i++)
-    {
-      ki = gammaRates[i];
-      kisqr = ki * ki;
-      
-      diagptable0[i * 20] = 1.0;
-      diagptable1[i * 20] = 0.0;
-      diagptable2[i * 20] = 0.0;
-
-      for(l = 1; l < 20; l++)
-        {
-          diagptable0[i * 20 + l] = exp(EIGN[i][l] * ki * lz);
-          diagptable1[i * 20 + l] = EIGN[i][l] * ki;
-          diagptable2[i * 20 + l] = EIGN[i][l] * EIGN[i][l] * kisqr;
-        }
-    }
-
-  for (i = 0; i < upper; i++)
-    { 
-
-      double
-      	  inv_Li = 0.0,
-      	  dlnLidlz = 0.0,
-      	  d2lnLidlz2 = 0.0;
-
-      sum = &sumtable[i * 80];         
-
-      for(j = 0; j < 4; j++)
-        {                       
-          double
-          	l0,
-          	l1,
-          	l2,
-            *d0 = &diagptable0[j * 20],
-            *d1 = &diagptable1[j * 20],
-            *d2 = &diagptable2[j * 20];
-                 
-          __m128d a0 = _mm_setzero_pd();
-          __m128d a1 = _mm_setzero_pd();
-          __m128d a2 = _mm_setzero_pd();
-
-          for(l = 0; l < 20; l+=2)
-            {
-              __m128d tmpv = _mm_mul_pd(_mm_load_pd(&d0[l]), _mm_load_pd(&sum[j * 20 +l]));
-              a0 = _mm_add_pd(a0, tmpv);
-              a1 = _mm_add_pd(a1, _mm_mul_pd(tmpv, _mm_load_pd(&d1[l])));
-              a2 = _mm_add_pd(a2, _mm_mul_pd(tmpv, _mm_load_pd(&d2[l])));
-            }             
-
-          a0 = _mm_hadd_pd(a0, a0);
-      	  a1 = _mm_hadd_pd(a1, a1);
-      	  a2 = _mm_hadd_pd(a2, a2);
-
-      	 _mm_storel_pd(&l0, a0);
-      	 _mm_storel_pd(&l1, a1);
-      	 _mm_storel_pd(&l2, a2);
-
-      	 inv_Li     += lg4_weights[j] * l0;
-      	 dlnLidlz   += lg4_weights[j] * l1;
-     	 d2lnLidlz2 += lg4_weights[j] * l2;
-      }
-
-      inv_Li = 1.0 / fabs (inv_Li);
-
-      dlnLidlz   *= inv_Li;
-      d2lnLidlz2 *= inv_Li;
-
-      dlnLdlz   += wrptr[i] * dlnLidlz;
-      d2lnLdlz2 += wrptr[i] * (d2lnLidlz2 - dlnLidlz * dlnLidlz);
-    }
-
-  *ext_dlnLdlz   = dlnLdlz;
-  *ext_d2lnLdlz2 = d2lnLdlz2;
-}
-
-
-
-static void coreGTRGAMMAPROT(double *gammaRates, double *EIGN, double *sumtable, int upper, int *wrptr,
-    volatile double *ext_dlnLdlz,  volatile double *ext_d2lnLdlz2, double lz)
-{
-	double  *sum;
-	PLL_ALIGN_BEGIN double
-		diagptable0[80] PLL_ALIGN_END,
-		diagptable1[80] PLL_ALIGN_END,
-		diagptable2[80] PLL_ALIGN_END;
-
-  int     i, j, l;
-  double  dlnLdlz = 0;
-  double d2lnLdlz2 = 0;
-  double ki, kisqr; 
-  double inv_Li, dlnLidlz, d2lnLidlz2;
-
-  for(i = 0; i < 4; i++)
-  {
-    ki = gammaRates[i];
-    kisqr = ki * ki;
-
-    diagptable0[i * 20] = 1.0;
-    diagptable1[i * 20] = 0.0;
-    diagptable2[i * 20] = 0.0;
-
-    for(l = 1; l < 20; l++)
-    {
-      diagptable0[i * 20 + l] = exp(EIGN[l] * ki * lz);
-      diagptable1[i * 20 + l] = EIGN[l] * ki;
-      diagptable2[i * 20 + l] = EIGN[l] * EIGN[l] * kisqr;
-    }
-  }
-
-  for (i = 0; i < upper; i++)
-  { 
-    __m128d a0 = _mm_setzero_pd();
-    __m128d a1 = _mm_setzero_pd();
-    __m128d a2 = _mm_setzero_pd();
-
-    sum = &sumtable[i * 80];         
-
-    for(j = 0; j < 4; j++)
-    {                   
-      double       
-        *d0 = &diagptable0[j * 20],
-        *d1 = &diagptable1[j * 20],
-        *d2 = &diagptable2[j * 20];
-
-      for(l = 0; l < 20; l+=2)
-      {
-        __m128d tmpv = _mm_mul_pd(_mm_load_pd(&d0[l]), _mm_load_pd(&sum[j * 20 +l]));
-        a0 = _mm_add_pd(a0, tmpv);
-        a1 = _mm_add_pd(a1, _mm_mul_pd(tmpv, _mm_load_pd(&d1[l])));
-        a2 = _mm_add_pd(a2, _mm_mul_pd(tmpv, _mm_load_pd(&d2[l])));
-      }           
-    }
-
-    a0 = _mm_hadd_pd(a0, a0);
-    a1 = _mm_hadd_pd(a1, a1);
-    a2 = _mm_hadd_pd(a2, a2);
-
-    _mm_storel_pd(&inv_Li, a0);
-    _mm_storel_pd(&dlnLidlz, a1);
-    _mm_storel_pd(&d2lnLidlz2, a2);
-
-    inv_Li = 1.0 / fabs (inv_Li);
-
-    dlnLidlz   *= inv_Li;
-    d2lnLidlz2 *= inv_Li;
-
-    dlnLdlz   += wrptr[i] * dlnLidlz;
-    d2lnLdlz2 += wrptr[i] * (d2lnLidlz2 - dlnLidlz * dlnLidlz);
-  }
-
-  *ext_dlnLdlz   = dlnLdlz;
-  *ext_d2lnLdlz2 = d2lnLdlz2;
-}
-
-
-
-static void coreGTRCATPROT(double *EIGN, double lz, int numberOfCategories, double *rptr, int *cptr, int upper,
-    int *wgt, volatile double *ext_dlnLdlz,  volatile double *ext_d2lnLdlz2, double *sumtable)
-{
-  int i, l;
-  double *d1, *d_start = NULL, *sum;
-  PLL_ALIGN_BEGIN double 
-    e[20] PLL_ALIGN_END, 
-    s[20] PLL_ALIGN_END, 
-    dd[20] PLL_ALIGN_END;
-  double inv_Li, dlnLidlz, d2lnLidlz2;
-  double  dlnLdlz = 0.0;
-  double  d2lnLdlz2 = 0.0;
-
-  rax_posix_memalign ((void **)&d_start, PLL_BYTE_ALIGNMENT, numberOfCategories * 20 * sizeof(double));
-  d1 = d_start; 
-
-  e[0] = 0.0;
-  s[0] = 0.0; 
-
-  for(l = 1; l < 20; l++)
-  {
-    e[l]  = EIGN[l] * EIGN[l];
-    s[l]  = EIGN[l];
-    dd[l] = s[l] * lz;
-  }
-
-  for(i = 0; i < numberOfCategories; i++)
-  {      
-    d1[20 * i] = 1.0;
-    for(l = 1; l < 20; l++)
-      d1[20 * i + l] = exp(dd[l] * rptr[i]);
-  }
-
-  for (i = 0; i < upper; i++)
-  {
-    __m128d a0 = _mm_setzero_pd();
-    __m128d a1 = _mm_setzero_pd();
-    __m128d a2 = _mm_setzero_pd();
-
-    d1 = &d_start[20 * cptr[i]];
-    sum = &sumtable[20 * i];
-
-    for(l = 0; l < 20; l+=2)
-    {     
-      __m128d tmpv = _mm_mul_pd(_mm_load_pd(&d1[l]), _mm_load_pd(&sum[l]));
-
-      a0 = _mm_add_pd(a0, tmpv);
-      __m128d sv = _mm_load_pd(&s[l]);    
-
-      a1 = _mm_add_pd(a1, _mm_mul_pd(tmpv, sv));
-      __m128d ev = _mm_load_pd(&e[l]);    
-
-      a2 = _mm_add_pd(a2, _mm_mul_pd(tmpv, ev));
-    }
-
-    a0 = _mm_hadd_pd(a0, a0);
-    a1 = _mm_hadd_pd(a1, a1);
-    a2 = _mm_hadd_pd(a2, a2);
-
-    _mm_storel_pd(&inv_Li, a0);     
-    _mm_storel_pd(&dlnLidlz, a1);                 
-    _mm_storel_pd(&d2lnLidlz2, a2);
-
-    inv_Li = 1.0 / fabs (inv_Li);
-
-    dlnLidlz   *= inv_Li;
-    d2lnLidlz2 *= inv_Li;
-
-    dlnLdlz  += wgt[i] * rptr[cptr[i]] * dlnLidlz;
-    d2lnLdlz2 += wgt[i] * rptr[cptr[i]] * rptr[cptr[i]] * (d2lnLidlz2 - dlnLidlz * dlnLidlz);
-  }
-
-  *ext_dlnLdlz   = dlnLdlz;
-  *ext_d2lnLdlz2 = d2lnLdlz2;
-
-  rax_free(d_start);
-}
-
-
-
-
-#endif
-
-
-
diff --git a/pllrepo/src/mem_alloc.c b/pllrepo/src/mem_alloc.c
deleted file mode 100644
index 68e928d..0000000
--- a/pllrepo/src/mem_alloc.c
+++ /dev/null
@@ -1,228 +0,0 @@
-
-#define MEM_ALLOC_NO_GUARDS 1
-
-#include "mem_alloc.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#ifndef __APPLE__
-#include <malloc.h>             // this is probably not necessary
-#endif
-
-#ifdef RAXML_USE_LLALLOC
-
-// the llalloc library implementation in lockless_alloc/ll_alloc.c exports the alloction functions prefixed
-// with 'llalloc'. The following are the forward declarations of the llalloc* functions 
-
-#define PREFIX(X)   llalloc##X
-
-void *PREFIX(memalign)(size_t align, size_t size);
-void *PREFIX(malloc)(size_t size);
-void *PREFIX(realloc)(void *p, size_t size);
-int PREFIX(posix_memalign)(void **p, size_t align, size_t size);
-void *PREFIX(calloc)(size_t n, size_t size);
-void PREFIX(free)(void *p);
-
-
-// wrappers that forward the rax_* functions to the corresponding llalloc* functions
-
-
-void *rax_memalign(size_t align, size_t size) {
-  return PREFIX(memalign)(align, size);
-}
-
-void *rax_malloc( size_t size ) {
-  return PREFIX(malloc)(size);
-}
-void *rax_realloc( void *p, size_t size ) {
-  return PREFIX(realloc)(p, size);
-}
-
-
-void rax_free(void *p) {
-  PREFIX(free)(p);
-}
-
-int rax_posix_memalign(void **p, size_t align, size_t size) {
-  return PREFIX(posix_memalign)(p, align, size);
-}
-void *rax_calloc(size_t n, size_t size) {
-  return PREFIX(calloc)(n,size);
-}
-
-void *rax_malloc_aligned(size_t size) 
-{
-  const size_t PLL_BYTE_ALIGNMENT = 32;
-  return rax_memalign(PLL_BYTE_ALIGNMENT, size);
-  
-}
-
-#else // RAXML_USE_LLALLOC
-// if llalloc should not be used, forward the rax_* functions to the corresponding standard function
-
-void *rax_memalign(size_t align, size_t size) {
-#if defined (__APPLE__)
-    void * mem;
-    if (posix_memalign (&mem, align, size))
-      return (NULL);
-    else
-      return (mem);
-#else
-    return memalign(align, size);
-#endif
-    
-}
-
-void *rax_malloc( size_t size ) {
-  return malloc(size);
-}
-void *rax_realloc( void *p, size_t size ) {
-  return realloc(p, size);
-}
-
-
-void rax_free(void *p) {
-  free(p);
-}
-
-int rax_posix_memalign(void **p, size_t align, size_t size) {
-  return posix_memalign(p, align, size);
-}
-void *rax_calloc(size_t n, size_t size) {
-  return calloc(n,size);
-}
-
-void *rax_malloc_aligned(size_t size) 
-{
-  const size_t PLL_BYTE_ALIGNMENT = 32;
-  return rax_memalign(PLL_BYTE_ALIGNMENT, size);
-  
-}
-
-#endif
-
-
-
-#if 0
-//
-// two test cases to check if the default malloc plays along well with lockless malloc. Normally there shoudl not be a
-// problem as long as everyone handles 'foreign' sbrk calls gracefully (as lockless and glibc seem to do). 
-// WARNING: there is a slightly worrying comment in glibc malloc, which seems to assume that magically no foreign sbrks
-// happen between two consecutive sbrk calls while re-establishing page alignment in some obscure special case. IMHO, this
-// is clearly an error (race) in multithreaded programs, as there is no way how a foreign sbrk user can properly lock anything.
-// see: http://sourceware.org/git/?p=glibc.git;a=blob;f=malloc/malloc.c;h=0f1796c9134ffef289ec31fb1cd538f3a9490ae1;hb=HEAD#l2581
-//
-// If all threads consistently only use the rax_* wrappers this is not a problem, but as this is a library, we can not be sure 
-// that no other thread uses default malloc... note that lockless malloc only uses sbrk for the slab (=small block) area, while 
-// raxml heavy uses malloc/free only on much larger blocks...
-// If anything ever goes wrong while using mixed glibc/lockless malloc, this should be investigated.
-//
-// TODO: the potential race seems to be related to handling the case where a 'foreign sbrk' adjusted the break to a non page-boundary.
-// check if lockless malloc actually ever adjusts to non page-boundaries.
-
-
-void check_block( void *p, size_t size ) {
-    size_t i;
-    char *cp = (char*)p;
-    
-    for( i = 0; i < size; ++i ) {
-        
-        if( cp[i] != (char)i ) {
-            printf( "MEEEEEEEEEEEEEEEEEEEEP\n" );
-            abort();
-        }
-    }
-    
-}
-
-
-void fill_block( void *p, size_t size ) {
-    size_t i;
-    char *cp = (char*)p;
-    
-    for( i = 0; i < size; ++i ) {
-        cp[i] = (char)i;
-    }
-}
-
-
-void malloc_stress() {
-    const int n_slots = 100000;
-    
-    void *blocks1[n_slots];
-    size_t sizes1[n_slots];
-    void *blocks2[n_slots];
-    size_t sizes2[n_slots];
-    
-    memset( blocks1, 0, sizeof( void * ) * n_slots ); 
-    memset( blocks2, 0, sizeof( void * ) * n_slots ); 
-    
-    memset( sizes1, 0, sizeof( size_t ) * n_slots );
-    memset( sizes2, 0, sizeof( size_t ) * n_slots );
-    
-    
-    
-    while( 1 ) {
-        int r = rand() % n_slots;
-        
-        void *bs;
-        
-        
-        int size;
-        if( rand() % 2 == 0 ) {
-            size = rand() % (32 * 16); // hit slab
-        } else {
-            size = (rand() % 128) * 128; // not slab
-        }
-            
-            
-        if( 1 || rand() % 2 == 0 ) {
-            if( blocks1[r] == 0 ) {
-                blocks1[r] = malloc( size );
-                sizes1[r] = size;
-                fill_block( blocks1[r], sizes1[r] );
-            } else {
-                check_block( blocks1[r], sizes1[r] );
-                free( blocks1[r] );
-                blocks1[r] = 0;
-            }
-        } else {
-            if( blocks2[r] == 0 ) {
-                blocks2[r] = rax_malloc( size );
-                sizes2[r] = size;
-                fill_block( blocks2[r], sizes2[r] );
-            } else {
-                check_block( blocks2[r], sizes2[r] );
-                
-                rax_free( blocks2[r] );
-                blocks2[r] = 0;
-            }
-        }
-            
-       
-        
-    }
-    
-}
-
-
-void malloc_stress2() {
-    const size_t n_slots = 1000;
-    
-    void *blocks[n_slots];
-    size_t i;
-    for( i = 0; i < n_slots; ++i ) {
-        blocks[i] = malloc( (rand() % 32) * 1024 ); 
-        
-    }
-    sbrk( 10 );
-    for( i = 0; i < n_slots; ++i ) {
-        free(blocks[i]);
-        
-    }
-    
-    
-    
-}
-#endif
-
diff --git a/pllrepo/src/mem_alloc.h b/pllrepo/src/mem_alloc.h
deleted file mode 100644
index 29553c7..0000000
--- a/pllrepo/src/mem_alloc.h
+++ /dev/null
@@ -1,70 +0,0 @@
-#ifndef __mem_alloc_h
-#define __mem_alloc_h
-
-#if defined WIN32 || defined _WIN32 || defined __WIN32__
-#include <stdlib.h>
-//#include <intrin.h>
-#include <malloc.h>
-//#include <windows.h>
-#endif
-
-#include <stddef.h>
-#include <stdlib.h>
-#ifdef __linux__
-#include <malloc.h>
-#endif
-#include "pll.h"
-#include <string.h>
-
-//#define rax_memalign memalign
-//#define rax_malloc malloc
-//#define rax_calloc calloc
-//#define rax_realloc realloc
-
-
-#if defined WIN32 || defined _WIN32 || defined __WIN32__
-#define rax_posix_memalign(ptr,alignment,size) *(ptr) = _aligned_malloc((size),(alignment))
-#define rax_malloc(size) _aligned_malloc((size), PLL_BYTE_ALIGNMENT)
-void *rax_calloc(size_t count, size_t size);
-#define rax_free _aligned_free
-#else
-#define rax_posix_memalign posix_memalign
-#define rax_malloc malloc
-#define rax_calloc calloc
-#define rax_free free
-#endif
-
-//#define rax_malloc_aligned(x) memalign(PLL_BYTE_ALIGNMENT,x)
-
-//void *rax_memalign(size_t align, size_t size);
-//void *rax_malloc(size_t size);
-//void *rax_realloc(void *p, size_t size);
-//void rax_free(void *p);
-//int rax_posix_memalign(void **p, size_t align, size_t size);
-//void *rax_calloc(size_t n, size_t size);
-//
-//void *rax_malloc_aligned(size_t size);
-
-
-/* for strndup stuff */
-static __inline char *my_strndup(const char *s, size_t n) {
-	char *ret = (char *) rax_malloc(n+1);
-	strncpy(ret, s, n);
-	ret[n] = 0;
-	return ret;
-}
-
-#if 0
-// using the following contraption to trigger a compile-time error does not work on some gcc versions. It will trigger a confising linker error in the best case, so it is deativated.
-
-#if defined(RAXML_USE_LLALLOC) && !defined(MEM_ALLOC_NO_GUARDS)
-#define malloc(x) XXX_DONT_USE_MALLOC_WITHOUT_RAX_PREFIX_XXX
-#define free(x) XXX_DONT_USE_FREE_WITHOUT_RAX_PREFIX_XXX
-#define calloc(x,y) XXX_DONT_USE_CALLOC_WITHOUT_RAX_PREFIX_XXX
-#define realloc(x,y) XXX_DONT_USE_REALLOC_WITHOUT_RAX_PREFIX_XXX
-#define malloc_aligned(x) XXX_DONT_USE_MALLOC_ALIGNED_WITHOUT_RAX_PREFIX_XXX
-#define posix_memalign(x,y,z) XXX_DONT_USE_POSIX_MEMALIGN_ALIGNED_WITHOUT_RAX_PREFIX_XXX
-#endif
-#endif
-
-#endif
diff --git a/pllrepo/src/mic_native.h b/pllrepo/src/mic_native.h
deleted file mode 100644
index 38b24a3..0000000
--- a/pllrepo/src/mic_native.h
+++ /dev/null
@@ -1,56 +0,0 @@
-#ifndef MIC_NATIVE_H_
-#define MIC_NATIVE_H_
-
-void newviewGTRGAMMA_MIC(int tipCase,
-                  double *x1, double *x2, double *x3, double *extEV, double *tipVector,
-                  int *ex3, unsigned char *tipX1, unsigned char *tipX2,
-                  int n, double *left, double *right, int *wgt, int *scalerIncrement, const pllBoolean fastScaling);
-
-double evaluateGTRGAMMA_MIC(int *ex1, int *ex2, int *wptr,
-                 double *x1_start, double *x2_start,
-                 double *tipVector,
-                 unsigned char *tipX1, const int n, double *diagptable, const pllBoolean fastScaling);
-
-void sumGTRGAMMA_MIC(int tipCase, double *sumtable, double *x1_start, double *x2_start, double *tipVector,
-    unsigned char *tipX1, unsigned char *tipX2, int n);
-
-void coreGTRGAMMA_MIC(const int upper, double *sumtable,
-    volatile double *ext_dlnLdlz,  volatile double *ext_d2lnLdlz2, double *EIGN, double *gammaRates, double lz, int *wrptr);
-
-// protein data
-void newviewGTRGAMMAPROT_MIC(int tipCase,
-                  double *x1, double *x2, double *x3, double *extEV, double *tipVector,
-                  int *ex3, unsigned char *tipX1, unsigned char *tipX2,
-                  int n, double *left, double *right, int *wgt, int *scalerIncrement, const pllBoolean fastScaling);
-
-double evaluateGTRGAMMAPROT_MIC(int *ex1, int *ex2, int *wptr,
-                 double *x1_start, double *x2_start,
-                 double *tipVector,
-                 unsigned char *tipX1, const int n, double *diagptable, const pllBoolean fastScaling);
-
-void sumGTRGAMMAPROT_MIC(int tipCase, double *sumtable, double *x1_start, double *x2_start, double *tipVector,
-    unsigned char *tipX1, unsigned char *tipX2, int n);
-
-void coreGTRGAMMAPROT_MIC(const int upper, double *sumtable,
-    volatile double *ext_dlnLdlz,  volatile double *ext_d2lnLdlz2, double *EIGN, double *gammaRates, double lz, int *wrptr);
-
-// protein data - LG4
-
-void newviewGTRGAMMAPROT_LG4_MIC(int tipCase,
-                  double *x1, double *x2, double *x3, double *extEV[4], double *tipVector[4],
-                  unsigned char *tipX1, unsigned char *tipX2,
-                  int n, double *left, double *right, int *wgt, int *scalerIncrement);
-
-double evaluateGTRGAMMAPROT_LG4_MIC(int *wptr,
-                 double *x1_start, double *x2_start,
-                 double *tipVector[4],
-                 unsigned char *tipX1, const int n, double *diagptable);
-
-void sumGTRGAMMAPROT_LG4_MIC(int tipCase, double *sumtable, double *x1_start, double *x2_start, double *tipVector[4],
-    unsigned char *tipX1, unsigned char *tipX2, int n);
-
-void coreGTRGAMMAPROT_LG4_MIC(const int upper, double *sumtable,
-    volatile double *ext_dlnLdlz,  volatile double *ext_d2lnLdlz2, double *EIGN[4], double *gammaRates, double lz, int *wrptr);
-
-
-#endif /* MIC_NATIVE_H_ */
diff --git a/pllrepo/src/mic_native_aa.c b/pllrepo/src/mic_native_aa.c
deleted file mode 100644
index 2cfd2b1..0000000
--- a/pllrepo/src/mic_native_aa.c
+++ /dev/null
@@ -1,1254 +0,0 @@
-#include <omp.h>
-#include <immintrin.h>
-#include <string.h>
-#include <math.h>
-
-#include "pll.h"
-#include "mic_native.h"
-
-static const int states = 20;
-static const int statesSquare = 20 * 20;
-static const int span = 20 * 4;
-static const int maxStateValue = 23;
-
-__inline void mic_fma4x80(const double* inv, double* outv, double* mulv)
-{
-    __mmask8 k1 = _mm512_int2mask(0x0F);
-    __mmask8 k2 = _mm512_int2mask(0xF0);
-    for(int l = 0; l < 80; l += 40)
-    {
-        __m512d t = _mm512_setzero_pd();
-
-        t = _mm512_extload_pd(&inv[l], _MM_UPCONV_PD_NONE, _MM_BROADCAST_1X8, _MM_HINT_NONE);
-        __m512d m = _mm512_load_pd(&mulv[l]);
-        __m512d acc = _mm512_load_pd(&outv[l]);
-        __m512d r = _mm512_fmadd_pd(t, m, acc);
-        _mm512_store_pd(&outv[l], r);
-
-        m = _mm512_load_pd(&mulv[l + 8]);
-        acc = _mm512_load_pd(&outv[l + 8]);
-        r = _mm512_fmadd_pd(t, m, acc);
-        _mm512_store_pd(&outv[l + 8], r);
-
-        t = _mm512_mask_extload_pd(t, k1, &inv[l], _MM_UPCONV_PD_NONE, _MM_BROADCAST_1X8, _MM_HINT_NONE);
-        t = _mm512_mask_extload_pd(t, k2, &inv[l+20], _MM_UPCONV_PD_NONE, _MM_BROADCAST_1X8, _MM_HINT_NONE);
-
-        m = _mm512_load_pd(&mulv[l + 16]);
-        acc = _mm512_load_pd(&outv[l + 16]);
-        r = _mm512_fmadd_pd(t, m, acc);
-        _mm512_store_pd(&outv[l + 16], r);
-
-        t = _mm512_extload_pd(&inv[l+20], _MM_UPCONV_PD_NONE, _MM_BROADCAST_1X8, _MM_HINT_NONE);
-        m = _mm512_load_pd(&mulv[l + 24]);
-        acc = _mm512_load_pd(&outv[l + 24]);
-        r = _mm512_fmadd_pd(t, m, acc);
-        _mm512_store_pd(&outv[l + 24], r);
-
-        m = _mm512_load_pd(&mulv[l + 32]);
-        acc = _mm512_load_pd(&outv[l + 32]);
-        r = _mm512_fmadd_pd(t, m, acc);
-        _mm512_store_pd(&outv[l + 32], r);
-    }
-}
-
-
-void newviewGTRGAMMAPROT_MIC(int tipCase,
-                  double *x1, double *x2, double *x3, double *extEV, double *tipVector,
-                  int *ex3, unsigned char *tipX1, unsigned char *tipX2,
-                  int n, double *left, double *right, int *wgt, int *scalerIncrement, const pllBoolean fastScaling)
-{
-  __m512d minlikelihood_MIC = _mm512_set1_pd(PLL_MINLIKELIHOOD);
-  __m512d twotothe256_MIC = _mm512_set1_pd(PLL_TWOTOTHE256);
-  __m512i absMask_MIC = _mm512_set1_epi64(0x7fffffffffffffffULL);
-
-  int addScale = 0;
-
-  double aEV[1600] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-
-  #pragma ivdep
-  for (int l = 0; l < 1600; ++l)
-  {
-      aEV[l] = extEV[(l / span) * states + (l % states)];
-  }
-
-  switch(tipCase)
-  {
-    case PLL_TIP_TIP:
-      {
-        /* multiply all possible tip state vectors with the respective P-matrices
-        */
-
-        double umpX1[1840] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-        double umpX2[1840] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-
-        for(int i = 0; i < maxStateValue; ++i)
-        {
-          for(int k = 0; k < span; ++k)
-          {
-              umpX1[i * span + k] = 0.0;
-              umpX2[i * span + k] = 0.0;
-
-              #pragma ivdep
-              for(int l = 0; l < states; ++l)
-              {
-                  umpX1[i * span + k] +=  tipVector[i * states + l] *  left[k * states + l];
-                  umpX2[i * span + k] +=  tipVector[i * states + l] * right[k * states + l];
-              }
-          }
-        }
-
-        for (int i = 0; i < n; i++)
-        {
-            const double *uX1 = &umpX1[span * tipX1[i]];
-            const double *uX2 = &umpX2[span * tipX2[i]];
-
-            double uX[span] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-            double* v3 = &x3[i * span];
-
-            #pragma ivdep
-            #pragma vector aligned
-            for(int l = 0; l < span; ++l)
-            {
-                uX[l] = uX1[l] * uX2[l];
-                v3[l] = 0.;
-            }
-
-            for(int k = 0; k < states; ++k)
-            {
-                for (int j = 0; j < span; j += 8)
-                {
-                    _mm_prefetch((const char *)&aEV[span*(k+1) + j], _MM_HINT_T0);
-                }
-
-                mic_fma4x80(&uX[k], v3, &aEV[k * span]);
-            }
-
-            // init scaling counter for the site
-            if (!fastScaling)
-                ex3[i] = 0;
-
-        } // sites loop
-      }
-      break;
-    case PLL_TIP_INNER:
-      {
-        /* we do analogous pre-computations as above, with the only difference that we now do them
-        only for one tip vector */
-
-          double umpX1[1840] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-
-        /* precompute P and left tip vector product */
-
-        for(int i = 0; i < maxStateValue; ++i)
-        {
-          for(int k = 0; k < span; ++k)
-          {
-              umpX1[i * span + k] = 0.0;
-
-              #pragma ivdep
-              for(int l = 0; l < states; ++l)
-              {
-                  umpX1[i * span + k] +=  tipVector[i * states + l] *  left[k * states + l];
-              }
-          }
-        }
-
-        // re-arrange right matrix for better memory layout
-        double aRight[4 * statesSquare] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-        for(int j = 0; j < 4; j++)
-        {
-            for(int k = 0; k < states; k++)
-            {
-                for(int l = 0; l < states; l++)
-                {
-                    aRight[k * span + j * states + l] = right[j * statesSquare +  l * states + k];
-                }
-            }
-        }
-
-        for (int i = 0; i < n; i++)
-        {
-            #pragma unroll(10)
-            for (int j = 0; j < span; j += 8)
-            {
-                _mm_prefetch((const char *)&x2[span*(i+1) + j], _MM_HINT_T1);
-            }
-
-            /* access pre-computed value based on the raw sequence data tipX1 that is used as an index */
-            double* uX1 = &umpX1[span * tipX1[i]];
-            double uX2[span] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-            double uX[span] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-
-            double* v3 = &(x3[span * i]);
-
-            const double* v2 = &(x2[span * i]);
-
-            #pragma vector aligned
-            for(int l = 0; l < span; ++l)
-            {
-                uX2[l] = 0.;
-            }
-
-            for(int k = 0; k < states; ++k)
-            {
-                for (int j = 0; j < span; j += 8)
-                {
-                    _mm_prefetch((const char *)&aRight[span*(k+1) + j], _MM_HINT_T0);
-                }
-
-                mic_fma4x80(&v2[k], uX2, &aRight[k * span]);
-            }
-
-            #pragma ivdep
-            #pragma vector aligned
-            for(int l = 0; l < span; ++l)
-            {
-                uX[l] = uX1[l] * uX2[l];
-                v3[l] = 0.;
-            }
-
-            for(int k = 0; k < states; ++k)
-            {
-                for (int j = 0; j < span; j += 8)
-                {
-                    _mm_prefetch((const char *)&aEV[span*(k+1) + j], _MM_HINT_T0);
-                }
-
-                mic_fma4x80(&uX[k], v3, &aEV[k * span]);
-            }
-
-            __m512d t1 = _mm512_load_pd(&v3[0]);
-            t1 = _mm512_castsi512_pd(_mm512_and_epi64(_mm512_castpd_si512(t1), absMask_MIC));
-            double vmax = _mm512_reduce_gmax_pd(t1);
-            double mx[16] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-            for (int l = 8; l < span; l += 8)
-            {
-                __m512d t = _mm512_load_pd(&v3[l]);
-                t = _mm512_castsi512_pd(_mm512_and_epi64(_mm512_castpd_si512(t), absMask_MIC));
-                double vmax2 = _mm512_reduce_gmax_pd(t);
-                vmax = PLL_MAX(vmax, vmax2);
-            }
-
-            if (vmax < PLL_MINLIKELIHOOD)
-            {
-                #pragma vector aligned nontemporal
-                for(int l = 0; l < span; l++)
-                  v3[l] *= PLL_TWOTOTHE256;
-
-                if(!fastScaling)
-                  ex3[i] += 1;
-                else
-                  addScale += wgt[i];
-            }
-        } // site loop
-
-      }
-      break;
-    case PLL_INNER_INNER:
-    {
-      /* same as above, without pre-computations */
-
-
-        // re-arrange right matrix for better memory layout
-        double aLeft[4 * statesSquare] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-        double aRight[4 * statesSquare] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-        for(int j = 0; j < 4; j++)
-        {
-            for(int k = 0; k < states; k++)
-            {
-                for(int l = 0; l < states; l++)
-                {
-                    aLeft[k * span + j * states + l] = left[j * statesSquare + l * states + k];
-                    aRight[k * span + j * states + l] = right[j * statesSquare + l * states + k];
-                }
-            }
-        }
-
-        for (int i = 0; i < n; i++)
-        {
-
-            #pragma unroll(10)
-            for (int j = 0; j < span; j += 8)
-            {
-                _mm_prefetch((const char *)&x1[span*(i+1) + j], _MM_HINT_T1);
-                _mm_prefetch((const char *)&x2[span*(i+1) + j], _MM_HINT_T1);
-            }
-
-
-            double uX1[span] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-            double uX2[span] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-            double uX[span] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-
-            double* v3 = &(x3[span * i]);
-
-            const double* v1 = &(x1[span * i]);
-            const double* v2 = &(x2[span * i]);
-
-            #pragma vector aligned
-            for(int l = 0; l < span; ++l)
-            {
-                uX1[l] = 0.;
-                uX2[l] = 0.;
-            }
-
-            for(int k = 0; k < states; ++k)
-            {
-                for (int j = 0; j < span; j += 8)
-                {
-                    _mm_prefetch((const char *)&aRight[span*(k+1) + j], _MM_HINT_T0);
-                    _mm_prefetch((const char *)&aLeft[span*(k+1) + j], _MM_HINT_T0);
-                }
-
-                mic_fma4x80(&v1[k], uX1, &aLeft[k * span]);
-                mic_fma4x80(&v2[k], uX2, &aRight[k * span]);
-            }
-
-            #pragma ivdep
-            #pragma vector aligned
-            for(int l = 0; l < span; ++l)
-            {
-                uX[l] = uX1[l] * uX2[l];
-                v3[l] = 0.;
-            }
-
-            for(int k = 0; k < states; ++k)
-            {
-                for (int j = 0; j < span; j += 8)
-                {
-                    _mm_prefetch((const char *)&aEV[span*(k+1) + j], _MM_HINT_T0);
-                }
-
-                mic_fma4x80(&uX[k], v3, &aEV[k * span]);
-            }
-
-            __m512d t1 = _mm512_load_pd(&v3[0]);
-            t1 = _mm512_castsi512_pd(_mm512_and_epi64(_mm512_castpd_si512(t1), absMask_MIC));
-            double vmax = _mm512_reduce_gmax_pd(t1);
-            double mx[16] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-            for (int l = 8; l < span; l += 8)
-            {
-                __m512d t = _mm512_load_pd(&v3[l]);
-                t = _mm512_castsi512_pd(_mm512_and_epi64(_mm512_castpd_si512(t), absMask_MIC));
-                double vmax2 = _mm512_reduce_gmax_pd(t);
-                vmax = PLL_MAX(vmax, vmax2);
-            }
-
-            if (vmax < PLL_MINLIKELIHOOD)
-            {
-                #pragma vector aligned nontemporal
-                for(int l = 0; l < span; l++)
-                  v3[l] *= PLL_TWOTOTHE256;
-
-                if(!fastScaling)
-                  ex3[i] += 1;
-                else
-                  addScale += wgt[i];
-            }
-        }
-    } break;
-    default:
-//      assert(0);
-      break;
-  }
-
-  *scalerIncrement = addScale;
-
-}
-
-
-
-double evaluateGTRGAMMAPROT_MIC(int *ex1, int *ex2, int *wgt, double *x1_start, double *x2_start, double *tipVector,
-                 unsigned char *tipX1, const int n, double *diagptable, const pllBoolean fastScaling)
-{
-    double sum = 0.0;
-
-    /* the left node is a tip */
-    if(tipX1)
-    {
-        double aTipVec[1840] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-        for(int k = 0; k < maxStateValue; k++)
-        {
-            for(int l = 0; l < states; l++)
-            {
-                aTipVec[k*span + l] = aTipVec[k*span + states + l] = aTipVec[k*span + 2*states + l] = aTipVec[k*span + 3*states + l] = tipVector[k*states + l];
-            }
-        }
-
-        /* loop over the sites of this partition */
-        for (int i = 0; i < n; i++)
-        {
-          /* access pre-computed tip vector values via a lookup table */
-          const double *x1 = &(aTipVec[span * tipX1[i]]);
-          /* access the other(inner) node at the other end of the branch */
-          const double *x2 = &(x2_start[span * i]);
-
-          double term = 0.;
-
-          #pragma ivdep
-          #pragma vector aligned
-          for(int j = 0; j < span; j++) {
-              term += x1[j] * x2[j] * diagptable[j];
-          }
-
-          if(!fastScaling)
-              term = log(0.25 * fabs(term)) + (ex2[i] * log(PLL_MINLIKELIHOOD));
-          else
-              term = log(0.25 * fabs(term));
-
-          sum += wgt[i] * term;
-        }
-    }
-    else
-    {
-        for (int i = 0; i < n; i++)
-        {
-            _mm_prefetch((const char *) &x1_start[span*(i+8)], _MM_HINT_T1);
-            _mm_prefetch((const char *) &x1_start[span*(i+8) + 8], _MM_HINT_T1);
-            _mm_prefetch((const char *) &x2_start[span*(i+8)], _MM_HINT_T1);
-            _mm_prefetch((const char *) &x2_start[span*(i+8) + 8], _MM_HINT_T1);
-
-            _mm_prefetch((const char *) &x1_start[span*(i+1)], _MM_HINT_T0);
-            _mm_prefetch((const char *) &x1_start[span*(i+1) + 8], _MM_HINT_T0);
-            _mm_prefetch((const char *) &x2_start[span*(i+1)], _MM_HINT_T0);
-            _mm_prefetch((const char *) &x2_start[span*(i+1) + 8], _MM_HINT_T0);
-
-          const double *x1 = &(x1_start[span * i]);
-          const double *x2 = &(x2_start[span * i]);
-
-          double term = 0.;
-
-          #pragma ivdep
-          #pragma vector aligned
-          for(int j = 0; j < span; j++)
-              term += x1[j] * x2[j] * diagptable[j];
-
-          if(!fastScaling)
-              term = log(0.25 * fabs(term)) + ((ex1[i] + ex2[i]) * log(PLL_MINLIKELIHOOD));
-          else
-              term = log(0.25 * fabs(term));
-
-          sum += wgt[i] * term;
-        }
-    }
-
-    return sum;
-}
-
-void sumGTRGAMMAPROT_MIC(int tipCase, double *sumtable, double *x1_start, double *x2_start, double *tipVector,
-    unsigned char *tipX1, unsigned char *tipX2, int n)
-{
-    double aTipVec[1840] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-    for(int k = 0; k < maxStateValue; k++)
-    {
-        for(int l = 0; l < states; l++)
-        {
-            aTipVec[k*span + l] = aTipVec[k*span + states + l] = aTipVec[k*span + 2*states + l] = aTipVec[k*span + 3*states + l] = tipVector[k*states + l];
-        }
-    }
-
-    switch(tipCase)
-    {
-      case PLL_TIP_TIP:
-      {
-        for(int i = 0; i < n; i++)
-        {
-            const double *left  = &(aTipVec[span * tipX1[i]]);
-            const double *right = &(aTipVec[span * tipX2[i]]);
-
-            #pragma ivdep
-            #pragma vector aligned nontemporal
-            for(int l = 0; l < span; l++)
-            {
-                sumtable[i * span + l] = left[l] * right[l];
-            }
-        }
-      } break;
-      case PLL_TIP_INNER:
-      {
-        for(int i = 0; i < n; i++)
-        {
-          _mm_prefetch((const char *) &x2_start[span*(i+16)], _MM_HINT_T1);
-          _mm_prefetch((const char *) &x2_start[span*(i+16) + 8], _MM_HINT_T1);
-
-          _mm_prefetch((const char *) &x2_start[span*(i+2)], _MM_HINT_T0);
-          _mm_prefetch((const char *) &x2_start[span*(i+2) + 8], _MM_HINT_T0);
-
-          const double *left = &(aTipVec[span * tipX1[i]]);
-          const double *right = &(x2_start[span * i]);
-
-          #pragma ivdep
-          #pragma vector aligned nontemporal
-          for(int l = 0; l < span; l++)
-          {
-              sumtable[i * span + l] = left[l] * right[l];
-          }
-        }
-      } break;
-      case PLL_INNER_INNER:
-      {
-        for(int i = 0; i < n; i++)
-        {
-            _mm_prefetch((const char *) &x1_start[span*(i+16)], _MM_HINT_T1);
-            _mm_prefetch((const char *) &x1_start[span*(i+16) + 8], _MM_HINT_T1);
-            _mm_prefetch((const char *) &x2_start[span*(i+16)], _MM_HINT_T1);
-            _mm_prefetch((const char *) &x2_start[span*(i+16) + 8], _MM_HINT_T1);
-
-            _mm_prefetch((const char *) &x1_start[span*(i+2)], _MM_HINT_T0);
-            _mm_prefetch((const char *) &x1_start[span*(i+2) + 8], _MM_HINT_T0);
-            _mm_prefetch((const char *) &x2_start[span*(i+2)], _MM_HINT_T0);
-            _mm_prefetch((const char *) &x2_start[span*(i+2) + 8], _MM_HINT_T0);
-
-            const double *left  = &(x1_start[span * i]);
-            const double *right = &(x2_start[span * i]);
-
-            #pragma ivdep
-            #pragma vector aligned nontemporal
-            for(int l = 0; l < span; l++)
-            {
-                sumtable[i * span + l] = left[l] * right[l];
-            }
-        }
-      } break;
-  //    default:
-  //      assert(0);
-    }
-}
-
-void coreGTRGAMMAPROT_MIC(const int upper, double *sumtable,
-    volatile double *ext_dlnLdlz,  volatile double *ext_d2lnLdlz2, double *EIGN, double *gammaRates, double lz, int *wgt)
-{
-    double diagptable0[span] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-    double diagptable1[span] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-    double diagptable2[span] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-    double diagptable01[span] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-    double diagptable02[span] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-
-    /* pre-compute the derivatives of the P matrix for all discrete GAMMA rates */
-
-    for(int i = 0; i < 4; i++)
-    {
-        const double ki = gammaRates[i];
-        const double kisqr = ki * ki;
-
-        diagptable0[i*states] = 1.;
-        diagptable1[i*states] = 0.;
-        diagptable2[i*states] = 0.;
-
-        for(int l = 1; l < states; l++)
-        {
-          diagptable0[i * states + l]  = exp(EIGN[l] * ki * lz);
-          diagptable1[i * states + l] = EIGN[l] * ki;
-          diagptable2[i * states + l] = EIGN[l] * EIGN[l] * kisqr;
-        }
-    }
-
-    #pragma ivdep
-    for(int i = 0; i < span; i++)
-    {
-        diagptable01[i] = diagptable0[i] * diagptable1[i];
-        diagptable02[i] = diagptable0[i] * diagptable2[i];
-    }
-
-    /* loop over sites in this partition */
-
-    const int aligned_width = upper % PLL_VECTOR_WIDTH == 0 ? upper / PLL_VECTOR_WIDTH : upper / PLL_VECTOR_WIDTH + 1;
-
-    double dlnLdlz = 0.;
-    double d2lnLdlz2 = 0.;
-
-    __mmask16 k1 = _mm512_int2mask(0x000000FF);
-
-    for (int i = 0; i < aligned_width; i++)
-    {
-        _mm_prefetch((const char *) &sumtable[i * span * 8], _MM_HINT_T0);
-        _mm_prefetch((const char *) &sumtable[i * span * 8 + 8], _MM_HINT_T0);
-
-        /* access the array with pre-computed values */
-        const double *sum = &sumtable[i * span * PLL_VECTOR_WIDTH];
-
-        /* initial per-site likelihood and 1st and 2nd derivatives */
-
-        double invBuf[PLL_VECTOR_WIDTH] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-        double d1Buf[PLL_VECTOR_WIDTH] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-        double d2Buf[PLL_VECTOR_WIDTH] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-
-        __m512d invVec;
-        __m512d d1Vec;
-        __m512d d2Vec;
-        int mask = 0x01;
-
-        #pragma noprefetch sum
-        #pragma unroll(8)
-        for(int j = 0; j < PLL_VECTOR_WIDTH; j++)
-        {
-            _mm_prefetch((const char *) &sum[span*(j+8)], _MM_HINT_T1);
-            _mm_prefetch((const char *) &sum[span*(j+8) + 8], _MM_HINT_T1);
-
-            _mm_prefetch((const char *) &sum[span*(j+1)], _MM_HINT_T0);
-            _mm_prefetch((const char *) &sum[span*(j+1) + 8], _MM_HINT_T0);
-
-            __m512d inv_1 = _mm512_setzero_pd();
-            __m512d d1_1 = _mm512_setzero_pd();
-            __m512d d2_1 = _mm512_setzero_pd();
-
-            for (int offset = 0; offset < span; offset += 8)
-            {
-                __m512d d0_1 = _mm512_load_pd(&diagptable0[offset]);
-                __m512d d01_1 = _mm512_load_pd(&diagptable01[offset]);
-                __m512d d02_1 = _mm512_load_pd(&diagptable02[offset]);
-                __m512d s_1 = _mm512_load_pd(&sum[j*span + offset]);
-
-                inv_1 = _mm512_fmadd_pd(d0_1, s_1, inv_1);
-                d1_1 = _mm512_fmadd_pd(d01_1, s_1, d1_1);
-                d2_1 = _mm512_fmadd_pd(d02_1, s_1, d2_1);
-            }
-
-            __mmask8 k1 = _mm512_int2mask(mask);
-            mask <<= 1;
-
-            // reduce
-            inv_1 = _mm512_add_pd (inv_1, _mm512_swizzle_pd(inv_1, _MM_SWIZ_REG_CDAB));
-            inv_1 = _mm512_add_pd (inv_1, _mm512_swizzle_pd(inv_1, _MM_SWIZ_REG_BADC));
-            inv_1 = _mm512_add_pd (inv_1, _mm512_castsi512_pd(_mm512_permute4f128_epi32(_mm512_castpd_si512(inv_1), _MM_PERM_BADC)));
-            invVec = _mm512_mask_mov_pd(invVec, k1, inv_1);
-
-            d1_1 = _mm512_add_pd (d1_1, _mm512_swizzle_pd(d1_1, _MM_SWIZ_REG_CDAB));
-            d1_1 = _mm512_add_pd (d1_1, _mm512_swizzle_pd(d1_1, _MM_SWIZ_REG_BADC));
-            d1_1 = _mm512_add_pd (d1_1, _mm512_castsi512_pd(_mm512_permute4f128_epi32(_mm512_castpd_si512(d1_1), _MM_PERM_BADC)));
-            d1Vec = _mm512_mask_mov_pd(d1Vec, k1, d1_1);
-
-            d2_1 = _mm512_add_pd (d2_1, _mm512_swizzle_pd(d2_1, _MM_SWIZ_REG_CDAB));
-            d2_1 = _mm512_add_pd (d2_1, _mm512_swizzle_pd(d2_1, _MM_SWIZ_REG_BADC));
-            d2_1 = _mm512_add_pd (d2_1, _mm512_castsi512_pd(_mm512_permute4f128_epi32(_mm512_castpd_si512(d2_1), _MM_PERM_BADC)));
-            d2Vec = _mm512_mask_mov_pd(d2Vec, k1, d2_1);
-        }
-
-        _mm512_store_pd(&invBuf[0], invVec);
-        _mm512_store_pd(&d1Buf[0], d1Vec);
-        _mm512_store_pd(&d2Buf[0], d2Vec);
-
-        #pragma ivdep
-        #pragma vector aligned
-        for (int j = 0; j < PLL_VECTOR_WIDTH; ++j)
-        {
-            const double inv_Li = 1.0 / invBuf[j];
-
-            const double d1 = d1Buf[j] * inv_Li;
-            const double d2 = d2Buf[j] * inv_Li;
-
-            dlnLdlz += wgt[i * PLL_VECTOR_WIDTH + j] * d1;
-            d2lnLdlz2 += wgt[i * PLL_VECTOR_WIDTH + j] * (d2 - d1 * d1);
-        }
-    } // site loop
-
-    *ext_dlnLdlz   = dlnLdlz;
-    *ext_d2lnLdlz2 = d2lnLdlz2;
-}
-
-
-/****
- *       PROTEIN - LG4
- */
-
-void newviewGTRGAMMAPROT_LG4_MIC(int tipCase,
-                  double *x1, double *x2, double *x3, double *extEV[4], double *tipVector[4],
-                  unsigned char *tipX1, unsigned char *tipX2,
-                  int n, double *left, double *right, int *wgt, int *scalerIncrement)
-{
-
-  __m512d minlikelihood_MIC = _mm512_set1_pd(PLL_MINLIKELIHOOD);
-  __m512d twotothe256_MIC = _mm512_set1_pd(PLL_TWOTOTHE256);
-  __m512i absMask_MIC = _mm512_set1_epi64(0x7fffffffffffffffULL);
-
-  int addScale = 0;
-
-  double aEV[1600] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-
-  #pragma ivdep
-  for (int l = 0; l < 1600; ++l)
-  {
-      aEV[l] = extEV[(l % span) / states][(l / span) * states + (l % states)];
-  }
-
-  switch(tipCase)
-  {
-    case PLL_TIP_TIP:
-      {
-        /* multiply all possible tip state vectors with the respective P-matrices
-        */
-
-        double umpX1[1840] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-        double umpX2[1840] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-
-        for(int i = 0; i < 23; ++i)
-        {
-          for(int k = 0; k < span; ++k)
-          {
-              umpX1[i * span + k] = 0.0;
-              umpX2[i * span + k] = 0.0;
-              double *tipv = &(tipVector[k / states][i * states]);
-
-
-              #pragma ivdep
-              for(int l = 0; l < states; ++l)
-              {
-                  umpX1[i * span + k] +=  tipv[l] *  left[k * states + l];
-                  umpX2[i * span + k] +=  tipv[l] * right[k * states + l];
-              }
-          }
-        }
-
-        for (int i = 0; i < n; i++)
-        {
-            const double *uX1 = &umpX1[span * tipX1[i]];
-            const double *uX2 = &umpX2[span * tipX2[i]];
-
-            double uX[span] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-            double* v3 = &x3[i * span];
-
-            #pragma ivdep
-            #pragma vector aligned
-            for(int l = 0; l < span; ++l)
-            {
-                uX[l] = uX1[l] * uX2[l];
-                v3[l] = 0.;
-            }
-
-            for(int k = 0; k < states; ++k)
-            {
-                for (int j = 0; j < span; j += 8)
-                {
-                    _mm_prefetch((const char *)&aEV[span*(k+1) + j], _MM_HINT_T0);
-                }
-
-                mic_fma4x80(&uX[k], v3, &aEV[k * span]);
-            }
-
-        } // sites loop
-      }
-      break;
-    case PLL_TIP_INNER:
-      {
-        /* we do analogous pre-computations as above, with the only difference that we now do them
-        only for one tip vector */
-
-          double umpX1[1840] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-
-        /* precompute P and left tip vector product */
-
-        for(int i = 0; i < 23; ++i)
-        {
-          for(int k = 0; k < span; ++k)
-          {
-              umpX1[i * span + k] = 0.0;
-              double *tipv = &(tipVector[k / states][i * states]);
-
-              #pragma ivdep
-              for(int l = 0; l < states; ++l)
-              {
-                  umpX1[i * span + k] +=  tipv[l] *  left[k * states + l];
-              }
-          }
-        }
-
-        // re-arrange right matrix for better memory layout
-        double aRight[4 * statesSquare] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-        for(int j = 0; j < 4; j++)
-        {
-            for(int k = 0; k < states; k++)
-            {
-                for(int l = 0; l < states; l++)
-                {
-                    aRight[k * span + j * states + l] = right[j * statesSquare +  l * states + k];
-                }
-            }
-        }
-
-        for (int i = 0; i < n; i++)
-        {
-            #pragma unroll(10)
-            for (int j = 0; j < span; j += 8)
-            {
-                _mm_prefetch((const char *)&x2[span*(i+1) + j], _MM_HINT_T1);
-            }
-
-            /* access pre-computed value based on the raw sequence data tipX1 that is used as an index */
-            double* uX1 = &umpX1[span * tipX1[i]];
-            double uX2[span] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-            double uX[span] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-
-            double* v3 = &(x3[span * i]);
-
-            const double* v2 = &(x2[span * i]);
-
-            #pragma vector aligned
-            for(int l = 0; l < span; ++l)
-            {
-                uX2[l] = 0.;
-            }
-
-            for(int k = 0; k < states; ++k)
-            {
-				#pragma unroll(10)
-            	for (int j = 0; j < span; j += 8)
-                {
-                    _mm_prefetch((const char *)&aRight[span*(k+1) + j], _MM_HINT_T0);
-                }
-
-                mic_fma4x80(&v2[k], uX2, &aRight[k * span]);
-            }
-
-            #pragma ivdep
-            #pragma vector aligned
-            for(int l = 0; l < span; ++l)
-            {
-                uX[l] = uX1[l] * uX2[l];
-                v3[l] = 0.;
-            }
-
-            for(int k = 0; k < states; ++k)
-            {
-				#pragma unroll(10)
-            	for (int j = 0; j < span; j += 8)
-                {
-                    _mm_prefetch((const char *)&aEV[span*(k+1) + j], _MM_HINT_T0);
-                }
-
-                mic_fma4x80(&uX[k], v3, &aEV[k * span]);
-            }
-
-
-            __m512d t1 = _mm512_load_pd(&v3[0]);
-            t1 = _mm512_castsi512_pd(_mm512_and_epi64(_mm512_castpd_si512(t1), absMask_MIC));
-            double vmax = _mm512_reduce_gmax_pd(t1);
-            double mx[16] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-            for (int l = 8; l < span; l += 8)
-            {
-                __m512d t = _mm512_load_pd(&v3[l]);
-                t = _mm512_castsi512_pd(_mm512_and_epi64(_mm512_castpd_si512(t), absMask_MIC));
-                double vmax2 = _mm512_reduce_gmax_pd(t);
-                vmax = PLL_MAX(vmax, vmax2);
-            }
-
-            if (vmax < PLL_MINLIKELIHOOD)
-            {
-                #pragma vector aligned nontemporal
-                for(int l = 0; l < span; l++)
-                  v3[l] *= PLL_TWOTOTHE256;
-
-                addScale += wgt[i];
-            }
-        } // site loop
-
-      }
-      break;
-    case PLL_INNER_INNER:
-    {
-      /* same as above, without pre-computations */
-
-        // re-arrange right matrix for better memory layout
-        double aLeft[4 * statesSquare] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-        double aRight[4 * statesSquare] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-        for(int j = 0; j < 4; j++)
-        {
-            for(int k = 0; k < states; k++)
-            {
-                for(int l = 0; l < states; l++)
-                {
-                    aLeft[k * span + j * states + l] = left[j * statesSquare + l * states + k];
-                    aRight[k * span + j * states + l] = right[j * statesSquare + l * states + k];
-                }
-            }
-        }
-
-        for (int i = 0; i < n; i++)
-        {
-
-            #pragma unroll(10)
-            for (int j = 0; j < span; j += 8)
-            {
-                _mm_prefetch((const char *)&x1[span*(i+1) + j], _MM_HINT_T1);
-                _mm_prefetch((const char *)&x2[span*(i+1) + j], _MM_HINT_T1);
-            }
-
-
-            double uX1[span] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-            double uX2[span] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-            double uX[span] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-
-            double* v3 = &(x3[span * i]);
-
-            const double* v1 = &(x1[span * i]);
-            const double* v2 = &(x2[span * i]);
-
-            #pragma vector aligned
-            for(int l = 0; l < span; ++l)
-            {
-                uX1[l] = 0.;
-                uX2[l] = 0.;
-            }
-
-            for(int k = 0; k < states; ++k)
-            {
-				#pragma unroll(10)
-            	for (int j = 0; j < span; j += 8)
-                {
-                    _mm_prefetch((const char *)&aRight[span*(k+1) + j], _MM_HINT_T0);
-                    _mm_prefetch((const char *)&aLeft[span*(k+1) + j], _MM_HINT_T0);
-                }
-
-                mic_fma4x80(&v1[k], uX1, &aLeft[k * span]);
-                mic_fma4x80(&v2[k], uX2, &aRight[k * span]);
-            }
-
-            #pragma ivdep
-            #pragma vector aligned
-            for(int l = 0; l < span; ++l)
-            {
-                uX[l] = uX1[l] * uX2[l];
-                v3[l] = 0.;
-            }
-
-            for(int k = 0; k < states; ++k)
-            {
-				#pragma unroll(10)
-            	for (int j = 0; j < span; j += 8)
-                {
-                    _mm_prefetch((const char *)&aEV[span*(k+1) + j], _MM_HINT_T0);
-                }
-
-                mic_fma4x80(&uX[k], v3, &aEV[k * span]);
-            }
-
-            __m512d t1 = _mm512_load_pd(&v3[0]);
-            t1 = _mm512_castsi512_pd(_mm512_and_epi64(_mm512_castpd_si512(t1), absMask_MIC));
-            double vmax = _mm512_reduce_gmax_pd(t1);
-            double mx[16] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-            for (int l = 8; l < span; l += 8)
-            {
-                __m512d t = _mm512_load_pd(&v3[l]);
-                t = _mm512_castsi512_pd(_mm512_and_epi64(_mm512_castpd_si512(t), absMask_MIC));
-                double vmax2 = _mm512_reduce_gmax_pd(t);
-                vmax = PLL_MAX(vmax, vmax2);
-            }
-
-            if (vmax < PLL_MINLIKELIHOOD)
-            {
-                #pragma vector aligned nontemporal
-                for(int l = 0; l < span; l++)
-                  v3[l] *= PLL_TWOTOTHE256;
-
-                addScale += wgt[i];
-            }
-        }
-    } break;
-    default:
-//      assert(0);
-      break;
-  }
-
-  *scalerIncrement = addScale;
-
-}
-
-
-
-double evaluateGTRGAMMAPROT_LG4_MIC(int *wgt, double *x1_start, double *x2_start, double *tipVector[4],
-                 unsigned char *tipX1, const int n, double *diagptable)
-{
-    double sum = 0.0;
-
-    /* the left node is a tip */
-    if(tipX1)
-    {
-        double aTipVec[1840] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-        for(int k = 0; k < 23; k++)
-        {
-            for(int j = 0; j < 4; j++)
-            {
-				for(int l = 0; l < states; l++)
-				{
-					aTipVec[k*span + j*states + l] = tipVector[j][k*states + l];
-				}
-            }
-        }
-
-        /* loop over the sites of this partition */
-        for (int i = 0; i < n; i++)
-        {
-			/* access pre-computed tip vector values via a lookup table */
-			const double *x1 = &(aTipVec[span * tipX1[i]]);
-			/* access the other(inner) node at the other end of the branch */
-			const double *x2 = &(x2_start[span * i]);
-
-			#pragma unroll(10)
-			for (int k = 0; k < span; k += 8)
-			{
-				_mm_prefetch((const char *) &x2_start[span*(i+2) + k], _MM_HINT_T1);
-				_mm_prefetch((const char *) &x2_start[span*(i+1) + k], _MM_HINT_T0);
-			}
-
-			double term = 0.;
-
-			#pragma ivdep
-			#pragma vector aligned
-			#pragma noprefetch x2
-			for(int j = 0; j < span; j++) {
-			  term += x1[j] * x2[j] * diagptable[j];
-			}
-
-			term = log(0.25 * fabs(term));
-
-			sum += wgt[i] * term;
-        }
-    }
-    else
-    {
-        for (int i = 0; i < n; i++)
-        {
-			#pragma unroll(10)
-			for (int k = 0; k < span; k += 8)
-			{
-				_mm_prefetch((const char *) &x1_start[span*(i+2) + k], _MM_HINT_T1);
-				_mm_prefetch((const char *) &x1_start[span*(i+1) + k], _MM_HINT_T0);
-
-				_mm_prefetch((const char *) &x2_start[span*(i+2) + k], _MM_HINT_T1);
-				_mm_prefetch((const char *) &x2_start[span*(i+1) + k], _MM_HINT_T0);
-			}
-
-			const double *x1 = &(x1_start[span * i]);
-			const double *x2 = &(x2_start[span * i]);
-
-			double term = 0.;
-
-			#pragma ivdep
-			#pragma vector aligned
-			#pragma noprefetch x1 x2
-			for(int j = 0; j < span; j++)
-			  term += x1[j] * x2[j] * diagptable[j];
-
-			term = log(0.25 * fabs(term));
-
-			sum += wgt[i] * term;
-        }
-    }
-
-    return sum;
-}
-
-void sumGTRGAMMAPROT_LG4_MIC(int tipCase, double *sumtable, double *x1_start, double *x2_start, double *tipVector[4],
-    unsigned char *tipX1, unsigned char *tipX2, int n)
-{
-    double aTipVec[1840] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-    for(int k = 0; k < maxStateValue; k++)
-    {
-        for(int j = 0; j < 4; j++)
-        {
-			for(int l = 0; l < states; l++)
-			{
-				aTipVec[k*span + j*states + l] = tipVector[j][k*states + l];
-			}
-        }
-    }
-
-    switch(tipCase)
-    {
-      case PLL_TIP_TIP:
-      {
-        for(int i = 0; i < n; i++)
-        {
-            const double *left  = &(aTipVec[span * tipX1[i]]);
-            const double *right = &(aTipVec[span * tipX2[i]]);
-
-            #pragma ivdep
-            #pragma vector aligned nontemporal
-            for(int l = 0; l < span; l++)
-            {
-                sumtable[i * span + l] = left[l] * right[l];
-            }
-        }
-      } break;
-      case PLL_TIP_INNER:
-      {
-        for(int i = 0; i < n; i++)
-        {
-			#pragma unroll(10)
-			for (int k = 0; k < span; k += 8)
-			{
-				_mm_prefetch((const char *) &x2_start[span*(i+2) + k], _MM_HINT_T1);
-				_mm_prefetch((const char *) &x2_start[span*(i+1) + k], _MM_HINT_T0);
-			}
-
-          const double *left = &(aTipVec[span * tipX1[i]]);
-          const double *right = &(x2_start[span * i]);
-
-          #pragma ivdep
-          #pragma vector aligned nontemporal
-		  #pragma noprefetch right
-          for(int l = 0; l < span; l++)
-          {
-              sumtable[i * span + l] = left[l] * right[l];
-          }
-        }
-      } break;
-      case PLL_INNER_INNER:
-      {
-        for(int i = 0; i < n; i++)
-        {
-			#pragma unroll(10)
-			for (int k = 0; k < span; k += 8)
-			{
-				_mm_prefetch((const char *) &x1_start[span*(i+2) + k], _MM_HINT_T1);
-				_mm_prefetch((const char *) &x1_start[span*(i+1) + k], _MM_HINT_T0);
-
-				_mm_prefetch((const char *) &x2_start[span*(i+2) + k], _MM_HINT_T1);
-				_mm_prefetch((const char *) &x2_start[span*(i+1) + k], _MM_HINT_T0);
-			}
-
-            const double *left  = &(x1_start[span * i]);
-            const double *right = &(x2_start[span * i]);
-
-            #pragma ivdep
-            #pragma vector aligned nontemporal
-			#pragma noprefetch left right
-            for(int l = 0; l < span; l++)
-            {
-                sumtable[i * span + l] = left[l] * right[l];
-            }
-        }
-      } break;
-  //    default:
-  //      assert(0);
-    }
-}
-
-void coreGTRGAMMAPROT_LG4_MIC(const int upper, double *sumtable,
-    volatile double *ext_dlnLdlz,  volatile double *ext_d2lnLdlz2, double *EIGN[4], double *gammaRates, double lz, int *wgt)
-{
-    double diagptable0[span] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-    double diagptable1[span] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-    double diagptable2[span] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-    double diagptable01[span] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-    double diagptable02[span] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-
-    /* pre-compute the derivatives of the P matrix for all discrete GAMMA rates */
-
-    for(int i = 0; i < 4; i++)
-    {
-        const double ki = gammaRates[i];
-        const double kisqr = ki * ki;
-
-        diagptable0[i*states] = 1.;
-        diagptable1[i*states] = 0.;
-        diagptable2[i*states] = 0.;
-
-        for(int l = 1; l < states; l++)
-        {
-          diagptable0[i * states + l]  = exp(EIGN[i][l] * ki * lz);
-          diagptable1[i * states + l] = EIGN[i][l] * ki;
-          diagptable2[i * states + l] = EIGN[i][l] * EIGN[i][l] * kisqr;
-        }
-    }
-
-    #pragma ivdep
-    for(int i = 0; i < span; i++)
-    {
-        diagptable01[i] = diagptable0[i] * diagptable1[i];
-        diagptable02[i] = diagptable0[i] * diagptable2[i];
-    }
-
-    /* loop over sites in this partition */
-
-    const int aligned_width = upper % 8 == 0 ? upper / 8 : upper / 8 + 1;
-
-    double dlnLdlz = 0.;
-    double d2lnLdlz2 = 0.;
-
-    __mmask16 k1 = _mm512_int2mask(0x000000FF);
-
-    for (int i = 0; i < aligned_width; i++)
-    {
-        /* access the array with pre-computed values */
-        const double *sum = &sumtable[i * span * 8];
-
-        /* initial per-site likelihood and 1st and 2nd derivatives */
-
-        double invBuf[8] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-        double d1Buf[8] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-        double d2Buf[8] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-
-        __m512d invVec;
-        __m512d d1Vec;
-        __m512d d2Vec;
-        int mask = 0x01;
-
-        #pragma noprefetch sum
-        #pragma unroll(8)
-        for(int j = 0; j < 8; j++)
-        {
-
-        	#pragma unroll(10)
-			for (int k = 0; k < span; k += 8)
-			{
-				_mm_prefetch((const char *) &sum[span*(j+2) + k], _MM_HINT_T1);
-				_mm_prefetch((const char *) &sum[span*(j+1) + k], _MM_HINT_T0);
-			}
-
-            __m512d inv_1 = _mm512_setzero_pd();
-            __m512d d1_1 = _mm512_setzero_pd();
-            __m512d d2_1 = _mm512_setzero_pd();
-
-            for (int offset = 0; offset < span; offset += 8)
-            {
-                __m512d d0_1 = _mm512_load_pd(&diagptable0[offset]);
-                __m512d d01_1 = _mm512_load_pd(&diagptable01[offset]);
-                __m512d d02_1 = _mm512_load_pd(&diagptable02[offset]);
-                __m512d s_1 = _mm512_load_pd(&sum[j*span + offset]);
-
-                inv_1 = _mm512_fmadd_pd(d0_1, s_1, inv_1);
-                d1_1 = _mm512_fmadd_pd(d01_1, s_1, d1_1);
-                d2_1 = _mm512_fmadd_pd(d02_1, s_1, d2_1);
-            }
-
-            __mmask8 k1 = _mm512_int2mask(mask);
-            mask <<= 1;
-
-            // reduce
-            inv_1 = _mm512_add_pd (inv_1, _mm512_swizzle_pd(inv_1, _MM_SWIZ_REG_CDAB));
-            inv_1 = _mm512_add_pd (inv_1, _mm512_swizzle_pd(inv_1, _MM_SWIZ_REG_BADC));
-            inv_1 = _mm512_add_pd (inv_1, _mm512_castsi512_pd(_mm512_permute4f128_epi32(_mm512_castpd_si512(inv_1), _MM_PERM_BADC)));
-            invVec = _mm512_mask_mov_pd(invVec, k1, inv_1);
-
-            d1_1 = _mm512_add_pd (d1_1, _mm512_swizzle_pd(d1_1, _MM_SWIZ_REG_CDAB));
-            d1_1 = _mm512_add_pd (d1_1, _mm512_swizzle_pd(d1_1, _MM_SWIZ_REG_BADC));
-            d1_1 = _mm512_add_pd (d1_1, _mm512_castsi512_pd(_mm512_permute4f128_epi32(_mm512_castpd_si512(d1_1), _MM_PERM_BADC)));
-            d1Vec = _mm512_mask_mov_pd(d1Vec, k1, d1_1);
-
-            d2_1 = _mm512_add_pd (d2_1, _mm512_swizzle_pd(d2_1, _MM_SWIZ_REG_CDAB));
-            d2_1 = _mm512_add_pd (d2_1, _mm512_swizzle_pd(d2_1, _MM_SWIZ_REG_BADC));
-            d2_1 = _mm512_add_pd (d2_1, _mm512_castsi512_pd(_mm512_permute4f128_epi32(_mm512_castpd_si512(d2_1), _MM_PERM_BADC)));
-            d2Vec = _mm512_mask_mov_pd(d2Vec, k1, d2_1);
-        }
-
-        _mm512_store_pd(&invBuf[0], invVec);
-        _mm512_store_pd(&d1Buf[0], d1Vec);
-        _mm512_store_pd(&d2Buf[0], d2Vec);
-
-        #pragma ivdep
-        #pragma vector aligned
-        for (int j = 0; j < 8; ++j)
-        {
-            const double inv_Li = 1.0 / invBuf[j];
-
-            const double d1 = d1Buf[j] * inv_Li;
-            const double d2 = d2Buf[j] * inv_Li;
-
-            dlnLdlz += wgt[i * 8 + j] * d1;
-            d2lnLdlz2 += wgt[i * 8 + j] * (d2 - d1 * d1);
-        }
-    } // site loop
-
-    *ext_dlnLdlz   = dlnLdlz;
-    *ext_d2lnLdlz2 = d2lnLdlz2;
-}
-
diff --git a/pllrepo/src/mic_native_dna.c b/pllrepo/src/mic_native_dna.c
deleted file mode 100644
index 6dd6631..0000000
--- a/pllrepo/src/mic_native_dna.c
+++ /dev/null
@@ -1,676 +0,0 @@
-#include <omp.h>
-#include <immintrin.h>
-#include <string.h>
-#include <math.h>
-
-#include "pll.h"
-#include "mic_native.h"
-/* Fixed sizes used by the DNA kernels in this file. */
-static const int states = 4;           /* inner loop bound over l (state index) */
-static const int statesSquare = 16;    /* not referenced anywhere else in this file */
-static const int span = 4 * 4;         /* number of doubles stored per site in x1/x2/x3/sumtable */
-static const int maxStateValue = 16;   /* number of tip codes the lookup tables are built for */
-/* Expand 16 doubles into 64 doubles, repeating every value four times.
- *
- * For m in 0..15 and c in 0..3 the result is
- *     outv[4*m + c] = inv[(m % 4) * 4 + m / 4]
- * i.e. the input is read as a 4x4 block in transposed order.  Each loop
- * iteration fills one 8-lane vector: mask k1 (0x0F) loads the value for
- * entry l into lanes 0-3 and mask k2 (0xF0) loads the value for entry l+1
- * into lanes 4-7.
- *
- * inv  : 16 input values, only read.
- * outv : 64 output values.  They are written with _mm512_store_pd, so the
- *        caller has to pass a buffer aligned for that store (all callers in
- *        this file pass arrays declared with an align attribute).
- */
-__inline void mic_broadcast16x64(const double* inv, double* outv)
-{
-    __mmask8 k1 = _mm512_int2mask(0x0F);
-    __mmask8 k2 = _mm512_int2mask(0xF0);
-    for(int l = 0; l < 16; l += 2)
-    {
-        __m512d t = _mm512_setzero_pd();
-        t = _mm512_mask_extload_pd(t, k1, &inv[(l%4)*4 + l/4], _MM_UPCONV_PD_NONE, _MM_BROADCAST_1X8, _MM_HINT_NONE);
-        t = _mm512_mask_extload_pd(t, k2, &inv[((l+1)%4)*4 + (l+1)/4], _MM_UPCONV_PD_NONE, _MM_BROADCAST_1X8, _MM_HINT_NONE);
-
-        _mm512_store_pd(&outv[l*4], t);
-    }
-}
-/* Compute the per-site vectors x3 of a node from those of its two children,
- * using 512-bit MIC intrinsics (span = 16 doubles per site).
- *
- * With uX the element-wise product of the two 16-entry child terms, every
- * site i is written as
- *     x3[16*i + 4*r + s] = sum over j of uX[4*r + j] * extEV[4*j + s]
- * where the child term number k (0..15) is
- *     tip child:   sum over l of tipVector[code*4 + l] * P[k*4 + l]
- *                  (looked up in a table precomputed for the 16 tip codes)
- *     inner child: sum over j of x[16*i + (k/4)*4 + j] * P[k*4 + j]
- * and P is left (first child) or right (second child).
- *
- * Scaling: in the PLL_TIP_INNER and PLL_INNER_INNER cases a site whose 16
- * absolute values are all below PLL_MINLIKELIHOOD is multiplied by
- * PLL_TWOTOTHE256.  The event is recorded in ex3[i] when fastScaling is false,
- * otherwise wgt[i] is added to a local total.  PLL_TIP_TIP never rescales; it
- * only resets ex3[i] to 0 when fastScaling is false.
- *
- * tipCase         : PLL_TIP_TIP, PLL_TIP_INNER or PLL_INNER_INNER; any other
- *                   value computes nothing.
- * x1, x2          : input vectors of inner children (x1 is read only for
- *                   PLL_INNER_INNER, x2 for PLL_TIP_INNER and PLL_INNER_INNER).
- * x3              : output, 16 doubles per site.
- * extEV           : 16 values, indexed as extEV[4*j + s].
- * tipVector       : indexed as tipVector[code*4 + l] for tip codes 0..15.
- * ex3             : per-site scaling counters (used only if !fastScaling).
- * tipX1, tipX2    : per-site tip codes (tipX2 is read only for PLL_TIP_TIP).
- * n               : number of sites.
- * left, right     : 64 values each, indexed as [k*4 + l].
- * wgt             : per-site weights (read only if fastScaling).
- * scalerIncrement : if fastScaling, *scalerIncrement is set to the summed
- *                   weights of all sites that were rescaled in this call.
- * fastScaling     : selects between the two scaling bookkeeping modes above.
- *
- * NOTE(review): x3 is accessed with aligned vector loads/stores and
- * "#pragma vector aligned", so it presumably has to be 64-byte aligned -
- * confirm against the allocation code.
- */
-void newviewGTRGAMMA_MIC(int tipCase,
-                  double *x1, double *x2, double *x3, double *extEV, double *tipVector,
-                  int *ex3, unsigned char *tipX1, unsigned char *tipX2,
-                  int n, double *left, double *right, int *wgt, int *scalerIncrement, const pllBoolean fastScaling)
-{
-    __m512d minlikelihood_MIC = _mm512_set1_pd(PLL_MINLIKELIHOOD); /* NOTE(review): not referenced again in this function */
-    __m512d twotothe256_MIC = _mm512_set1_pd(PLL_TWOTOTHE256);
-    __m512i absMask_MIC = _mm512_set1_epi64(0x7fffffffffffffffULL); /* all bits except the top (sign) bit of each 64-bit lane */
-
-	int addScale = 0;
-
-    double aEV[64] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-    /* aEV[j*16 + k] = extEV[j*4 + k%4]: extEV repeated so that it lines up with the 64-entry buffers built by mic_broadcast16x64 */
-    #pragma ivdep
-    for (int l = 0; l < 64; ++l)
-    {
-        aEV[l] = extEV[(l / 16) * 4 + (l % 4)];
-    }
-
-  switch(tipCase)
-  {
-    case PLL_TIP_TIP:
-      {
-        /* multiply all possible tip state vectors with the respective P-matrices
-        */
-
-            double umpX1[256] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-            double umpX2[256] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-
-            for(int k = 0; k < 256; ++k)
-            {
-                umpX1[k] = 0.0;
-                umpX2[k] = 0.0;
-            }
-            /* table entry [16*i + k] = sum over l of tipVector[i*4 + l] * P[k*4 + l], for every tip code i (P = left for umpX1, right for umpX2) */
-            for(int i = 0; i < maxStateValue; ++i)
-            {
-              for(int l = 0; l < states; ++l)
-              {
-                  #pragma ivdep
-                  for(int k = 0; k < span; ++k)
-                  {
-                      umpX1[16 * i + k] +=  tipVector[i * 4 + l] *  left[k * 4 + l];
-                      umpX2[16 * i + k] +=  tipVector[i * 4 + l] * right[k * 4 + l];
-                  }
-              }
-            }
-
-        double auX[64] __attribute__((align(64)));
-
-        for(int i = 0; i < n; ++i)
-        {
-            _mm_prefetch((const char*) (const char*) &x3[span*(i+8)], _MM_HINT_ET1);
-            _mm_prefetch((const char*) &x3[span*(i+8) + 8], _MM_HINT_ET1);
-
-            _mm_prefetch((const char*) &x3[span*(i+1)], _MM_HINT_ET0);
-            _mm_prefetch((const char*) &x3[span*(i+1) + 8], _MM_HINT_ET0);
-
-            const double *uX1 = &umpX1[16 * tipX1[i]];
-            const double *uX2 = &umpX2[16 * tipX2[i]];
-
-            double uX[16] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-            double* v = &x3[i * 16];
-
-            #pragma ivdep
-            #pragma vector aligned
-            for(int l = 0; l < 16; ++l)
-            {
-                uX[l] = uX1[l] * uX2[l];
-                v[l] = 0.;
-            }
-            /* v[4*r + s] = sum over j of uX[4*r + j] * extEV[4*j + s], done as four 16-wide multiply-adds */
-            mic_broadcast16x64(uX, auX);
-
-            for (int j = 0; j < 4; ++j)
-            {
-                #pragma ivdep
-                #pragma vector aligned
-                #pragma vector nontemporal
-                for(int k = 0; k < 16; ++k)
-                {
-                    v[k] += auX[j*16 + k] * aEV[j*16 + k];
-                }
-            }
-
-            // init scaling counter for the site
-            if (!fastScaling)
-                ex3[i] = 0;
-
-        } // sites loop
-
-      }
-      break;
-    case PLL_TIP_INNER:
-      {
-        /* we do analogous pre-computations as above, with the only difference that we now do them
-        only for one tip vector */
-
-          double umpX1[256] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-
-        /* precompute P and left tip vector product */
-
-        for(int k = 0; k < 256; ++k)
-        {
-            umpX1[k] = 0.0;
-        }
-
-        for(int i = 0; i < 16; ++i)
-        {
-          for(int l = 0; l < 4; ++l)
-          {
-              #pragma ivdep
-              for(int k = 0; k < 16; ++k)
-              {
-                  umpX1[16 * i + k] +=  tipVector[i * 4 + l] *  left[k * 4 + l];
-              }
-          }
-        }
-
-        // re-arrange right matrix for better memory layout: aRight[j*16 + l] = right[l*4 + j]
-        double aRight[64] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-        for(int j = 0; j < 4; j++)
-        {
-            for(int l = 0; l < 16; l++)
-            {
-                aRight[j*16 + l] = right[l*4 + j];
-            }
-        }
-
-        for (int i = 0; i < n; i++)
-        {
-            _mm_prefetch((const char*) &x2[span*(i+16)], _MM_HINT_T1);
-            _mm_prefetch((const char*) &x2[span*(i+16) + 8], _MM_HINT_T1);
-            _mm_prefetch((const char*) &x3[span*(i+16)], _MM_HINT_ET1);
-            _mm_prefetch((const char*) &x3[span*(i+16) + 8], _MM_HINT_ET1);
-
-            _mm_prefetch((const char*) &x2[span*(i+1)], _MM_HINT_T0);
-            _mm_prefetch((const char*) &x2[span*(i+1) + 8], _MM_HINT_T0);
-            _mm_prefetch((const char*) &x3[span*(i+1)], _MM_HINT_ET0);
-            _mm_prefetch((const char*) &x3[span*(i+1) + 8], _MM_HINT_ET0);
-
-            /* access pre-computed value based on the raw sequence data tipX1 that is used as an index */
-            double* uX1 = &umpX1[span * tipX1[i]];
-            double uX2[16] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-            double uX[16] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-
-            #pragma vector aligned
-            for(int l = 0; l < 16; ++l)
-            {
-                uX2[l] = 0.;
-            }
-
-            double aV2[64] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-            const double* v2 = &(x2[16 * i]);
-
-            mic_broadcast16x64(v2, aV2);
-
-            for(int j = 0; j < 4; j++)
-            {
-                #pragma ivdep
-                #pragma vector aligned
-                for(int l = 0; l < 16; l++)
-                {
-                    uX2[l] += aV2[j*16 + l] * aRight[j*16 + l];
-                }
-            }
-
-            double* v3 = &(x3[span * i]);
-
-            #pragma ivdep
-            #pragma vector aligned
-            for(int l = 0; l < 16; ++l)
-            {
-                uX[l] = uX1[l] * uX2[l];
-                v3[l] = 0.;
-            }
-
-            double auX[64] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-            mic_broadcast16x64(uX, auX);
-
-            for (int j = 0; j < 4; ++j)
-            {
-                #pragma ivdep
-                #pragma vector aligned
-                for(int k = 0; k < 16; ++k)
-                {
-                    v3[k] += auX[j*16 + k] * aEV[j*16 + k];
-                }
-            }
-
-            __m512d t1 = _mm512_load_pd(&v3[0]);
-            t1 = _mm512_castsi512_pd(_mm512_and_epi64(_mm512_castpd_si512(t1), absMask_MIC));
-            double vmax1 = _mm512_reduce_gmax_pd(t1);
-            __m512d t2 = _mm512_load_pd(&v3[8]);
-            t2 = _mm512_castsi512_pd(_mm512_and_epi64(_mm512_castpd_si512(t2), absMask_MIC));
-            double vmax2 = _mm512_reduce_gmax_pd(t2);
-            /* rescale the site when all 16 absolute values are below PLL_MINLIKELIHOOD.
-             * NOTE(review): t1/t2 were overwritten with the sign-masked values above, so
-             * what is stored back is |v3| * PLL_TWOTOTHE256 - confirm that dropping the
-             * sign is intended.  ex3[i] is incremented without being reset in this case. */
-            if(vmax1 < PLL_MINLIKELIHOOD && vmax2 < PLL_MINLIKELIHOOD)
-            {
-				t1 = _mm512_mul_pd(t1, twotothe256_MIC);
-				_mm512_store_pd(&v3[0], t1);
-				t2 = _mm512_mul_pd(t2, twotothe256_MIC);
-				_mm512_store_pd(&v3[8], t2);
-
-                if(!fastScaling)
-                  ex3[i] += 1;
-                else
-                  addScale += wgt[i];
-            }
-        } // site loop
-      }
-      break;
-    case PLL_INNER_INNER:
-    {
-      /* same as above, without pre-computations */
-
-        // re-arrange left and right matrices for better memory layout
-        double aLeft[64] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-        double aRight[64] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-        for(int j = 0; j < 4; j++)
-        {
-            for(int l = 0; l < 16; l++)
-            {
-                aLeft[j*16 + l] = left[l*4 + j];
-                aRight[j*16 + l] = right[l*4 + j];
-            }
-        }
-
-        for (int i = 0; i < n; i++)
-        {
-            _mm_prefetch((const char*) &x1[span*(i+8)], _MM_HINT_T1);
-            _mm_prefetch((const char*) &x1[span*(i+8) + 8], _MM_HINT_T1);
-            _mm_prefetch((const char*) &x2[span*(i+8)], _MM_HINT_T1);
-            _mm_prefetch((const char*) &x2[span*(i+8) + 8], _MM_HINT_T1);
-            _mm_prefetch((const char*) &x3[span*(i+8)], _MM_HINT_ET1);
-            _mm_prefetch((const char*) &x3[span*(i+8) + 8], _MM_HINT_ET1);
-
-            _mm_prefetch((const char*) &x1[span*(i+1)], _MM_HINT_T0);
-            _mm_prefetch((const char*) &x1[span*(i+1) + 8], _MM_HINT_T0);
-            _mm_prefetch((const char*) &x2[span*(i+1)], _MM_HINT_T0);
-            _mm_prefetch((const char*) &x2[span*(i+1) + 8], _MM_HINT_T0);
-            _mm_prefetch((const char*) &x3[span*(i+1)], _MM_HINT_ET0);
-            _mm_prefetch((const char*) &x3[span*(i+1) + 8], _MM_HINT_ET0);
-
-            double uX1[16] __attribute__((align(64)));
-            double uX2[16] __attribute__((align(64)));
-            double uX[16] __attribute__((align(64)));
-
-            for(int l = 0; l < 16; l++)
-            {
-              uX1[l] = 0.;
-              uX2[l] = 0.;
-            }
-
-            double aV1[64] __attribute__((align(64)));
-            double aV2[64] __attribute__((align(64)));
-
-            const double* v1 = &(x1[span * i]);
-            const double* v2 = &(x2[span * i]);
-
-            mic_broadcast16x64(v1, aV1);
-
-            mic_broadcast16x64(v2, aV2);
-
-            for(int j = 0; j < 4; j++)
-            {
-                #pragma ivdep
-                #pragma vector aligned
-                for(int l = 0; l < 16; l++)
-                {
-                    uX1[l] += aV1[j*16 + l] * aLeft[j*16 + l];
-                    uX2[l] += aV2[j*16 + l] * aRight[j*16 + l];
-                }
-            }
-
-            double* v3 =  &(x3[span * i]);
-
-            #pragma ivdep
-            #pragma vector aligned
-            for(int l = 0; l < 16; ++l)
-            {
-                uX[l] = uX1[l] * uX2[l];
-                v3[l] = 0.;
-            }
-
-            double auX[64] __attribute__((align(64)));
-            mic_broadcast16x64(uX, auX);
-
-            for(int j = 0; j < 4; ++j)
-            {
-                #pragma ivdep
-                #pragma vector aligned
-                for(int k = 0; k < 16; ++k)
-                {
-                    v3[k] += auX[j*16 + k] * aEV[j*16 + k];
-                }
-            }
-
-
-            __m512d t1 = _mm512_load_pd(&v3[0]);
-            t1 = _mm512_castsi512_pd(_mm512_and_epi64(_mm512_castpd_si512(t1), absMask_MIC));
-            double vmax1 = _mm512_reduce_gmax_pd(t1);
-            __m512d t2 = _mm512_load_pd(&v3[8]);
-            t2 = _mm512_castsi512_pd(_mm512_and_epi64(_mm512_castpd_si512(t2), absMask_MIC));
-            double vmax2 = _mm512_reduce_gmax_pd(t2);
-            /* same rescaling step as in the PLL_TIP_INNER case; NOTE(review): the values
-             * stored back are the sign-masked ones multiplied by PLL_TWOTOTHE256 - confirm. */
-            if(vmax1 < PLL_MINLIKELIHOOD && vmax2 < PLL_MINLIKELIHOOD)
-            {
-				t1 = _mm512_mul_pd(t1, twotothe256_MIC);
-				_mm512_store_pd(&v3[0], t1);
-				t2 = _mm512_mul_pd(t2, twotothe256_MIC);
-				_mm512_store_pd(&v3[8], t2);
-
-                if(!fastScaling)
-                  ex3[i] += 1;
-                else
-                  addScale += wgt[i];
-            }
-        }
-    } break;
-    default:
-//      assert(0);
-      break;
-  }
-
-  /* report the scaling multiplications carried out for computing the likelihood array at node p:
-     *scalerIncrement is overwritten with (not added to by) the total collected in addScale */
-
-  if (fastScaling)
-  {
-      *scalerIncrement = addScale;
-  }
-
-}
-/* Sum the weighted per-site log terms across one branch.
- *
- * For every site i
- *     term = sum over j < 16 of x1[j] * x2[j] * diagptable[j]
- * and wgt[i] * log(0.25 * term) is accumulated.  When fastScaling is false the
- * site value additionally gets ex2[i] * log(PLL_MINLIKELIHOOD) (tip branch) or
- * (ex1[i] + ex2[i]) * log(PLL_MINLIKELIHOOD) (inner branch) added before it is
- * weighted.  Only the inner, non-fastScaling branch takes fabs(term) before
- * the logarithm; the other three branches use term as is.
- *
- * ex1, ex2    : per-site scaling counters (read only if !fastScaling; ex1 is
- *               read only in the inner branch).
- * wgt         : per-site weights.
- * x1_start    : 16 doubles per site for the first node; read only when tipX1
- *               is NULL.
- * x2_start    : 16 doubles per site for the second node.
- * tipVector   : indexed as tipVector[code*4 + l] for tip codes 0..15; read only
- *               when tipX1 is non-NULL.
- * tipX1       : per-site tip codes of the first node, or NULL if that node is
- *               not a tip.
- * n           : number of sites.
- * diagptable  : 16 values multiplied into every site product.
- * fastScaling : when true, the ex1/ex2 correction is skipped.
- *
- * Returns the accumulated sum.
- */
-double evaluateGTRGAMMA_MIC(int *ex1, int *ex2, int *wgt,
-                 double *x1_start, double *x2_start,
-                 double *tipVector,
-                 unsigned char *tipX1, const int n, double *diagptable, const pllBoolean fastScaling)
-{
-	double sum = 0.0;
-
-    /* the left node is a tip */
-    if(tipX1)
-    {
-        /* aTipVec[k*16 + 4*r + l] = tipVector[k*4 + l] for r = 0..3: each 4-entry tip vector repeated to fill 16 entries */
-        double aTipVec[256] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-        for(int k = 0; k < 16; k++)
-        {
-            for(int l = 0; l < 4; l++)
-            {
-                aTipVec[k*16 + l] = aTipVec[k*16 + 4 + l] = aTipVec[k*16 + 8 + l] = aTipVec[k*16 + 12 + l] = tipVector[k*4 + l];
-            }
-        }
-
-        /* loop over the sites of this partition */
-        for (int i = 0; i < n; i++)
-        {
-            _mm_prefetch((const char*) &x2_start[span*(i+8)], _MM_HINT_T1);
-            _mm_prefetch((const char*) &x2_start[span*(i+8) + 8], _MM_HINT_T1);
-
-            _mm_prefetch((const char*) &x2_start[span*(i+1)], _MM_HINT_T0);
-            _mm_prefetch((const char*) &x2_start[span*(i+1) + 8], _MM_HINT_T0);
-
-          /* access pre-computed tip vector values via a lookup table */
-          const double *x1 = &(aTipVec[16 * tipX1[i]]);
-          /* access the other(inner) node at the other end of the branch */
-          const double *x2 = &(x2_start[span * i]);
-
-          double term = 0.;
-
-          #pragma ivdep
-          #pragma vector aligned
-          for(int j = 0; j < span; j++)
-              term += x1[j] * x2[j] * diagptable[j];
-
-          if(!fastScaling)
-              term = log(0.25 * term) + (ex2[i] * log(PLL_MINLIKELIHOOD));
-          else
-              term = log(0.25 * term);
-
-          sum += wgt[i] * term;
-        }
-    }
-    else
-    {
-        for (int i = 0; i < n; i++)
-        {
-            _mm_prefetch((const char*) &x1_start[span*(i+8)], _MM_HINT_T1);
-            _mm_prefetch((const char*) &x1_start[span*(i+8) + 8], _MM_HINT_T1);
-            _mm_prefetch((const char*) &x2_start[span*(i+8)], _MM_HINT_T1);
-            _mm_prefetch((const char*) &x2_start[span*(i+8) + 8], _MM_HINT_T1);
-
-            _mm_prefetch((const char*) &x1_start[span*(i+1)], _MM_HINT_T0);
-            _mm_prefetch((const char*) &x1_start[span*(i+1) + 8], _MM_HINT_T0);
-            _mm_prefetch((const char*) &x2_start[span*(i+1)], _MM_HINT_T0);
-            _mm_prefetch((const char*) &x2_start[span*(i+1) + 8], _MM_HINT_T0);
-
-          const double *x1 = &(x1_start[span * i]);
-          const double *x2 = &(x2_start[span * i]);
-
-          double term = 0.;
-
-          #pragma ivdep
-          #pragma vector aligned
-          for(int j = 0; j < span; j++)
-              term += x1[j] * x2[j] * diagptable[j];
-
-          if(!fastScaling)
-              term = log(0.25 * fabs(term)) + ((ex1[i] + ex2[i]) * log(PLL_MINLIKELIHOOD));
-          else
-              term = log(0.25 * term);
-
-          sum += wgt[i] * term;
-        }
-    }
-
-    return sum;
-}
-/* Fill sumtable with the element-wise product of the two per-site vectors at
- * the ends of a branch: sumtable[16*i + l] = left[l] * right[l] for l < 16.
- *
- * The source of left/right depends on tipCase:
- *     PLL_TIP_TIP     : both from the tip lookup table (codes tipX1[i], tipX2[i])
- *     PLL_TIP_INNER   : left from the tip lookup table (tipX1[i]), right from x2_start
- *     PLL_INNER_INNER : left from x1_start, right from x2_start
- * Any other tipCase leaves sumtable untouched (there is no default branch).
- *
- * The tip lookup table aTipVec is built on every call, for all 16 tip codes,
- * as aTipVec[k*16 + 4*r + l] = tipVector[k*4 + l] for r = 0..3.
- *
- * sumtable         : output, 16 doubles per site.
- * x1_start/x2_start: 16 doubles per site for inner nodes.
- * tipVector        : indexed as tipVector[code*4 + l].
- * tipX1, tipX2     : per-site tip codes.
- * n                : number of sites.
- */
-void sumGTRGAMMA_MIC(int tipCase, double *sumtable, double *x1_start, double *x2_start, double *tipVector,
-    unsigned char *tipX1, unsigned char *tipX2, int n)
-{
-	double aTipVec[256] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-    for(int k = 0; k < 16; k++)
-    {
-        for(int l = 0; l < 4; l++)
-        {
-            aTipVec[k*16 + l] = aTipVec[k*16 + 4 + l] = aTipVec[k*16 + 8 + l] = aTipVec[k*16 + 12 + l] = tipVector[k*4 + l];
-        }
-    }
-
-    switch(tipCase)
-    {
-      case PLL_TIP_TIP:
-      {
-        for(int i = 0; i < n; i++)
-        {
-            const double *left  = &(aTipVec[16 * tipX1[i]]);
-            const double *right = &(aTipVec[16 * tipX2[i]]);
-            double* sum = &sumtable[i * span];
-
-            #pragma ivdep
-            #pragma vector aligned nontemporal
-            for(int l = 0; l < span; l++)
-            {
-              sum[l] = left[l] * right[l];
-            }
-        }
-      } break;
-      case PLL_TIP_INNER:
-      {
-        for(int i = 0; i < n; i++)
-        {
-          _mm_prefetch((const char*) &x2_start[span*(i+32)], _MM_HINT_T1);
-          _mm_prefetch((const char*) &x2_start[span*(i+32) + 8], _MM_HINT_T1);
-
-          _mm_prefetch((const char*) &x2_start[span*(i+4)], _MM_HINT_T0);
-          _mm_prefetch((const char*) &x2_start[span*(i+4) + 8], _MM_HINT_T0);
-
-          const double *left = &(aTipVec[16 * tipX1[i]]);
-          const double *right = &(x2_start[span * i]);
-          double* sum = &sumtable[i * span];
-
-          #pragma ivdep
-          #pragma vector aligned nontemporal
-          for(int l = 0; l < span; l++)
-          {
-              sum[l] = left[l] * right[l];
-          }
-        }
-      } break;
-      case PLL_INNER_INNER:
-      {
-        for(int i = 0; i < n; i++)
-        {
-            _mm_prefetch((const char*) &x1_start[span*(i+32)], _MM_HINT_T1);
-            _mm_prefetch((const char*) &x1_start[span*(i+32) + 8], _MM_HINT_T1);
-            _mm_prefetch((const char*) &x2_start[span*(i+32)], _MM_HINT_T1);
-            _mm_prefetch((const char*) &x2_start[span*(i+32) + 8], _MM_HINT_T1);
-
-            _mm_prefetch((const char*) &x1_start[span*(i+4)], _MM_HINT_T0);
-            _mm_prefetch((const char*) &x1_start[span*(i+4) + 8], _MM_HINT_T0);
-            _mm_prefetch((const char*) &x2_start[span*(i+4)], _MM_HINT_T0);
-            _mm_prefetch((const char*) &x2_start[span*(i+4) + 8], _MM_HINT_T0);
-
-            const double *left  = &(x1_start[span * i]);
-            const double *right = &(x2_start[span * i]);
-            double* sum = &sumtable[i * span];
-
-            #pragma ivdep
-            #pragma vector aligned nontemporal
-            for(int l = 0; l < span; l++)
-            {
-                sum[l] = left[l] * right[l];
-            }
-        }
-      } break;
-  //    default:
-  //      assert(0);
-    }
-}
-/* Accumulate the weighted first and second derivative terms over all sites
- * of a partition, from the products stored in sumtable.
- *
- * Tables (index i*4 + l, i = rate 0..3, l = 0..3, ki = gammaRates[i]):
- *     diagptable0 = exp(EIGN[l] * ki * lz)      (1 for l = 0)
- *     diagptable1 = EIGN[l] * ki                (0 for l = 0)
- *     diagptable2 = EIGN[l] * EIGN[l] * ki * ki (0 for l = 0)
- *     diagptable01 = diagptable0 * diagptable1, diagptable02 = diagptable0 * diagptable2
- * Per site, the 16 sumtable entries are multiplied with diagptable0,
- * diagptable01 and diagptable02 and reduced to three scalars inv, d1, d2.
- * With d1' = d1 / inv and d2' = d2 / inv the site adds
- *     wgt * d1'               to the first result and
- *     wgt * (d2' - d1' * d1') to the second.
- *
- * Sites are handled in groups of 8 (aligned_width = upper / 8 rounded up) and
- * all 8 slots of every group are read.
- * NOTE(review): when upper is not a multiple of 8 this reads sumtable and wgt
- * past site upper-1, so both are presumably padded by the caller - confirm.
- *
- * upper         : number of sites.
- * sumtable      : 16 doubles per site, loaded with aligned vector loads.
- * ext_dlnLdlz   : receives the first accumulated value.
- * ext_d2lnLdlz2 : receives the second accumulated value.
- * EIGN          : indexed with l = 1..3 only.
- * gammaRates    : 4 rates.
- * lz            : scalar used in the exponent of diagptable0.
- * wgt           : per-site weights.
- */
-void coreGTRGAMMA_MIC(const int upper, double *sumtable,
-    volatile double *ext_dlnLdlz,  volatile double *ext_d2lnLdlz2, double *EIGN, double *gammaRates, double lz, int *wgt)
-{
-	double diagptable0[16] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-    double diagptable1[16] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-    double diagptable2[16] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-    double diagptable01[16] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-    double diagptable02[16] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-
-    /* pre-compute the derivatives of the P matrix for all discrete GAMMA rates */
-
-    for(int i = 0; i < 4; i++)
-    {
-        const double ki = gammaRates[i];
-        const double kisqr = ki * ki;
-
-        diagptable0[i*4] = 1.;
-        diagptable1[i*4] = 0.;
-        diagptable2[i*4] = 0.;
-
-        for(int l = 1; l < states; l++)
-        {
-          diagptable0[i * 4 + l]  = exp(EIGN[l] * ki * lz);
-          diagptable1[i * 4 + l] = EIGN[l] * ki;
-          diagptable2[i * 4 + l] = EIGN[l] * EIGN[l] * kisqr;
-        }
-    }
-
-    #pragma ivdep
-    for(int i = 0; i < 16; i++)
-    {
-        diagptable01[i] = diagptable0[i] * diagptable1[i];
-        diagptable02[i] = diagptable0[i] * diagptable2[i];
-    }
-
-    /* loop over sites in this partition */
-
-    const int aligned_width = upper % 8 == 0 ? upper / 8 : upper / 8 + 1;
-
-    double dlnLBuf[8] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-    double d2lnLBuf[8] __attribute__((align(PLL_BYTE_ALIGNMENT)));
-    for (int j = 0; j < 8; ++j)
-    {
-        dlnLBuf[j] = 0.;
-        d2lnLBuf[j] = 0.;
-    }
-    /* NOTE(review): this k1 is never used; the inner site loop declares its own k1 that shadows it */
-    __mmask16 k1 = _mm512_int2mask(0x000000FF);
-
-    for (int i = 0; i < aligned_width; i++)
-    {
-        _mm_prefetch((const char*) &sumtable[i * span * 8], _MM_HINT_T0);
-        _mm_prefetch((const char*) &sumtable[i * span * 8 + 8], _MM_HINT_T0);
-
-        /* access the array with pre-computed values */
-        const double *sum = &sumtable[i * span * 8];
-
-        /* initial per-site likelihood and 1st and 2nd derivatives */
-
-        double invBuf[8] __attribute__((align(64)));
-        double d1Buf[8] __attribute__((align(64)));
-        double d2Buf[8] __attribute__((align(64)));
-
-        __m512d invVec;
-        __m512d d1Vec;
-        __m512d d2Vec;
-        int mask = 0x01;
-
-        #pragma noprefetch sum
-        #pragma unroll(8)
-        for(int j = 0; j < 8; j++)
-        {
-            _mm_prefetch((const char*) &sum[span*(j+8)], _MM_HINT_T1);
-            _mm_prefetch((const char*) &sum[span*(j+8) + 8], _MM_HINT_T1);
-
-            _mm_prefetch((const char*) &sum[span*(j+1)], _MM_HINT_T0);
-            _mm_prefetch((const char*) &sum[span*(j+1) + 8], _MM_HINT_T0);
-
-            __m512d d0_1 = _mm512_load_pd(&diagptable0[0]);
-            __m512d d0_2 = _mm512_load_pd(&diagptable0[8]);
-
-            __m512d d01_1 = _mm512_load_pd(&diagptable01[0]);
-            __m512d d01_2 = _mm512_load_pd(&diagptable01[8]);
-
-            __m512d d02_1 = _mm512_load_pd(&diagptable02[0]);
-            __m512d d02_2 = _mm512_load_pd(&diagptable02[8]);
-
-            __m512d s_1 = _mm512_load_pd(&sum[j*16]);
-            __m512d s_2 = _mm512_load_pd(&sum[j*16 + 8]);
-            __m512d inv_1 = _mm512_mul_pd(d0_1, s_1);
-            __m512d d1_1 = _mm512_mul_pd(d01_1, s_1);
-            __m512d d2_1 = _mm512_mul_pd(d02_1, s_1);
-
-            __m512d inv_2 = _mm512_fmadd_pd(d0_2, s_2, inv_1);
-            __m512d d1_2 = _mm512_fmadd_pd(d01_2, s_2, d1_1);
-            __m512d d2_2 = _mm512_fmadd_pd(d02_2, s_2, d2_1);
-            /* one-hot lane mask for site j of this group: starts at 0x01 and is shifted left once per iteration */
-            __mmask8 k1 = _mm512_int2mask(mask);
-            mask <<= 1;
-
-            // reduce: horizontal add via the swizzle/permute steps, then lane j of the result is merged into the per-group vector
-            inv_2 = _mm512_add_pd (inv_2, _mm512_swizzle_pd(inv_2, _MM_SWIZ_REG_CDAB));
-            inv_2 = _mm512_add_pd (inv_2, _mm512_swizzle_pd(inv_2, _MM_SWIZ_REG_BADC));
-            inv_2 = _mm512_add_pd (inv_2, _mm512_castsi512_pd(_mm512_permute4f128_epi32(_mm512_castpd_si512(inv_2), _MM_PERM_BADC)));
-            invVec = _mm512_mask_mov_pd(invVec, k1, inv_2);
-
-            d1_2 = _mm512_add_pd (d1_2, _mm512_swizzle_pd(d1_2, _MM_SWIZ_REG_CDAB));
-            d1_2 = _mm512_add_pd (d1_2, _mm512_swizzle_pd(d1_2, _MM_SWIZ_REG_BADC));
-            d1_2 = _mm512_add_pd (d1_2, _mm512_castsi512_pd(_mm512_permute4f128_epi32(_mm512_castpd_si512(d1_2), _MM_PERM_BADC)));
-            d1Vec = _mm512_mask_mov_pd(d1Vec, k1, d1_2);
-
-            d2_2 = _mm512_add_pd (d2_2, _mm512_swizzle_pd(d2_2, _MM_SWIZ_REG_CDAB));
-            d2_2 = _mm512_add_pd (d2_2, _mm512_swizzle_pd(d2_2, _MM_SWIZ_REG_BADC));
-            d2_2 = _mm512_add_pd (d2_2, _mm512_castsi512_pd(_mm512_permute4f128_epi32(_mm512_castpd_si512(d2_2), _MM_PERM_BADC)));
-            d2Vec = _mm512_mask_mov_pd(d2Vec, k1, d2_2);
-        }
-
-        _mm512_store_pd(&invBuf[0], invVec);
-        _mm512_store_pd(&d1Buf[0], d1Vec);
-        _mm512_store_pd(&d2Buf[0], d2Vec);
-
-        #pragma ivdep
-        #pragma vector aligned
-        for (int j = 0; j < 8; ++j)
-        {
-            const double inv_Li = 1.0 / invBuf[j];
-
-            const double d1 = d1Buf[j] * inv_Li;
-            const double d2 = d2Buf[j] * inv_Li;
-
-            dlnLBuf[j] += wgt[i * 8 + j] * d1;
-            d2lnLBuf[j] += wgt[i * 8 + j] * (d2 - d1 * d1);
-        }
-    } // site loop
-
-    double dlnLdlz = 0.;
-    double d2lnLdlz2 = 0.;
-    for (int j = 0; j < 8; ++j)
-    {
-        dlnLdlz += dlnLBuf[j];
-        d2lnLdlz2 += d2lnLBuf[j];
-    }
-
-    *ext_dlnLdlz   = dlnLdlz;
-    *ext_d2lnLdlz2 = d2lnLdlz2;
-}
diff --git a/pllrepo/src/models.c b/pllrepo/src/models.c
deleted file mode 100644
index 7bc24ef..0000000
--- a/pllrepo/src/models.c
+++ /dev/null
@@ -1,4377 +0,0 @@
-/** 
- * PLL (version 1.0.0) a software library for phylogenetic inference
- * Copyright (C) 2013 Tomas Flouri and Alexandros Stamatakis
- *
- * Derived from 
- * RAxML-HPC, a program for sequential and parallel estimation of phylogenetic
- * trees by Alexandros Stamatakis
- *
- * This program is free software: you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation, either version 3 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- * 
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * For any other enquiries send an Email to Tomas Flouri
- * Tomas.Flouri at h-its.org
- *
- * When publishing work that uses PLL please cite PLL
- * 
- * @file models.c
- *  
- * @brief Model related code
- *
- * Detailed description to appear soon.
- */ 
-
-
-#include "mem_alloc.h"
-
-#ifndef WIN32
-#include <sys/times.h>
-#include <sys/types.h>
-#include <sys/time.h>
-#include <unistd.h> 
-#endif
-
-#include <math.h>
-#include <time.h> 
-#include <stdlib.h>
-#include <stdio.h>
-#include <ctype.h>
-#include <string.h>
-#include <assert.h>
-
-#include "pll.h"
-#include "pllInternal.h"
-
-
-extern const unsigned int bitVectorSecondary[256];
-extern const unsigned int bitVector32[33];
-extern const unsigned int bitVectorAA[23];
-extern const unsigned int bitVectorIdentity[256];
-
-extern const partitionLengths pLengths[PLL_MAX_MODEL];
-
-
-
-extern FILE *byteFile;
-
-
-
-
-
-
-
-
-
-/** @brief Hardcoded values for the WAG model
-  
-    Fill the \a ext_initialRates array with hardcoded substitution rates
-    of the WAG model.
-
-    The hardcoded table is given as the lower triangle of a 20x20 matrix. It is
-    mirrored to the upper triangle, every entry is multiplied by
-    1 / (entry at row 18, column 19), and the upper triangle is then written
-    out row by row (row i, columns i+1..19), i.e. 190 values in total.
-   
-    @param ext_initialRates
-      Where to place the substitution rates; must have room for 190 doubles
-*/
-void putWAG(double *ext_initialRates)
-{ 
-  double
-    scaler,
-    q[20][20],
-    daa[400];
-
-  int 
-    i,
-    j,
-    r;
-
-  /* fill the triangle below the diagonal with values */
-  daa[ 1*20+ 0] =  55.15710; daa[ 2*20+ 0] =  50.98480; daa[ 2*20+ 1] =  63.53460; 
-  daa[ 3*20+ 0] =  73.89980; daa[ 3*20+ 1] =  14.73040; daa[ 3*20+ 2] = 542.94200; 
-  daa[ 4*20+ 0] = 102.70400; daa[ 4*20+ 1] =  52.81910; daa[ 4*20+ 2] =  26.52560; 
-  daa[ 4*20+ 3] =   3.02949; daa[ 5*20+ 0] =  90.85980; daa[ 5*20+ 1] = 303.55000; 
-  daa[ 5*20+ 2] = 154.36400; daa[ 5*20+ 3] =  61.67830; daa[ 5*20+ 4] =   9.88179; 
-  daa[ 6*20+ 0] = 158.28500; daa[ 6*20+ 1] =  43.91570; daa[ 6*20+ 2] =  94.71980; 
-  daa[ 6*20+ 3] = 617.41600; daa[ 6*20+ 4] =   2.13520; daa[ 6*20+ 5] = 546.94700; 
-  daa[ 7*20+ 0] = 141.67200; daa[ 7*20+ 1] =  58.46650; daa[ 7*20+ 2] = 112.55600; 
-  daa[ 7*20+ 3] =  86.55840; daa[ 7*20+ 4] =  30.66740; daa[ 7*20+ 5] =  33.00520; 
-  daa[ 7*20+ 6] =  56.77170; daa[ 8*20+ 0] =  31.69540; daa[ 8*20+ 1] = 213.71500; 
-  daa[ 8*20+ 2] = 395.62900; daa[ 8*20+ 3] =  93.06760; daa[ 8*20+ 4] =  24.89720; 
-  daa[ 8*20+ 5] = 429.41100; daa[ 8*20+ 6] =  57.00250; daa[ 8*20+ 7] =  24.94100; 
-  daa[ 9*20+ 0] =  19.33350; daa[ 9*20+ 1] =  18.69790; daa[ 9*20+ 2] =  55.42360; 
-  daa[ 9*20+ 3] =   3.94370; daa[ 9*20+ 4] =  17.01350; daa[ 9*20+ 5] =  11.39170; 
-  daa[ 9*20+ 6] =  12.73950; daa[ 9*20+ 7] =   3.04501; daa[ 9*20+ 8] =  13.81900; 
-  daa[10*20+ 0] =  39.79150; daa[10*20+ 1] =  49.76710; daa[10*20+ 2] =  13.15280; 
-  daa[10*20+ 3] =   8.48047; daa[10*20+ 4] =  38.42870; daa[10*20+ 5] =  86.94890; 
-  daa[10*20+ 6] =  15.42630; daa[10*20+ 7] =   6.13037; daa[10*20+ 8] =  49.94620; 
-  daa[10*20+ 9] = 317.09700; daa[11*20+ 0] =  90.62650; daa[11*20+ 1] = 535.14200; 
-  daa[11*20+ 2] = 301.20100; daa[11*20+ 3] =  47.98550; daa[11*20+ 4] =   7.40339; 
-  daa[11*20+ 5] = 389.49000; daa[11*20+ 6] = 258.44300; daa[11*20+ 7] =  37.35580; 
-  daa[11*20+ 8] =  89.04320; daa[11*20+ 9] =  32.38320; daa[11*20+10] =  25.75550; 
-  daa[12*20+ 0] =  89.34960; daa[12*20+ 1] =  68.31620; daa[12*20+ 2] =  19.82210; 
-  daa[12*20+ 3] =  10.37540; daa[12*20+ 4] =  39.04820; daa[12*20+ 5] = 154.52600; 
-  daa[12*20+ 6] =  31.51240; daa[12*20+ 7] =  17.41000; daa[12*20+ 8] =  40.41410; 
-  daa[12*20+ 9] = 425.74600; daa[12*20+10] = 485.40200; daa[12*20+11] =  93.42760; 
-  daa[13*20+ 0] =  21.04940; daa[13*20+ 1] =  10.27110; daa[13*20+ 2] =   9.61621; 
-  daa[13*20+ 3] =   4.67304; daa[13*20+ 4] =  39.80200; daa[13*20+ 5] =   9.99208; 
-  daa[13*20+ 6] =   8.11339; daa[13*20+ 7] =   4.99310; daa[13*20+ 8] =  67.93710; 
-  daa[13*20+ 9] = 105.94700; daa[13*20+10] = 211.51700; daa[13*20+11] =   8.88360; 
-  daa[13*20+12] = 119.06300; daa[14*20+ 0] = 143.85500; daa[14*20+ 1] =  67.94890; 
-  daa[14*20+ 2] =  19.50810; daa[14*20+ 3] =  42.39840; daa[14*20+ 4] =  10.94040; 
-  daa[14*20+ 5] =  93.33720; daa[14*20+ 6] =  68.23550; daa[14*20+ 7] =  24.35700; 
-  daa[14*20+ 8] =  69.61980; daa[14*20+ 9] =   9.99288; daa[14*20+10] =  41.58440; 
-  daa[14*20+11] =  55.68960; daa[14*20+12] =  17.13290; daa[14*20+13] =  16.14440; 
-  daa[15*20+ 0] = 337.07900; daa[15*20+ 1] = 122.41900; daa[15*20+ 2] = 397.42300; 
-  daa[15*20+ 3] = 107.17600; daa[15*20+ 4] = 140.76600; daa[15*20+ 5] = 102.88700; 
-  daa[15*20+ 6] =  70.49390; daa[15*20+ 7] = 134.18200; daa[15*20+ 8] =  74.01690; 
-  daa[15*20+ 9] =  31.94400; daa[15*20+10] =  34.47390; daa[15*20+11] =  96.71300; 
-  daa[15*20+12] =  49.39050; daa[15*20+13] =  54.59310; daa[15*20+14] = 161.32800; 
-  daa[16*20+ 0] = 212.11100; daa[16*20+ 1] =  55.44130; daa[16*20+ 2] = 203.00600; 
-  daa[16*20+ 3] =  37.48660; daa[16*20+ 4] =  51.29840; daa[16*20+ 5] =  85.79280; 
-  daa[16*20+ 6] =  82.27650; daa[16*20+ 7] =  22.58330; daa[16*20+ 8] =  47.33070; 
-  daa[16*20+ 9] = 145.81600; daa[16*20+10] =  32.66220; daa[16*20+11] = 138.69800; 
-  daa[16*20+12] = 151.61200; daa[16*20+13] =  17.19030; daa[16*20+14] =  79.53840; 
-  daa[16*20+15] = 437.80200; daa[17*20+ 0] =  11.31330; daa[17*20+ 1] = 116.39200; 
-  daa[17*20+ 2] =   7.19167; daa[17*20+ 3] =  12.97670; daa[17*20+ 4] =  71.70700; 
-  daa[17*20+ 5] =  21.57370; daa[17*20+ 6] =  15.65570; daa[17*20+ 7] =  33.69830; 
-  daa[17*20+ 8] =  26.25690; daa[17*20+ 9] =  21.24830; daa[17*20+10] =  66.53090; 
-  daa[17*20+11] =  13.75050; daa[17*20+12] =  51.57060; daa[17*20+13] = 152.96400; 
-  daa[17*20+14] =  13.94050; daa[17*20+15] =  52.37420; daa[17*20+16] =  11.08640; 
-  daa[18*20+ 0] =  24.07350; daa[18*20+ 1] =  38.15330; daa[18*20+ 2] = 108.60000; 
-  daa[18*20+ 3] =  32.57110; daa[18*20+ 4] =  54.38330; daa[18*20+ 5] =  22.77100; 
-  daa[18*20+ 6] =  19.63030; daa[18*20+ 7] =  10.36040; daa[18*20+ 8] = 387.34400; 
-  daa[18*20+ 9] =  42.01700; daa[18*20+10] =  39.86180; daa[18*20+11] =  13.32640; 
-  daa[18*20+12] =  42.84370; daa[18*20+13] = 645.42800; daa[18*20+14] =  21.60460; 
-  daa[18*20+15] =  78.69930; daa[18*20+16] =  29.11480; daa[18*20+17] = 248.53900; 
-  daa[19*20+ 0] = 200.60100; daa[19*20+ 1] =  25.18490; daa[19*20+ 2] =  19.62460; 
-  daa[19*20+ 3] =  15.23350; daa[19*20+ 4] = 100.21400; daa[19*20+ 5] =  30.12810; 
-  daa[19*20+ 6] =  58.87310; daa[19*20+ 7] =  18.72470; daa[19*20+ 8] =  11.83580; 
-  daa[19*20+ 9] = 782.13000; daa[19*20+10] = 180.03400; daa[19*20+11] =  30.54340; 
-  daa[19*20+12] = 205.84500; daa[19*20+13] =  64.98920; daa[19*20+14] =  31.48870; 
-  daa[19*20+15] =  23.27390; daa[19*20+16] = 138.82300; daa[19*20+17] =  36.53690; 
-  daa[19*20+18] =  31.47300; 
-
-  /* initialize a 20x20 matrix */
-  for(i = 0; i < 20; i++)
-    for(j = 0; j < 20; j++)
-      q[i][j] = 0.0;
-
-  /* fill the triangle above the diagonal with the corresponding values from the
-     lower triangle (the diagonal of daa is never written and never read) */
-  for (i=0; i<20; i++)  
-    for (j=0; j<i; j++)               
-      daa[j*20+i] = daa[i*20+j];
-
-  /* copy the triangle above the diagonal from daa (which is a linear block) to
-     the triangle above the diagonal of a square matrix q */
-  for(i = 0; i < 19; i++)
-    for(j = i + 1; j < 20; j++)      
-      q[i][j] = daa[i * 20 + j];
-
-  
-  /*
-    for (i=0; i<20; i++) 
-    {
-      for (j=0; j<20; j++)
-        printf("%1.2f ", q[i][j]);
-      printf("\n");
-    }
-    printf("\n");
-
-    printf("%f\n", q[18][19]);
-  */
-
-  /* create a scaler from the last value (last row last column) of the upper
-     triangle of q */
-  scaler = 1.0 / q[18][19];
-
-  
-
-  /* scale all values of the upper triangle of the matrix */
-  for(i = 0; i < 19; i++)
-    for(j = i + 1; j < 20; j++)      
-      q[i][j] *= scaler;
-
-  /* copy the upper triangle of q to the linear array ext_initialRates,
-     row by row; r counts the values written so far */
-  for(i = 0, r = 0; i < 19; i++)          
-    for(j = i + 1; j < 20; j++)      
-      ext_initialRates[r++] = q[i][j];           
-      
-  /*
-    for (i=0; i<20; i++) 
-    {
-      for (j=0; j<20; j++)
-        printf("%1.2f ", q[i][j]);
-      printf("\n");
-    }
-    printf("\n");
-  */
-
-}
-
-
-
-/** @brief Initialize protein substitution rates matrix 
-  * 
-  * Initialize the array pointed to by \a ext_initialRates with the substitution
-  * rates of the corresponding protein model and set f to the appropriate
-  * stationary frequencies
-  *
-  * @param f
-  *   Array in which to store the stationary frequencies
-  *
-  * @param proteinMatrix
-  *   Which protein matrix to use  
-  *
-  * @param ext_initialRates
-  *   Where to store the retrieved substitution rates
-  *
-  * @param lg4_index
-  *   When filling the substitution rate matrix of an LG4 model, this index
-  *   specifies which of the four matrices to use
-  *
-*/
-static void initProtMat(double f[20], int proteinMatrix, double *ext_initialRates, int lg4_index)
-{ 
-  double q[20][20];
-  double daa[400], max, temp;
-  int i, j, r;
-  double *initialRates = ext_initialRates;
-  double scaler;
-
-  {
-      switch(proteinMatrix)
-        {
-        case PLL_DAYHOFF:
-          {     
-            daa[ 1*20+ 0] =   27.00; daa[ 2*20+ 0] =   98.00; daa[ 2*20+ 1] =   32.00; daa[ 3*20+ 0] =  120.00;
-            daa[ 3*20+ 1] =    0.00; daa[ 3*20+ 2] =  905.00; daa[ 4*20+ 0] =   36.00; daa[ 4*20+ 1] =   23.00;
-            daa[ 4*20+ 2] =    0.00; daa[ 4*20+ 3] =    0.00; daa[ 5*20+ 0] =   89.00; daa[ 5*20+ 1] =  246.00;
-            daa[ 5*20+ 2] =  103.00; daa[ 5*20+ 3] =  134.00; daa[ 5*20+ 4] =    0.00; daa[ 6*20+ 0] =  198.00;
-            daa[ 6*20+ 1] =    1.00; daa[ 6*20+ 2] =  148.00; daa[ 6*20+ 3] = 1153.00; daa[ 6*20+ 4] =    0.00;
-            daa[ 6*20+ 5] =  716.00; daa[ 7*20+ 0] =  240.00; daa[ 7*20+ 1] =    9.00; daa[ 7*20+ 2] =  139.00;
-            daa[ 7*20+ 3] =  125.00; daa[ 7*20+ 4] =   11.00; daa[ 7*20+ 5] =   28.00; daa[ 7*20+ 6] =   81.00;
-            daa[ 8*20+ 0] =   23.00; daa[ 8*20+ 1] =  240.00; daa[ 8*20+ 2] =  535.00; daa[ 8*20+ 3] =   86.00;
-            daa[ 8*20+ 4] =   28.00; daa[ 8*20+ 5] =  606.00; daa[ 8*20+ 6] =   43.00; daa[ 8*20+ 7] =   10.00;
-            daa[ 9*20+ 0] =   65.00; daa[ 9*20+ 1] =   64.00; daa[ 9*20+ 2] =   77.00; daa[ 9*20+ 3] =   24.00;
-            daa[ 9*20+ 4] =   44.00; daa[ 9*20+ 5] =   18.00; daa[ 9*20+ 6] =   61.00; daa[ 9*20+ 7] =    0.00;
-            daa[ 9*20+ 8] =    7.00; daa[10*20+ 0] =   41.00; daa[10*20+ 1] =   15.00; daa[10*20+ 2] =   34.00;
-            daa[10*20+ 3] =    0.00; daa[10*20+ 4] =    0.00; daa[10*20+ 5] =   73.00; daa[10*20+ 6] =   11.00;
-            daa[10*20+ 7] =    7.00; daa[10*20+ 8] =   44.00; daa[10*20+ 9] =  257.00; daa[11*20+ 0] =   26.00;
-            daa[11*20+ 1] =  464.00; daa[11*20+ 2] =  318.00; daa[11*20+ 3] =   71.00; daa[11*20+ 4] =    0.00;
-            daa[11*20+ 5] =  153.00; daa[11*20+ 6] =   83.00; daa[11*20+ 7] =   27.00; daa[11*20+ 8] =   26.00;
-            daa[11*20+ 9] =   46.00; daa[11*20+10] =   18.00; daa[12*20+ 0] =   72.00; daa[12*20+ 1] =   90.00;
-            daa[12*20+ 2] =    1.00; daa[12*20+ 3] =    0.00; daa[12*20+ 4] =    0.00; daa[12*20+ 5] =  114.00;
-            daa[12*20+ 6] =   30.00; daa[12*20+ 7] =   17.00; daa[12*20+ 8] =    0.00; daa[12*20+ 9] =  336.00;
-            daa[12*20+10] =  527.00; daa[12*20+11] =  243.00; daa[13*20+ 0] =   18.00; daa[13*20+ 1] =   14.00;
-            daa[13*20+ 2] =   14.00; daa[13*20+ 3] =    0.00; daa[13*20+ 4] =    0.00; daa[13*20+ 5] =    0.00;
-            daa[13*20+ 6] =    0.00; daa[13*20+ 7] =   15.00; daa[13*20+ 8] =   48.00; daa[13*20+ 9] =  196.00;
-            daa[13*20+10] =  157.00; daa[13*20+11] =    0.00; daa[13*20+12] =   92.00; daa[14*20+ 0] =  250.00;
-            daa[14*20+ 1] =  103.00; daa[14*20+ 2] =   42.00; daa[14*20+ 3] =   13.00; daa[14*20+ 4] =   19.00;
-            daa[14*20+ 5] =  153.00; daa[14*20+ 6] =   51.00; daa[14*20+ 7] =   34.00; daa[14*20+ 8] =   94.00;
-            daa[14*20+ 9] =   12.00; daa[14*20+10] =   32.00; daa[14*20+11] =   33.00; daa[14*20+12] =   17.00;
-            daa[14*20+13] =   11.00; daa[15*20+ 0] =  409.00; daa[15*20+ 1] =  154.00; daa[15*20+ 2] =  495.00;
-            daa[15*20+ 3] =   95.00; daa[15*20+ 4] =  161.00; daa[15*20+ 5] =   56.00; daa[15*20+ 6] =   79.00;
-            daa[15*20+ 7] =  234.00; daa[15*20+ 8] =   35.00; daa[15*20+ 9] =   24.00; daa[15*20+10] =   17.00;
-            daa[15*20+11] =   96.00; daa[15*20+12] =   62.00; daa[15*20+13] =   46.00; daa[15*20+14] =  245.00;
-            daa[16*20+ 0] =  371.00; daa[16*20+ 1] =   26.00; daa[16*20+ 2] =  229.00; daa[16*20+ 3] =   66.00;
-            daa[16*20+ 4] =   16.00; daa[16*20+ 5] =   53.00; daa[16*20+ 6] =   34.00; daa[16*20+ 7] =   30.00;
-            daa[16*20+ 8] =   22.00; daa[16*20+ 9] =  192.00; daa[16*20+10] =   33.00; daa[16*20+11] =  136.00;
-            daa[16*20+12] =  104.00; daa[16*20+13] =   13.00; daa[16*20+14] =   78.00; daa[16*20+15] =  550.00;
-            daa[17*20+ 0] =    0.00; daa[17*20+ 1] =  201.00; daa[17*20+ 2] =   23.00; daa[17*20+ 3] =    0.00;
-            daa[17*20+ 4] =    0.00; daa[17*20+ 5] =    0.00; daa[17*20+ 6] =    0.00; daa[17*20+ 7] =    0.00;
-            daa[17*20+ 8] =   27.00; daa[17*20+ 9] =    0.00; daa[17*20+10] =   46.00; daa[17*20+11] =    0.00;
-            daa[17*20+12] =    0.00; daa[17*20+13] =   76.00; daa[17*20+14] =    0.00; daa[17*20+15] =   75.00;
-            daa[17*20+16] =    0.00; daa[18*20+ 0] =   24.00; daa[18*20+ 1] =    8.00; daa[18*20+ 2] =   95.00;
-            daa[18*20+ 3] =    0.00; daa[18*20+ 4] =   96.00; daa[18*20+ 5] =    0.00; daa[18*20+ 6] =   22.00;
-            daa[18*20+ 7] =    0.00; daa[18*20+ 8] =  127.00; daa[18*20+ 9] =   37.00; daa[18*20+10] =   28.00;
-            daa[18*20+11] =   13.00; daa[18*20+12] =    0.00; daa[18*20+13] =  698.00; daa[18*20+14] =    0.00;
-            daa[18*20+15] =   34.00; daa[18*20+16] =   42.00; daa[18*20+17] =   61.00; daa[19*20+ 0] =  208.00;
-            daa[19*20+ 1] =   24.00; daa[19*20+ 2] =   15.00; daa[19*20+ 3] =   18.00; daa[19*20+ 4] =   49.00;
-            daa[19*20+ 5] =   35.00; daa[19*20+ 6] =   37.00; daa[19*20+ 7] =   54.00; daa[19*20+ 8] =   44.00;
-            daa[19*20+ 9] =  889.00; daa[19*20+10] =  175.00; daa[19*20+11] =   10.00; daa[19*20+12] =  258.00;
-            daa[19*20+13] =   12.00; daa[19*20+14] =   48.00; daa[19*20+15] =   30.00; daa[19*20+16] =  157.00;
-            daa[19*20+17] =    0.00; daa[19*20+18] =   28.00;               
-
-	    f[ 0] = 0.087127; f[ 1] = 0.040904; f[ 2] = 0.040432; f[ 3] = 0.046872;
-	    f[ 4] = 0.033474; f[ 5] = 0.038255; f[ 6] = 0.049530; f[ 7] = 0.088612;
-	    f[ 8] = 0.033618; f[ 9] = 0.036886; f[10] = 0.085357; f[11] = 0.080482;
-	    f[12] = 0.014753; f[13] = 0.039772; f[14] = 0.050680; f[15] = 0.069577;
-	    f[16] = 0.058542; f[17] = 0.010494; f[18] = 0.029916; f[19] = 0.064717;
-          }
-          break;
-        case PLL_DCMUT:
-          {     
-            daa[ 1*20+ 0] =   26.78280; daa[ 2*20+ 0] =   98.44740; daa[ 2*20+ 1] =   32.70590; daa[ 3*20+ 0] =  119.98050; 
-            daa[ 3*20+ 1] =    0.00000; daa[ 3*20+ 2] =  893.15150; daa[ 4*20+ 0] =   36.00160; daa[ 4*20+ 1] =   23.23740; 
-            daa[ 4*20+ 2] =    0.00000; daa[ 4*20+ 3] =    0.00000; daa[ 5*20+ 0] =   88.77530; daa[ 5*20+ 1] =  243.99390; 
-            daa[ 5*20+ 2] =  102.85090; daa[ 5*20+ 3] =  134.85510; daa[ 5*20+ 4] =    0.00000; daa[ 6*20+ 0] =  196.11670; 
-            daa[ 6*20+ 1] =    0.00000; daa[ 6*20+ 2] =  149.34090; daa[ 6*20+ 3] = 1138.86590; daa[ 6*20+ 4] =    0.00000; 
-            daa[ 6*20+ 5] =  708.60220; daa[ 7*20+ 0] =  238.61110; daa[ 7*20+ 1] =    8.77910; daa[ 7*20+ 2] =  138.53520; 
-            daa[ 7*20+ 3] =  124.09810; daa[ 7*20+ 4] =   10.72780; daa[ 7*20+ 5] =   28.15810; daa[ 7*20+ 6] =   81.19070; 
-            daa[ 8*20+ 0] =   22.81160; daa[ 8*20+ 1] =  238.31480; daa[ 8*20+ 2] =  529.00240; daa[ 8*20+ 3] =   86.82410; 
-            daa[ 8*20+ 4] =   28.27290; daa[ 8*20+ 5] =  601.16130; daa[ 8*20+ 6] =   43.94690; daa[ 8*20+ 7] =   10.68020; 
-            daa[ 9*20+ 0] =   65.34160; daa[ 9*20+ 1] =   63.26290; daa[ 9*20+ 2] =   76.80240; daa[ 9*20+ 3] =   23.92480; 
-            daa[ 9*20+ 4] =   43.80740; daa[ 9*20+ 5] =   18.03930; daa[ 9*20+ 6] =   60.95260; daa[ 9*20+ 7] =    0.00000; 
-            daa[ 9*20+ 8] =    7.69810; daa[10*20+ 0] =   40.64310; daa[10*20+ 1] =   15.49240; daa[10*20+ 2] =   34.11130; 
-            daa[10*20+ 3] =    0.00000; daa[10*20+ 4] =    0.00000; daa[10*20+ 5] =   73.07720; daa[10*20+ 6] =   11.28800; 
-            daa[10*20+ 7] =    7.15140; daa[10*20+ 8] =   44.35040; daa[10*20+ 9] =  255.66850; daa[11*20+ 0] =   25.86350; 
-            daa[11*20+ 1] =  461.01240; daa[11*20+ 2] =  314.83710; daa[11*20+ 3] =   71.69130; daa[11*20+ 4] =    0.00000; 
-            daa[11*20+ 5] =  151.90780; daa[11*20+ 6] =   83.00780; daa[11*20+ 7] =   26.76830; daa[11*20+ 8] =   27.04750; 
-            daa[11*20+ 9] =   46.08570; daa[11*20+10] =   18.06290; daa[12*20+ 0] =   71.78400; daa[12*20+ 1] =   89.63210; 
-            daa[12*20+ 2] =    0.00000; daa[12*20+ 3] =    0.00000; daa[12*20+ 4] =    0.00000; daa[12*20+ 5] =  112.74990; 
-            daa[12*20+ 6] =   30.48030; daa[12*20+ 7] =   17.03720; daa[12*20+ 8] =    0.00000; daa[12*20+ 9] =  333.27320; 
-            daa[12*20+10] =  523.01150; daa[12*20+11] =  241.17390; daa[13*20+ 0] =   18.36410; daa[13*20+ 1] =   13.69060; 
-            daa[13*20+ 2] =   13.85030; daa[13*20+ 3] =    0.00000; daa[13*20+ 4] =    0.00000; daa[13*20+ 5] =    0.00000; 
-            daa[13*20+ 6] =    0.00000; daa[13*20+ 7] =   15.34780; daa[13*20+ 8] =   47.59270; daa[13*20+ 9] =  195.19510; 
-            daa[13*20+10] =  156.51600; daa[13*20+11] =    0.00000; daa[13*20+12] =   92.18600; daa[14*20+ 0] =  248.59200; 
-            daa[14*20+ 1] =  102.83130; daa[14*20+ 2] =   41.92440; daa[14*20+ 3] =   13.39400; daa[14*20+ 4] =   18.75500; 
-            daa[14*20+ 5] =  152.61880; daa[14*20+ 6] =   50.70030; daa[14*20+ 7] =   34.71530; daa[14*20+ 8] =   93.37090; 
-            daa[14*20+ 9] =   11.91520; daa[14*20+10] =   31.62580; daa[14*20+11] =   33.54190; daa[14*20+12] =   17.02050; 
-            daa[14*20+13] =   11.05060; daa[15*20+ 0] =  405.18700; daa[15*20+ 1] =  153.15900; daa[15*20+ 2] =  488.58920; 
-            daa[15*20+ 3] =   95.60970; daa[15*20+ 4] =  159.83560; daa[15*20+ 5] =   56.18280; daa[15*20+ 6] =   79.39990; 
-            daa[15*20+ 7] =  232.22430; daa[15*20+ 8] =   35.36430; daa[15*20+ 9] =   24.79550; daa[15*20+10] =   17.14320; 
-            daa[15*20+11] =   95.45570; daa[15*20+12] =   61.99510; daa[15*20+13] =   45.99010; daa[15*20+14] =  242.72020; 
-            daa[16*20+ 0] =  368.03650; daa[16*20+ 1] =   26.57450; daa[16*20+ 2] =  227.16970; daa[16*20+ 3] =   66.09300; 
-            daa[16*20+ 4] =   16.23660; daa[16*20+ 5] =   52.56510; daa[16*20+ 6] =   34.01560; daa[16*20+ 7] =   30.66620; 
-            daa[16*20+ 8] =   22.63330; daa[16*20+ 9] =  190.07390; daa[16*20+10] =   33.10900; daa[16*20+11] =  135.05990; 
-            daa[16*20+12] =  103.15340; daa[16*20+13] =   13.66550; daa[16*20+14] =   78.28570; daa[16*20+15] =  543.66740; 
-            daa[17*20+ 0] =    0.00000; daa[17*20+ 1] =  200.13750; daa[17*20+ 2] =   22.49680; daa[17*20+ 3] =    0.00000; 
-            daa[17*20+ 4] =    0.00000; daa[17*20+ 5] =    0.00000; daa[17*20+ 6] =    0.00000; daa[17*20+ 7] =    0.00000; 
-            daa[17*20+ 8] =   27.05640; daa[17*20+ 9] =    0.00000; daa[17*20+10] =   46.17760; daa[17*20+11] =    0.00000; 
-            daa[17*20+12] =    0.00000; daa[17*20+13] =   76.23540; daa[17*20+14] =    0.00000; daa[17*20+15] =   74.08190; 
-            daa[17*20+16] =    0.00000; daa[18*20+ 0] =   24.41390; daa[18*20+ 1] =    7.80120; daa[18*20+ 2] =   94.69400; 
-            daa[18*20+ 3] =    0.00000; daa[18*20+ 4] =   95.31640; daa[18*20+ 5] =    0.00000; daa[18*20+ 6] =   21.47170; 
-            daa[18*20+ 7] =    0.00000; daa[18*20+ 8] =  126.54000; daa[18*20+ 9] =   37.48340; daa[18*20+10] =   28.65720; 
-            daa[18*20+11] =   13.21420; daa[18*20+12] =    0.00000; daa[18*20+13] =  695.26290; daa[18*20+14] =    0.00000; 
-            daa[18*20+15] =   33.62890; daa[18*20+16] =   41.78390; daa[18*20+17] =   60.80700; daa[19*20+ 0] =  205.95640; 
-            daa[19*20+ 1] =   24.03680; daa[19*20+ 2] =   15.80670; daa[19*20+ 3] =   17.83160; daa[19*20+ 4] =   48.46780; 
-            daa[19*20+ 5] =   34.69830; daa[19*20+ 6] =   36.72500; daa[19*20+ 7] =   53.81650; daa[19*20+ 8] =   43.87150; 
-            daa[19*20+ 9] =  881.00380; daa[19*20+10] =  174.51560; daa[19*20+11] =   10.38500; daa[19*20+12] =  256.59550; 
-            daa[19*20+13] =   12.36060; daa[19*20+14] =   48.50260; daa[19*20+15] =   30.38360; daa[19*20+16] =  156.19970; 
-            daa[19*20+17] =    0.00000; daa[19*20+18] =   27.93790;                
-
-	    f[ 0] = 0.087127; f[ 1] = 0.040904; f[ 2] = 0.040432; f[ 3] = 0.046872;
-	    f[ 4] = 0.033474; f[ 5] = 0.038255; f[ 6] = 0.049530; f[ 7] = 0.088612;
-	    f[ 8] = 0.033619; f[ 9] = 0.036886; f[10] = 0.085357; f[11] = 0.080481;
-	    f[12] = 0.014753; f[13] = 0.039772; f[14] = 0.050680; f[15] = 0.069577;
-	    f[16] = 0.058542; f[17] = 0.010494; f[18] = 0.029916; f[19] = 0.064717;
-
-	    f[ 0] = 0.087127; f[ 1] = 0.040904; f[ 2] = 0.040432; f[ 3] = 0.046872;
-	    f[ 4] = 0.033474; f[ 5] = 0.038255; f[ 6] = 0.049530; f[ 7] = 0.088612;
-	    f[ 8] = 0.033619; f[ 9] = 0.036886; f[10] = 0.085357; f[11] = 0.080481;
-	    f[12] = 0.014753; f[13] = 0.039772; f[14] = 0.050680; f[15] = 0.069577;
-	    f[16] = 0.058542; f[17] = 0.010494; f[18] = 0.029916; f[19] = 0.064717;
-
-          }
-          break;
-        case PLL_JTT:
-          {
-            daa[ 1*20+ 0] =   58.00; daa[ 2*20+ 0] =   54.00; daa[ 2*20+ 1] =   45.00; daa[ 3*20+ 0] =   81.00;
-            daa[ 3*20+ 1] =   16.00; daa[ 3*20+ 2] =  528.00; daa[ 4*20+ 0] =   56.00; daa[ 4*20+ 1] =  113.00;
-            daa[ 4*20+ 2] =   34.00; daa[ 4*20+ 3] =   10.00; daa[ 5*20+ 0] =   57.00; daa[ 5*20+ 1] =  310.00;
-            daa[ 5*20+ 2] =   86.00; daa[ 5*20+ 3] =   49.00; daa[ 5*20+ 4] =    9.00; daa[ 6*20+ 0] =  105.00;
-            daa[ 6*20+ 1] =   29.00; daa[ 6*20+ 2] =   58.00; daa[ 6*20+ 3] =  767.00; daa[ 6*20+ 4] =    5.00;
-            daa[ 6*20+ 5] =  323.00; daa[ 7*20+ 0] =  179.00; daa[ 7*20+ 1] =  137.00; daa[ 7*20+ 2] =   81.00;
-            daa[ 7*20+ 3] =  130.00; daa[ 7*20+ 4] =   59.00; daa[ 7*20+ 5] =   26.00; daa[ 7*20+ 6] =  119.00;
-            daa[ 8*20+ 0] =   27.00; daa[ 8*20+ 1] =  328.00; daa[ 8*20+ 2] =  391.00; daa[ 8*20+ 3] =  112.00;
-            daa[ 8*20+ 4] =   69.00; daa[ 8*20+ 5] =  597.00; daa[ 8*20+ 6] =   26.00; daa[ 8*20+ 7] =   23.00;
-            daa[ 9*20+ 0] =   36.00; daa[ 9*20+ 1] =   22.00; daa[ 9*20+ 2] =   47.00; daa[ 9*20+ 3] =   11.00;
-            daa[ 9*20+ 4] =   17.00; daa[ 9*20+ 5] =    9.00; daa[ 9*20+ 6] =   12.00; daa[ 9*20+ 7] =    6.00;
-            daa[ 9*20+ 8] =   16.00; daa[10*20+ 0] =   30.00; daa[10*20+ 1] =   38.00; daa[10*20+ 2] =   12.00;
-            daa[10*20+ 3] =    7.00; daa[10*20+ 4] =   23.00; daa[10*20+ 5] =   72.00; daa[10*20+ 6] =    9.00;
-            daa[10*20+ 7] =    6.00; daa[10*20+ 8] =   56.00; daa[10*20+ 9] =  229.00; daa[11*20+ 0] =   35.00;
-            daa[11*20+ 1] =  646.00; daa[11*20+ 2] =  263.00; daa[11*20+ 3] =   26.00; daa[11*20+ 4] =    7.00;
-            daa[11*20+ 5] =  292.00; daa[11*20+ 6] =  181.00; daa[11*20+ 7] =   27.00; daa[11*20+ 8] =   45.00;
-            daa[11*20+ 9] =   21.00; daa[11*20+10] =   14.00; daa[12*20+ 0] =   54.00; daa[12*20+ 1] =   44.00;
-            daa[12*20+ 2] =   30.00; daa[12*20+ 3] =   15.00; daa[12*20+ 4] =   31.00; daa[12*20+ 5] =   43.00;
-            daa[12*20+ 6] =   18.00; daa[12*20+ 7] =   14.00; daa[12*20+ 8] =   33.00; daa[12*20+ 9] =  479.00;
-            daa[12*20+10] =  388.00; daa[12*20+11] =   65.00; daa[13*20+ 0] =   15.00; daa[13*20+ 1] =    5.00;
-            daa[13*20+ 2] =   10.00; daa[13*20+ 3] =    4.00; daa[13*20+ 4] =   78.00; daa[13*20+ 5] =    4.00;
-            daa[13*20+ 6] =    5.00; daa[13*20+ 7] =    5.00; daa[13*20+ 8] =   40.00; daa[13*20+ 9] =   89.00;
-            daa[13*20+10] =  248.00; daa[13*20+11] =    4.00; daa[13*20+12] =   43.00; daa[14*20+ 0] =  194.00;
-            daa[14*20+ 1] =   74.00; daa[14*20+ 2] =   15.00; daa[14*20+ 3] =   15.00; daa[14*20+ 4] =   14.00;
-            daa[14*20+ 5] =  164.00; daa[14*20+ 6] =   18.00; daa[14*20+ 7] =   24.00; daa[14*20+ 8] =  115.00;
-            daa[14*20+ 9] =   10.00; daa[14*20+10] =  102.00; daa[14*20+11] =   21.00; daa[14*20+12] =   16.00;
-            daa[14*20+13] =   17.00; daa[15*20+ 0] =  378.00; daa[15*20+ 1] =  101.00; daa[15*20+ 2] =  503.00;
-            daa[15*20+ 3] =   59.00; daa[15*20+ 4] =  223.00; daa[15*20+ 5] =   53.00; daa[15*20+ 6] =   30.00;
-            daa[15*20+ 7] =  201.00; daa[15*20+ 8] =   73.00; daa[15*20+ 9] =   40.00; daa[15*20+10] =   59.00;
-            daa[15*20+11] =   47.00; daa[15*20+12] =   29.00; daa[15*20+13] =   92.00; daa[15*20+14] =  285.00;
-            daa[16*20+ 0] =  475.00; daa[16*20+ 1] =   64.00; daa[16*20+ 2] =  232.00; daa[16*20+ 3] =   38.00;
-            daa[16*20+ 4] =   42.00; daa[16*20+ 5] =   51.00; daa[16*20+ 6] =   32.00; daa[16*20+ 7] =   33.00;
-            daa[16*20+ 8] =   46.00; daa[16*20+ 9] =  245.00; daa[16*20+10] =   25.00; daa[16*20+11] =  103.00;
-            daa[16*20+12] =  226.00; daa[16*20+13] =   12.00; daa[16*20+14] =  118.00; daa[16*20+15] =  477.00;
-            daa[17*20+ 0] =    9.00; daa[17*20+ 1] =  126.00; daa[17*20+ 2] =    8.00; daa[17*20+ 3] =    4.00;
-            daa[17*20+ 4] =  115.00; daa[17*20+ 5] =   18.00; daa[17*20+ 6] =   10.00; daa[17*20+ 7] =   55.00;
-            daa[17*20+ 8] =    8.00; daa[17*20+ 9] =    9.00; daa[17*20+10] =   52.00; daa[17*20+11] =   10.00;
-            daa[17*20+12] =   24.00; daa[17*20+13] =   53.00; daa[17*20+14] =    6.00; daa[17*20+15] =   35.00;
-            daa[17*20+16] =   12.00; daa[18*20+ 0] =   11.00; daa[18*20+ 1] =   20.00; daa[18*20+ 2] =   70.00;
-            daa[18*20+ 3] =   46.00; daa[18*20+ 4] =  209.00; daa[18*20+ 5] =   24.00; daa[18*20+ 6] =    7.00;
-            daa[18*20+ 7] =    8.00; daa[18*20+ 8] =  573.00; daa[18*20+ 9] =   32.00; daa[18*20+10] =   24.00;
-            daa[18*20+11] =    8.00; daa[18*20+12] =   18.00; daa[18*20+13] =  536.00; daa[18*20+14] =   10.00;
-            daa[18*20+15] =   63.00; daa[18*20+16] =   21.00; daa[18*20+17] =   71.00; daa[19*20+ 0] =  298.00;
-            daa[19*20+ 1] =   17.00; daa[19*20+ 2] =   16.00; daa[19*20+ 3] =   31.00; daa[19*20+ 4] =   62.00;
-            daa[19*20+ 5] =   20.00; daa[19*20+ 6] =   45.00; daa[19*20+ 7] =   47.00; daa[19*20+ 8] =   11.00;
-            daa[19*20+ 9] =  961.00; daa[19*20+10] =  180.00; daa[19*20+11] =   14.00; daa[19*20+12] =  323.00;
-            daa[19*20+13] =   62.00; daa[19*20+14] =   23.00; daa[19*20+15] =   38.00; daa[19*20+16] =  112.00;
-            daa[19*20+17] =   25.00; daa[19*20+18] =   16.00;
-                    
-	    f[ 0] = 0.076748; f[ 1] = 0.051691; f[ 2] = 0.042645; f[ 3] = 0.051544;
-	    f[ 4] = 0.019803; f[ 5] = 0.040752; f[ 6] = 0.061830; f[ 7] = 0.073152;
-	    f[ 8] = 0.022944; f[ 9] = 0.053761; f[10] = 0.091904; f[11] = 0.058676;
-	    f[12] = 0.023826; f[13] = 0.040126; f[14] = 0.050901; f[15] = 0.068765;
-	    f[16] = 0.058565; f[17] = 0.014261; f[18] = 0.032102; f[19] = 0.066004;
-          }
-          break;
-        case  PLL_MTREV:
-          {
-            daa[ 1*20+ 0] =   23.18; daa[ 2*20+ 0] =   26.95; daa[ 2*20+ 1] =   13.24; daa[ 3*20+ 0] =   17.67;
-            daa[ 3*20+ 1] =    1.90; daa[ 3*20+ 2] =  794.38; daa[ 4*20+ 0] =   59.93; daa[ 4*20+ 1] =  103.33;
-            daa[ 4*20+ 2] =   58.94; daa[ 4*20+ 3] =    1.90; daa[ 5*20+ 0] =    1.90; daa[ 5*20+ 1] =  220.99;
-            daa[ 5*20+ 2] =  173.56; daa[ 5*20+ 3] =   55.28; daa[ 5*20+ 4] =   75.24; daa[ 6*20+ 0] =    9.77;
-            daa[ 6*20+ 1] =    1.90; daa[ 6*20+ 2] =   63.05; daa[ 6*20+ 3] =  583.55; daa[ 6*20+ 4] =    1.90;
-            daa[ 6*20+ 5] =  313.56; daa[ 7*20+ 0] =  120.71; daa[ 7*20+ 1] =   23.03; daa[ 7*20+ 2] =   53.30;
-            daa[ 7*20+ 3] =   56.77; daa[ 7*20+ 4] =   30.71; daa[ 7*20+ 5] =    6.75; daa[ 7*20+ 6] =   28.28;
-            daa[ 8*20+ 0] =   13.90; daa[ 8*20+ 1] =  165.23; daa[ 8*20+ 2] =  496.13; daa[ 8*20+ 3] =  113.99;
-            daa[ 8*20+ 4] =  141.49; daa[ 8*20+ 5] =  582.40; daa[ 8*20+ 6] =   49.12; daa[ 8*20+ 7] =    1.90;
-            daa[ 9*20+ 0] =   96.49; daa[ 9*20+ 1] =    1.90; daa[ 9*20+ 2] =   27.10; daa[ 9*20+ 3] =    4.34;
-            daa[ 9*20+ 4] =   62.73; daa[ 9*20+ 5] =    8.34; daa[ 9*20+ 6] =    3.31; daa[ 9*20+ 7] =    5.98;
-            daa[ 9*20+ 8] =   12.26; daa[10*20+ 0] =   25.46; daa[10*20+ 1] =   15.58; daa[10*20+ 2] =   15.16;
-            daa[10*20+ 3] =    1.90; daa[10*20+ 4] =   25.65; daa[10*20+ 5] =   39.70; daa[10*20+ 6] =    1.90;
-            daa[10*20+ 7] =    2.41; daa[10*20+ 8] =   11.49; daa[10*20+ 9] =  329.09; daa[11*20+ 0] =    8.36;
-            daa[11*20+ 1] =  141.40; daa[11*20+ 2] =  608.70; daa[11*20+ 3] =    2.31; daa[11*20+ 4] =    1.90;
-            daa[11*20+ 5] =  465.58; daa[11*20+ 6] =  313.86; daa[11*20+ 7] =   22.73; daa[11*20+ 8] =  127.67;
-            daa[11*20+ 9] =   19.57; daa[11*20+10] =   14.88; daa[12*20+ 0] =  141.88; daa[12*20+ 1] =    1.90;
-            daa[12*20+ 2] =   65.41; daa[12*20+ 3] =    1.90; daa[12*20+ 4] =    6.18; daa[12*20+ 5] =   47.37;
-            daa[12*20+ 6] =    1.90; daa[12*20+ 7] =    1.90; daa[12*20+ 8] =   11.97; daa[12*20+ 9] =  517.98;
-            daa[12*20+10] =  537.53; daa[12*20+11] =   91.37; daa[13*20+ 0] =    6.37; daa[13*20+ 1] =    4.69;
-            daa[13*20+ 2] =   15.20; daa[13*20+ 3] =    4.98; daa[13*20+ 4] =   70.80; daa[13*20+ 5] =   19.11;
-            daa[13*20+ 6] =    2.67; daa[13*20+ 7] =    1.90; daa[13*20+ 8] =   48.16; daa[13*20+ 9] =   84.67;
-            daa[13*20+10] =  216.06; daa[13*20+11] =    6.44; daa[13*20+12] =   90.82; daa[14*20+ 0] =   54.31;
-            daa[14*20+ 1] =   23.64; daa[14*20+ 2] =   73.31; daa[14*20+ 3] =   13.43; daa[14*20+ 4] =   31.26;
-            daa[14*20+ 5] =  137.29; daa[14*20+ 6] =   12.83; daa[14*20+ 7] =    1.90; daa[14*20+ 8] =   60.97;
-            daa[14*20+ 9] =   20.63; daa[14*20+10] =   40.10; daa[14*20+11] =   50.10; daa[14*20+12] =   18.84;
-            daa[14*20+13] =   17.31; daa[15*20+ 0] =  387.86; daa[15*20+ 1] =    6.04; daa[15*20+ 2] =  494.39;
-            daa[15*20+ 3] =   69.02; daa[15*20+ 4] =  277.05; daa[15*20+ 5] =   54.11; daa[15*20+ 6] =   54.71;
-            daa[15*20+ 7] =  125.93; daa[15*20+ 8] =   77.46; daa[15*20+ 9] =   47.70; daa[15*20+10] =   73.61;
-            daa[15*20+11] =  105.79; daa[15*20+12] =  111.16; daa[15*20+13] =   64.29; daa[15*20+14] =  169.90;
-            daa[16*20+ 0] =  480.72; daa[16*20+ 1] =    2.08; daa[16*20+ 2] =  238.46; daa[16*20+ 3] =   28.01;
-            daa[16*20+ 4] =  179.97; daa[16*20+ 5] =   94.93; daa[16*20+ 6] =   14.82; daa[16*20+ 7] =   11.17;
-            daa[16*20+ 8] =   44.78; daa[16*20+ 9] =  368.43; daa[16*20+10] =  126.40; daa[16*20+11] =  136.33;
-            daa[16*20+12] =  528.17; daa[16*20+13] =   33.85; daa[16*20+14] =  128.22; daa[16*20+15] =  597.21;
-            daa[17*20+ 0] =    1.90; daa[17*20+ 1] =   21.95; daa[17*20+ 2] =   10.68; daa[17*20+ 3] =   19.86;
-            daa[17*20+ 4] =   33.60; daa[17*20+ 5] =    1.90; daa[17*20+ 6] =    1.90; daa[17*20+ 7] =   10.92;
-            daa[17*20+ 8] =    7.08; daa[17*20+ 9] =    1.90; daa[17*20+10] =   32.44; daa[17*20+11] =   24.00;
-            daa[17*20+12] =   21.71; daa[17*20+13] =    7.84; daa[17*20+14] =    4.21; daa[17*20+15] =   38.58;
-            daa[17*20+16] =    9.99; daa[18*20+ 0] =    6.48; daa[18*20+ 1] =    1.90; daa[18*20+ 2] =  191.36;
-            daa[18*20+ 3] =   21.21; daa[18*20+ 4] =  254.77; daa[18*20+ 5] =   38.82; daa[18*20+ 6] =   13.12;
-            daa[18*20+ 7] =    3.21; daa[18*20+ 8] =  670.14; daa[18*20+ 9] =   25.01; daa[18*20+10] =   44.15;
-            daa[18*20+11] =   51.17; daa[18*20+12] =   39.96; daa[18*20+13] =  465.58; daa[18*20+14] =   16.21;
-            daa[18*20+15] =   64.92; daa[18*20+16] =   38.73; daa[18*20+17] =   26.25; daa[19*20+ 0] =  195.06;
-            daa[19*20+ 1] =    7.64; daa[19*20+ 2] =    1.90; daa[19*20+ 3] =    1.90; daa[19*20+ 4] =    1.90;
-            daa[19*20+ 5] =   19.00; daa[19*20+ 6] =   21.14; daa[19*20+ 7] =    2.53; daa[19*20+ 8] =    1.90;
-            daa[19*20+ 9] = 1222.94; daa[19*20+10] =   91.67; daa[19*20+11] =    1.90; daa[19*20+12] =  387.54;
-            daa[19*20+13] =    6.35; daa[19*20+14] =    8.23; daa[19*20+15] =    1.90; daa[19*20+16] =  204.54;
-            daa[19*20+17] =    5.37; daa[19*20+18] =    1.90;
-            
-            
-            f[ 0] = 0.072000; f[ 1] = 0.019000; f[ 2] = 0.039000; f[ 3] = 0.019000;
-            f[ 4] = 0.006000; f[ 5] = 0.025000; f[ 6] = 0.024000; f[ 7] = 0.056000;
-            f[ 8] = 0.028000; f[ 9] = 0.088000; f[10] = 0.169000; f[11] = 0.023000;
-            f[12] = 0.054000; f[13] = 0.061000; f[14] = 0.054000; f[15] = 0.072000;
-            f[16] = 0.086000; f[17] = 0.029000; f[18] = 0.033000; f[19] = 0.043000;
-          }
-          break;
-        case PLL_WAG:
-          {
-            daa[ 1*20+ 0] =  55.15710; daa[ 2*20+ 0] =  50.98480; daa[ 2*20+ 1] =  63.53460; 
-            daa[ 3*20+ 0] =  73.89980; daa[ 3*20+ 1] =  14.73040; daa[ 3*20+ 2] = 542.94200; 
-            daa[ 4*20+ 0] = 102.70400; daa[ 4*20+ 1] =  52.81910; daa[ 4*20+ 2] =  26.52560; 
-            daa[ 4*20+ 3] =   3.02949; daa[ 5*20+ 0] =  90.85980; daa[ 5*20+ 1] = 303.55000; 
-            daa[ 5*20+ 2] = 154.36400; daa[ 5*20+ 3] =  61.67830; daa[ 5*20+ 4] =   9.88179; 
-            daa[ 6*20+ 0] = 158.28500; daa[ 6*20+ 1] =  43.91570; daa[ 6*20+ 2] =  94.71980; 
-            daa[ 6*20+ 3] = 617.41600; daa[ 6*20+ 4] =   2.13520; daa[ 6*20+ 5] = 546.94700; 
-            daa[ 7*20+ 0] = 141.67200; daa[ 7*20+ 1] =  58.46650; daa[ 7*20+ 2] = 112.55600; 
-            daa[ 7*20+ 3] =  86.55840; daa[ 7*20+ 4] =  30.66740; daa[ 7*20+ 5] =  33.00520; 
-            daa[ 7*20+ 6] =  56.77170; daa[ 8*20+ 0] =  31.69540; daa[ 8*20+ 1] = 213.71500; 
-            daa[ 8*20+ 2] = 395.62900; daa[ 8*20+ 3] =  93.06760; daa[ 8*20+ 4] =  24.89720; 
-            daa[ 8*20+ 5] = 429.41100; daa[ 8*20+ 6] =  57.00250; daa[ 8*20+ 7] =  24.94100; 
-            daa[ 9*20+ 0] =  19.33350; daa[ 9*20+ 1] =  18.69790; daa[ 9*20+ 2] =  55.42360; 
-            daa[ 9*20+ 3] =   3.94370; daa[ 9*20+ 4] =  17.01350; daa[ 9*20+ 5] =  11.39170; 
-            daa[ 9*20+ 6] =  12.73950; daa[ 9*20+ 7] =   3.04501; daa[ 9*20+ 8] =  13.81900; 
-            daa[10*20+ 0] =  39.79150; daa[10*20+ 1] =  49.76710; daa[10*20+ 2] =  13.15280; 
-            daa[10*20+ 3] =   8.48047; daa[10*20+ 4] =  38.42870; daa[10*20+ 5] =  86.94890; 
-            daa[10*20+ 6] =  15.42630; daa[10*20+ 7] =   6.13037; daa[10*20+ 8] =  49.94620; 
-            daa[10*20+ 9] = 317.09700; daa[11*20+ 0] =  90.62650; daa[11*20+ 1] = 535.14200; 
-            daa[11*20+ 2] = 301.20100; daa[11*20+ 3] =  47.98550; daa[11*20+ 4] =   7.40339; 
-            daa[11*20+ 5] = 389.49000; daa[11*20+ 6] = 258.44300; daa[11*20+ 7] =  37.35580; 
-            daa[11*20+ 8] =  89.04320; daa[11*20+ 9] =  32.38320; daa[11*20+10] =  25.75550; 
-            daa[12*20+ 0] =  89.34960; daa[12*20+ 1] =  68.31620; daa[12*20+ 2] =  19.82210; 
-            daa[12*20+ 3] =  10.37540; daa[12*20+ 4] =  39.04820; daa[12*20+ 5] = 154.52600; 
-            daa[12*20+ 6] =  31.51240; daa[12*20+ 7] =  17.41000; daa[12*20+ 8] =  40.41410; 
-            daa[12*20+ 9] = 425.74600; daa[12*20+10] = 485.40200; daa[12*20+11] =  93.42760; 
-            daa[13*20+ 0] =  21.04940; daa[13*20+ 1] =  10.27110; daa[13*20+ 2] =   9.61621; 
-            daa[13*20+ 3] =   4.67304; daa[13*20+ 4] =  39.80200; daa[13*20+ 5] =   9.99208; 
-            daa[13*20+ 6] =   8.11339; daa[13*20+ 7] =   4.99310; daa[13*20+ 8] =  67.93710; 
-            daa[13*20+ 9] = 105.94700; daa[13*20+10] = 211.51700; daa[13*20+11] =   8.88360; 
-            daa[13*20+12] = 119.06300; daa[14*20+ 0] = 143.85500; daa[14*20+ 1] =  67.94890; 
-            daa[14*20+ 2] =  19.50810; daa[14*20+ 3] =  42.39840; daa[14*20+ 4] =  10.94040; 
-            daa[14*20+ 5] =  93.33720; daa[14*20+ 6] =  68.23550; daa[14*20+ 7] =  24.35700; 
-            daa[14*20+ 8] =  69.61980; daa[14*20+ 9] =   9.99288; daa[14*20+10] =  41.58440; 
-            daa[14*20+11] =  55.68960; daa[14*20+12] =  17.13290; daa[14*20+13] =  16.14440; 
-            daa[15*20+ 0] = 337.07900; daa[15*20+ 1] = 122.41900; daa[15*20+ 2] = 397.42300; 
-            daa[15*20+ 3] = 107.17600; daa[15*20+ 4] = 140.76600; daa[15*20+ 5] = 102.88700; 
-            daa[15*20+ 6] =  70.49390; daa[15*20+ 7] = 134.18200; daa[15*20+ 8] =  74.01690; 
-            daa[15*20+ 9] =  31.94400; daa[15*20+10] =  34.47390; daa[15*20+11] =  96.71300; 
-            daa[15*20+12] =  49.39050; daa[15*20+13] =  54.59310; daa[15*20+14] = 161.32800; 
-            daa[16*20+ 0] = 212.11100; daa[16*20+ 1] =  55.44130; daa[16*20+ 2] = 203.00600; 
-            daa[16*20+ 3] =  37.48660; daa[16*20+ 4] =  51.29840; daa[16*20+ 5] =  85.79280; 
-            daa[16*20+ 6] =  82.27650; daa[16*20+ 7] =  22.58330; daa[16*20+ 8] =  47.33070; 
-            daa[16*20+ 9] = 145.81600; daa[16*20+10] =  32.66220; daa[16*20+11] = 138.69800; 
-            daa[16*20+12] = 151.61200; daa[16*20+13] =  17.19030; daa[16*20+14] =  79.53840; 
-            daa[16*20+15] = 437.80200; daa[17*20+ 0] =  11.31330; daa[17*20+ 1] = 116.39200; 
-            daa[17*20+ 2] =   7.19167; daa[17*20+ 3] =  12.97670; daa[17*20+ 4] =  71.70700; 
-            daa[17*20+ 5] =  21.57370; daa[17*20+ 6] =  15.65570; daa[17*20+ 7] =  33.69830; 
-            daa[17*20+ 8] =  26.25690; daa[17*20+ 9] =  21.24830; daa[17*20+10] =  66.53090; 
-            daa[17*20+11] =  13.75050; daa[17*20+12] =  51.57060; daa[17*20+13] = 152.96400; 
-            daa[17*20+14] =  13.94050; daa[17*20+15] =  52.37420; daa[17*20+16] =  11.08640; 
-            daa[18*20+ 0] =  24.07350; daa[18*20+ 1] =  38.15330; daa[18*20+ 2] = 108.60000; 
-            daa[18*20+ 3] =  32.57110; daa[18*20+ 4] =  54.38330; daa[18*20+ 5] =  22.77100; 
-            daa[18*20+ 6] =  19.63030; daa[18*20+ 7] =  10.36040; daa[18*20+ 8] = 387.34400; 
-            daa[18*20+ 9] =  42.01700; daa[18*20+10] =  39.86180; daa[18*20+11] =  13.32640; 
-            daa[18*20+12] =  42.84370; daa[18*20+13] = 645.42800; daa[18*20+14] =  21.60460; 
-            daa[18*20+15] =  78.69930; daa[18*20+16] =  29.11480; daa[18*20+17] = 248.53900; 
-            daa[19*20+ 0] = 200.60100; daa[19*20+ 1] =  25.18490; daa[19*20+ 2] =  19.62460; 
-            daa[19*20+ 3] =  15.23350; daa[19*20+ 4] = 100.21400; daa[19*20+ 5] =  30.12810; 
-            daa[19*20+ 6] =  58.87310; daa[19*20+ 7] =  18.72470; daa[19*20+ 8] =  11.83580; 
-            daa[19*20+ 9] = 782.13000; daa[19*20+10] = 180.03400; daa[19*20+11] =  30.54340; 
-            daa[19*20+12] = 205.84500; daa[19*20+13] =  64.98920; daa[19*20+14] =  31.48870; 
-            daa[19*20+15] =  23.27390; daa[19*20+16] = 138.82300; daa[19*20+17] =  36.53690; 
-            daa[19*20+18] =  31.47300; 
-                   
-	    f[0]  = 0.0866279; f[1]  = 0.043972;  f[2]  = 0.0390894; f[3]  = 0.0570451;
-	    f[4]  = 0.0193078; f[5]  = 0.0367281; f[6]  = 0.0580589; f[7]  = 0.0832518;
-	    f[8]  = 0.0244313; f[9]  = 0.048466;  f[10] = 0.086209;  f[11] = 0.0620286;
-	    f[12] = 0.0195027; f[13] = 0.0384319; f[14] = 0.0457631; f[15] = 0.0695179;
-	    f[16] = 0.0610127; f[17] = 0.0143859; f[18] = 0.0352742; f[19] = 0.0708957;   
-          }
-          break;
-        case PLL_RTREV:
-          {
-            daa[1*20+0]= 34;         daa[2*20+0]= 51;         daa[2*20+1]= 35;         daa[3*20+0]= 10;         
-            daa[3*20+1]= 30;         daa[3*20+2]= 384;        daa[4*20+0]= 439;        daa[4*20+1]= 92;         
-            daa[4*20+2]= 128;        daa[4*20+3]= 1;          daa[5*20+0]= 32;         daa[5*20+1]= 221;        
-            daa[5*20+2]= 236;        daa[5*20+3]= 78;         daa[5*20+4]= 70;         daa[6*20+0]= 81;         
-            daa[6*20+1]= 10;         daa[6*20+2]= 79;         daa[6*20+3]= 542;        daa[6*20+4]= 1;          
-            daa[6*20+5]= 372;        daa[7*20+0]= 135;        daa[7*20+1]= 41;         daa[7*20+2]= 94;         
-            daa[7*20+3]= 61;         daa[7*20+4]= 48;         daa[7*20+5]= 18;         daa[7*20+6]= 70;         
-            daa[8*20+0]= 30;         daa[8*20+1]= 90;         daa[8*20+2]= 320;        daa[8*20+3]= 91;         
-            daa[8*20+4]= 124;        daa[8*20+5]= 387;        daa[8*20+6]= 34;         daa[8*20+7]= 68;         
-            daa[9*20+0]= 1;          daa[9*20+1]= 24;         daa[9*20+2]= 35;         daa[9*20+3]= 1;          
-            daa[9*20+4]= 104;        daa[9*20+5]= 33;         daa[9*20+6]= 1;          daa[9*20+7]= 1;          
-            daa[9*20+8]= 34;         daa[10*20+0]= 45;        daa[10*20+1]= 18;        daa[10*20+2]= 15;        
-            daa[10*20+3]= 5;         daa[10*20+4]= 110;       daa[10*20+5]= 54;        daa[10*20+6]= 21;        
-            daa[10*20+7]= 3;         daa[10*20+8]= 51;        daa[10*20+9]= 385;       daa[11*20+0]= 38;        
-            daa[11*20+1]= 593;       daa[11*20+2]= 123;       daa[11*20+3]= 20;        daa[11*20+4]= 16;        
-            daa[11*20+5]= 309;       daa[11*20+6]= 141;       daa[11*20+7]= 30;        daa[11*20+8]= 76;        
-            daa[11*20+9]= 34;        daa[11*20+10]= 23;       daa[12*20+0]= 235;       daa[12*20+1]= 57;        
-            daa[12*20+2]= 1;         daa[12*20+3]= 1;         daa[12*20+4]= 156;       daa[12*20+5]= 158;       
-            daa[12*20+6]= 1;         daa[12*20+7]= 37;        daa[12*20+8]= 116;       daa[12*20+9]= 375;       
-            daa[12*20+10]= 581;      daa[12*20+11]= 134;      daa[13*20+0]= 1;         daa[13*20+1]= 7;         
-            daa[13*20+2]= 49;        daa[13*20+3]= 1;         daa[13*20+4]= 70;        daa[13*20+5]= 1;         
-            daa[13*20+6]= 1;         daa[13*20+7]= 7;         daa[13*20+8]= 141;       daa[13*20+9]= 64;        
-            daa[13*20+10]= 179;      daa[13*20+11]= 14;       daa[13*20+12]= 247;      daa[14*20+0]= 97;        
-            daa[14*20+1]= 24;        daa[14*20+2]= 33;        daa[14*20+3]= 55;        daa[14*20+4]= 1;         
-            daa[14*20+5]= 68;        daa[14*20+6]= 52;        daa[14*20+7]= 17;        daa[14*20+8]= 44;        
-            daa[14*20+9]= 10;        daa[14*20+10]= 22;       daa[14*20+11]= 43;       daa[14*20+12]= 1;        
-            daa[14*20+13]= 11;       daa[15*20+0]= 460;       daa[15*20+1]= 102;       daa[15*20+2]= 294;       
-            daa[15*20+3]= 136;       daa[15*20+4]= 75;        daa[15*20+5]= 225;       daa[15*20+6]= 95;        
-            daa[15*20+7]= 152;       daa[15*20+8]= 183;       daa[15*20+9]= 4;         daa[15*20+10]= 24;       
-            daa[15*20+11]= 77;       daa[15*20+12]= 1;        daa[15*20+13]= 20;       daa[15*20+14]= 134;      
-            daa[16*20+0]= 258;       daa[16*20+1]= 64;        daa[16*20+2]= 148;       daa[16*20+3]= 55;        
-            daa[16*20+4]= 117;       daa[16*20+5]= 146;       daa[16*20+6]= 82;        daa[16*20+7]= 7;         
-            daa[16*20+8]= 49;        daa[16*20+9]= 72;        daa[16*20+10]= 25;       daa[16*20+11]= 110;      
-            daa[16*20+12]= 131;      daa[16*20+13]= 69;       daa[16*20+14]= 62;       daa[16*20+15]= 671;      
-            daa[17*20+0]= 5;         daa[17*20+1]= 13;        daa[17*20+2]= 16;        daa[17*20+3]= 1;         
-            daa[17*20+4]= 55;        daa[17*20+5]= 10;        daa[17*20+6]= 17;        daa[17*20+7]= 23;        
-            daa[17*20+8]= 48;        daa[17*20+9]= 39;        daa[17*20+10]= 47;       daa[17*20+11]= 6;        
-            daa[17*20+12]= 111;      daa[17*20+13]= 182;      daa[17*20+14]= 9;        daa[17*20+15]= 14;       
-            daa[17*20+16]= 1;        daa[18*20+0]= 55;        daa[18*20+1]= 47;        daa[18*20+2]= 28;        
-            daa[18*20+3]= 1;         daa[18*20+4]= 131;       daa[18*20+5]= 45;        daa[18*20+6]= 1;         
-            daa[18*20+7]= 21;        daa[18*20+8]= 307;       daa[18*20+9]= 26;        daa[18*20+10]= 64;       
-            daa[18*20+11]= 1;        daa[18*20+12]= 74;       daa[18*20+13]= 1017;     daa[18*20+14]= 14;       
-            daa[18*20+15]= 31;       daa[18*20+16]= 34;       daa[18*20+17]= 176;      daa[19*20+0]= 197;       
-            daa[19*20+1]= 29;        daa[19*20+2]= 21;        daa[19*20+3]= 6;         daa[19*20+4]= 295;       
-            daa[19*20+5]= 36;        daa[19*20+6]= 35;        daa[19*20+7]= 3;         daa[19*20+8]= 1;         
-            daa[19*20+9]= 1048;      daa[19*20+10]= 112;      daa[19*20+11]= 19;       daa[19*20+12]= 236;      
-            daa[19*20+13]= 92;       daa[19*20+14]= 25;       daa[19*20+15]= 39;       daa[19*20+16]= 196;      
-            daa[19*20+17]= 26;       daa[19*20+18]= 59;       
-            
-            f[0]= 0.0646;           f[1]= 0.0453;           f[2]= 0.0376;           f[3]= 0.0422;           
-            f[4]= 0.0114;           f[5]= 0.0606;           f[6]= 0.0607;           f[7]= 0.0639;           
-            f[8]= 0.0273;           f[9]= 0.0679;           f[10]= 0.1018;          f[11]= 0.0751;          
-            f[12]= 0.015;           f[13]= 0.0287;          f[14]= 0.0681;          f[15]= 0.0488;          
-            f[16]= 0.0622;          f[17]= 0.0251;          f[18]= 0.0318;          f[19]= 0.0619;                  
-          }
-          break;
-        case PLL_CPREV:
-          {
-            daa[1*20+0]= 105;        daa[2*20+0]= 227;        daa[2*20+1]= 357;        daa[3*20+0]= 175;        
-            daa[3*20+1]= 43;         daa[3*20+2]= 4435;       daa[4*20+0]= 669;        daa[4*20+1]= 823;        
-            daa[4*20+2]= 538;        daa[4*20+3]= 10;         daa[5*20+0]= 157;        daa[5*20+1]= 1745;       
-            daa[5*20+2]= 768;        daa[5*20+3]= 400;        daa[5*20+4]= 10;         daa[6*20+0]= 499;        
-            daa[6*20+1]= 152;        daa[6*20+2]= 1055;       daa[6*20+3]= 3691;       daa[6*20+4]= 10;         
-            daa[6*20+5]= 3122;       daa[7*20+0]= 665;        daa[7*20+1]= 243;        daa[7*20+2]= 653;        
-            daa[7*20+3]= 431;        daa[7*20+4]= 303;        daa[7*20+5]= 133;        daa[7*20+6]= 379;        
-            daa[8*20+0]= 66;         daa[8*20+1]= 715;        daa[8*20+2]= 1405;       daa[8*20+3]= 331;        
-            daa[8*20+4]= 441;        daa[8*20+5]= 1269;       daa[8*20+6]= 162;        daa[8*20+7]= 19;         
-            daa[9*20+0]= 145;        daa[9*20+1]= 136;        daa[9*20+2]= 168;        daa[9*20+3]= 10;         
-            daa[9*20+4]= 280;        daa[9*20+5]= 92;         daa[9*20+6]= 148;        daa[9*20+7]= 40;         
-            daa[9*20+8]= 29;         daa[10*20+0]= 197;       daa[10*20+1]= 203;       daa[10*20+2]= 113;       
-            daa[10*20+3]= 10;        daa[10*20+4]= 396;       daa[10*20+5]= 286;       daa[10*20+6]= 82;        
-            daa[10*20+7]= 20;        daa[10*20+8]= 66;        daa[10*20+9]= 1745;      daa[11*20+0]= 236;       
-            daa[11*20+1]= 4482;      daa[11*20+2]= 2430;      daa[11*20+3]= 412;       daa[11*20+4]= 48;        
-            daa[11*20+5]= 3313;      daa[11*20+6]= 2629;      daa[11*20+7]= 263;       daa[11*20+8]= 305;       
-            daa[11*20+9]= 345;       daa[11*20+10]= 218;      daa[12*20+0]= 185;       daa[12*20+1]= 125;       
-            daa[12*20+2]= 61;        daa[12*20+3]= 47;        daa[12*20+4]= 159;       daa[12*20+5]= 202;       
-            daa[12*20+6]= 113;       daa[12*20+7]= 21;        daa[12*20+8]= 10;        daa[12*20+9]= 1772;      
-            daa[12*20+10]= 1351;     daa[12*20+11]= 193;      daa[13*20+0]= 68;        daa[13*20+1]= 53;        
-            daa[13*20+2]= 97;        daa[13*20+3]= 22;        daa[13*20+4]= 726;       daa[13*20+5]= 10;        
-            daa[13*20+6]= 145;       daa[13*20+7]= 25;        daa[13*20+8]= 127;       daa[13*20+9]= 454;       
-            daa[13*20+10]= 1268;     daa[13*20+11]= 72;       daa[13*20+12]= 327;      daa[14*20+0]= 490;       
-            daa[14*20+1]= 87;        daa[14*20+2]= 173;       daa[14*20+3]= 170;       daa[14*20+4]= 285;       
-            daa[14*20+5]= 323;       daa[14*20+6]= 185;       daa[14*20+7]= 28;        daa[14*20+8]= 152;       
-            daa[14*20+9]= 117;       daa[14*20+10]= 219;      daa[14*20+11]= 302;      daa[14*20+12]= 100;      
-            daa[14*20+13]= 43;       daa[15*20+0]= 2440;      daa[15*20+1]= 385;       daa[15*20+2]= 2085;      
-            daa[15*20+3]= 590;       daa[15*20+4]= 2331;      daa[15*20+5]= 396;       daa[15*20+6]= 568;       
-            daa[15*20+7]= 691;       daa[15*20+8]= 303;       daa[15*20+9]= 216;       daa[15*20+10]= 516;      
-            daa[15*20+11]= 868;      daa[15*20+12]= 93;       daa[15*20+13]= 487;      daa[15*20+14]= 1202;     
-            daa[16*20+0]= 1340;      daa[16*20+1]= 314;       daa[16*20+2]= 1393;      daa[16*20+3]= 266;       
-            daa[16*20+4]= 576;       daa[16*20+5]= 241;       daa[16*20+6]= 369;       daa[16*20+7]= 92;        
-            daa[16*20+8]= 32;        daa[16*20+9]= 1040;      daa[16*20+10]= 156;      daa[16*20+11]= 918;      
-            daa[16*20+12]= 645;      daa[16*20+13]= 148;      daa[16*20+14]= 260;      daa[16*20+15]= 2151;     
-            daa[17*20+0]= 14;        daa[17*20+1]= 230;       daa[17*20+2]= 40;        daa[17*20+3]= 18;        
-            daa[17*20+4]= 435;       daa[17*20+5]= 53;        daa[17*20+6]= 63;        daa[17*20+7]= 82;        
-            daa[17*20+8]= 69;        daa[17*20+9]= 42;        daa[17*20+10]= 159;      daa[17*20+11]= 10;       
-            daa[17*20+12]= 86;       daa[17*20+13]= 468;      daa[17*20+14]= 49;       daa[17*20+15]= 73;       
-            daa[17*20+16]= 29;       daa[18*20+0]= 56;        daa[18*20+1]= 323;       daa[18*20+2]= 754;       
-            daa[18*20+3]= 281;       daa[18*20+4]= 1466;      daa[18*20+5]= 391;       daa[18*20+6]= 142;       
-            daa[18*20+7]= 10;        daa[18*20+8]= 1971;      daa[18*20+9]= 89;        daa[18*20+10]= 189;      
-            daa[18*20+11]= 247;      daa[18*20+12]= 215;      daa[18*20+13]= 2370;     daa[18*20+14]= 97;       
-            daa[18*20+15]= 522;      daa[18*20+16]= 71;       daa[18*20+17]= 346;      daa[19*20+0]= 968;       
-            daa[19*20+1]= 92;        daa[19*20+2]= 83;        daa[19*20+3]= 75;        daa[19*20+4]= 592;       
-            daa[19*20+5]= 54;        daa[19*20+6]= 200;       daa[19*20+7]= 91;        daa[19*20+8]= 25;        
-            daa[19*20+9]= 4797;      daa[19*20+10]= 865;      daa[19*20+11]= 249;      daa[19*20+12]= 475;      
-            daa[19*20+13]= 317;      daa[19*20+14]= 122;      daa[19*20+15]= 167;      daa[19*20+16]= 760;      
-            daa[19*20+17]= 10;       daa[19*20+18]= 119;      
-            
-            f[0]= 0.076;            f[1]= 0.062;            f[2]= 0.041;            f[3]= 0.037;            
-            f[4]= 0.009;            f[5]= 0.038;            f[6]= 0.049;            f[7]= 0.084;            
-            f[8]= 0.025;            f[9]= 0.081;            f[10]= 0.101;           f[11]= 0.05;            
-            f[12]= 0.022;           f[13]= 0.051;           f[14]= 0.043;           f[15]= 0.062;           
-            f[16]= 0.054;           f[17]= 0.018;           f[18]= 0.031;           f[19]= 0.066; 
-          }
-          break;
-        case PLL_VT:
-          {
-            /*
-              daa[1*20+0]= 0.233108;   daa[2*20+0]= 0.199097;   daa[2*20+1]= 0.210797;   daa[3*20+0]= 0.265145;   
-              daa[3*20+1]= 0.105191;   daa[3*20+2]= 0.883422;   daa[4*20+0]= 0.227333;   daa[4*20+1]= 0.031726;   
-              daa[4*20+2]= 0.027495;   daa[4*20+3]= 0.010313;   daa[5*20+0]= 0.310084;   daa[5*20+1]= 0.493763;   
-              daa[5*20+2]= 0.2757;     daa[5*20+3]= 0.205842;   daa[5*20+4]= 0.004315;   daa[6*20+0]= 0.567957;   
-              daa[6*20+1]= 0.25524;    daa[6*20+2]= 0.270417;   daa[6*20+3]= 1.599461;   daa[6*20+4]= 0.005321;   
-              daa[6*20+5]= 0.960976;   daa[7*20+0]= 0.876213;   daa[7*20+1]= 0.156945;   daa[7*20+2]= 0.362028;   
-              daa[7*20+3]= 0.311718;   daa[7*20+4]= 0.050876;   daa[7*20+5]= 0.12866;    daa[7*20+6]= 0.250447;   
-              daa[8*20+0]= 0.078692;   daa[8*20+1]= 0.213164;   daa[8*20+2]= 0.290006;   daa[8*20+3]= 0.134252;   
-              daa[8*20+4]= 0.016695;   daa[8*20+5]= 0.315521;   daa[8*20+6]= 0.104458;   daa[8*20+7]= 0.058131;   
-              daa[9*20+0]= 0.222972;   daa[9*20+1]= 0.08151;    daa[9*20+2]= 0.087225;   daa[9*20+3]= 0.01172;    
-              daa[9*20+4]= 0.046398;   daa[9*20+5]= 0.054602;   daa[9*20+6]= 0.046589;   daa[9*20+7]= 0.051089;   
-              daa[9*20+8]= 0.020039;   daa[10*20+0]= 0.42463;   daa[10*20+1]= 0.192364;  daa[10*20+2]= 0.069245;  
-              daa[10*20+3]= 0.060863;  daa[10*20+4]= 0.091709;  daa[10*20+5]= 0.24353;   daa[10*20+6]= 0.151924;  
-              daa[10*20+7]= 0.087056;  daa[10*20+8]= 0.103552;  daa[10*20+9]= 2.08989;   daa[11*20+0]= 0.393245;  
-              daa[11*20+1]= 1.755838;  daa[11*20+2]= 0.50306;   daa[11*20+3]= 0.261101;  daa[11*20+4]= 0.004067;  
-              daa[11*20+5]= 0.738208;  daa[11*20+6]= 0.88863;   daa[11*20+7]= 0.193243;  daa[11*20+8]= 0.153323;  
-              daa[11*20+9]= 0.093181;  daa[11*20+10]= 0.201204; daa[12*20+0]= 0.21155;   daa[12*20+1]= 0.08793;   
-              daa[12*20+2]= 0.05742;   daa[12*20+3]= 0.012182;  daa[12*20+4]= 0.02369;   daa[12*20+5]= 0.120801;  
-              daa[12*20+6]= 0.058643;  daa[12*20+7]= 0.04656;   daa[12*20+8]= 0.021157;  daa[12*20+9]= 0.493845;  
-              daa[12*20+10]= 1.105667; daa[12*20+11]= 0.096474; daa[13*20+0]= 0.116646;  daa[13*20+1]= 0.042569;  
-              daa[13*20+2]= 0.039769;  daa[13*20+3]= 0.016577;  daa[13*20+4]= 0.051127;  daa[13*20+5]= 0.026235;  
-              daa[13*20+6]= 0.028168;  daa[13*20+7]= 0.050143;  daa[13*20+8]= 0.079807;  daa[13*20+9]= 0.32102;   
-              daa[13*20+10]= 0.946499; daa[13*20+11]= 0.038261; daa[13*20+12]= 0.173052; daa[14*20+0]= 0.399143;  
-              daa[14*20+1]= 0.12848;   daa[14*20+2]= 0.083956;  daa[14*20+3]= 0.160063;  daa[14*20+4]= 0.011137;  
-              daa[14*20+5]= 0.15657;   daa[14*20+6]= 0.205134;  daa[14*20+7]= 0.124492;  daa[14*20+8]= 0.078892;  
-              daa[14*20+9]= 0.054797;  daa[14*20+10]= 0.169784; daa[14*20+11]= 0.212302; daa[14*20+12]= 0.010363; 
-              daa[14*20+13]= 0.042564; daa[15*20+0]= 1.817198;  daa[15*20+1]= 0.292327;  daa[15*20+2]= 0.847049;  
-              daa[15*20+3]= 0.461519;  daa[15*20+4]= 0.17527;   daa[15*20+5]= 0.358017;  daa[15*20+6]= 0.406035;  
-              daa[15*20+7]= 0.612843;  daa[15*20+8]= 0.167406;  daa[15*20+9]= 0.081567;  daa[15*20+10]= 0.214977; 
-              daa[15*20+11]= 0.400072; daa[15*20+12]= 0.090515; daa[15*20+13]= 0.138119; daa[15*20+14]= 0.430431; 
-              daa[16*20+0]= 0.877877;  daa[16*20+1]= 0.204109;  daa[16*20+2]= 0.471268;  daa[16*20+3]= 0.178197;  
-              daa[16*20+4]= 0.079511;  daa[16*20+5]= 0.248992;  daa[16*20+6]= 0.321028;  daa[16*20+7]= 0.136266;  
-              daa[16*20+8]= 0.101117;  daa[16*20+9]= 0.376588;  daa[16*20+10]= 0.243227; daa[16*20+11]= 0.446646; 
-              daa[16*20+12]= 0.184609; daa[16*20+13]= 0.08587;  daa[16*20+14]= 0.207143; daa[16*20+15]= 1.767766; 
-              daa[17*20+0]= 0.030309;  daa[17*20+1]= 0.046417;  daa[17*20+2]= 0.010459;  daa[17*20+3]= 0.011393;  
-              daa[17*20+4]= 0.007732;  daa[17*20+5]= 0.021248;  daa[17*20+6]= 0.018844;  daa[17*20+7]= 0.02399;   
-              daa[17*20+8]= 0.020009;  daa[17*20+9]= 0.034954;  daa[17*20+10]= 0.083439; daa[17*20+11]= 0.023321; 
-              daa[17*20+12]= 0.022019; daa[17*20+13]= 0.12805;  daa[17*20+14]= 0.014584; daa[17*20+15]= 0.035933; 
-              daa[17*20+16]= 0.020437; daa[18*20+0]= 0.087061;  daa[18*20+1]= 0.09701;   daa[18*20+2]= 0.093268;  
-              daa[18*20+3]= 0.051664;  daa[18*20+4]= 0.042823;  daa[18*20+5]= 0.062544;  daa[18*20+6]= 0.0552;    
-              daa[18*20+7]= 0.037568;  daa[18*20+8]= 0.286027;  daa[18*20+9]= 0.086237;  daa[18*20+10]= 0.189842; 
-              daa[18*20+11]= 0.068689; daa[18*20+12]= 0.073223; daa[18*20+13]= 0.898663; daa[18*20+14]= 0.032043; 
-              daa[18*20+15]= 0.121979; daa[18*20+16]= 0.094617; daa[18*20+17]= 0.124746; daa[19*20+0]= 1.230985;  
-              daa[19*20+1]= 0.113146;  daa[19*20+2]= 0.049824;  daa[19*20+3]= 0.048769;  daa[19*20+4]= 0.163831;  
-              daa[19*20+5]= 0.112027;  daa[19*20+6]= 0.205868;  daa[19*20+7]= 0.082579;  daa[19*20+8]= 0.068575;  
-              daa[19*20+9]= 3.65443;   daa[19*20+10]= 1.337571; daa[19*20+11]= 0.144587; daa[19*20+12]= 0.307309; 
-              daa[19*20+13]= 0.247329; daa[19*20+14]= 0.129315; daa[19*20+15]= 0.1277;   daa[19*20+16]= 0.740372; 
-              daa[19*20+17]= 0.022134; daa[19*20+18]= 0.125733;                     
-              
-              f[0]  = 0.07900;         f[1]= 0.05100;        f[2]  = 0.04200;         f[3]= 0.05300;         
-              f[4]  = 0.01500;         f[5]= 0.03700;        f[6]  = 0.06200;         f[7]= 0.07100;         
-              f[8]  = 0.02300;         f[9]= 0.06200;        f[10] = 0.09600;        f[11]= 0.05700;        
-              f[12] = 0.02400;        f[13]= 0.04300;        f[14] = 0.04400;        f[15]= 0.06400;        
-              f[16] = 0.05600;        f[17]= 0.01300;        f[18] = 0.03500;        f[19]= 0.07300; 
-            */
-
-            daa[1*20+0]=   1.2412691067876198;
-            daa[2*20+0]=   1.2184237953498958;
-            daa[2*20+1]=   1.5720770753326880;
-            daa[3*20+0]=   1.3759368509441177;
-            daa[3*20+1]=   0.7550654439001206;
-            daa[3*20+2]=   7.8584219153689405;
-            daa[4*20+0]=   2.4731223087544874;
-            daa[4*20+1]=   1.4414262567428417;
-            daa[4*20+2]=   0.9784679122774127;
-            daa[4*20+3]=   0.2272488448121475;
-            daa[5*20+0]=   2.2155167805137470;
-            daa[5*20+1]=   5.5120819705248678;
-            daa[5*20+2]=   3.0143201670924822;
-            daa[5*20+3]=   1.6562495638176040;
-            daa[5*20+4]=   0.4587469126746136;
-            daa[6*20+0]=   2.3379911207495061;
-            daa[6*20+1]=   1.3542404860613146;
-            daa[6*20+2]=   2.0093434778398112;
-            daa[6*20+3]=   9.6883451875685065;
-            daa[6*20+4]=   0.4519167943192672;
-            daa[6*20+5]=   6.8124601839937675;
-            daa[7*20+0]=   3.3386555146457697;
-            daa[7*20+1]=   1.3121700301622004;
-            daa[7*20+2]=   2.4117632898861809;
-            daa[7*20+3]=   1.9142079025990228;
-            daa[7*20+4]=   1.1034605684472507;
-            daa[7*20+5]=   0.8776110594765502;
-            daa[7*20+6]=   1.3860121390169038;
-            daa[8*20+0]=   0.9615841926910841;
-            daa[8*20+1]=   4.9238668283945266;
-            daa[8*20+2]=   6.1974384977884114;
-            daa[8*20+3]=   2.1459640610133781;
-            daa[8*20+4]=   1.5196756759380692;
-            daa[8*20+5]=   7.9943228564946525;
-            daa[8*20+6]=   1.6360079688522375;
-            daa[8*20+7]=   0.8561248973045037;
-            daa[9*20+0]=   0.8908203061925510;
-            daa[9*20+1]=   0.4323005487925516;
-            daa[9*20+2]=   0.9179291175331520;
-            daa[9*20+3]=   0.2161660372725585;
-            daa[9*20+4]=   0.9126668032539315;
-            daa[9*20+5]=   0.4882733432879921;
-            daa[9*20+6]=   0.4035497929633328;
-            daa[9*20+7]=   0.2888075033037488;
-            daa[9*20+8]=   0.5787937115407940;
-            daa[10*20+0]=  1.0778497408764076;
-            daa[10*20+1]=  0.8386701149158265;
-            daa[10*20+2]=  0.4098311270816011;
-            daa[10*20+3]=  0.3574207468998517;
-            daa[10*20+4]=  1.4081315998413697;
-            daa[10*20+5]=  1.3318097154194044;
-            daa[10*20+6]=  0.5610717242294755;
-            daa[10*20+7]=  0.3578662395745526;
-            daa[10*20+8]=  1.0765007949562073;
-            daa[10*20+9]=  6.0019110258426362;
-            daa[11*20+0]=  1.4932055816372476;
-            daa[11*20+1]=  10.017330817366002;
-            daa[11*20+2]=  4.4034547578962568;
-            daa[11*20+3]=  1.4521790561663968;
-            daa[11*20+4]=  0.3371091785647479;
-            daa[11*20+5]=  6.0519085243118811;
-            daa[11*20+6]=  4.3290086529582830;
-            daa[11*20+7]=  0.8945563662345198;
-            daa[11*20+8]=  1.8085136096039203;
-            daa[11*20+9]=  0.6244297525127139;
-            daa[11*20+10]= 0.5642322882556321;
-            daa[12*20+0]=  1.9006455961717605;
-            daa[12*20+1]=  1.2488638689609959;
-            daa[12*20+2]=  0.9378803706165143;
-            daa[12*20+3]=  0.4075239926000898;
-            daa[12*20+4]=  1.2213054800811556;
-            daa[12*20+5]=  1.9106190827629084;
-            daa[12*20+6]=  0.7471936218068498;
-            daa[12*20+7]=  0.5954812791740037;
-            daa[12*20+8]=  1.3808291710019667;
-            daa[12*20+9]=  6.7597899772045418;
-            daa[12*20+10]= 8.0327792947421148;
-            daa[12*20+11]= 1.7129670976916258;
-            daa[13*20+0]=  0.6883439026872615;
-            daa[13*20+1]=  0.4224945197276290;
-            daa[13*20+2]=  0.5044944273324311;
-            daa[13*20+3]=  0.1675129724559251;
-            daa[13*20+4]=  1.6953951980808002;
-            daa[13*20+5]=  0.3573432522499545;
-            daa[13*20+6]=  0.2317194387691585;
-            daa[13*20+7]=  0.3693722640980460;
-            daa[13*20+8]=  1.3629765501081097;
-            daa[13*20+9]=  2.2864286949316077;
-            daa[13*20+10]= 4.3611548063555778;
-            daa[13*20+11]= 0.3910559903834828;
-            daa[13*20+12]= 2.3201373546296349;
-            daa[14*20+0]=  2.7355620089953550;
-            daa[14*20+1]=  1.3091837782420783;
-            daa[14*20+2]=  0.7103720531974738;
-            daa[14*20+3]=  1.0714605979577547;
-            daa[14*20+4]=  0.4326227078645523;
-            daa[14*20+5]=  2.3019177728300728;
-            daa[14*20+6]=  1.5132807416252063;
-            daa[14*20+7]=  0.7744933618134962;
-            daa[14*20+8]=  1.8370555852070649;
-            daa[14*20+9]=  0.4811402387911145;
-            daa[14*20+10]= 1.0084320519837335;
-            daa[14*20+11]= 1.3918935593582853;
-            daa[14*20+12]= 0.4953193808676289;
-            daa[14*20+13]= 0.3746821107962129;
-            daa[15*20+0]=  6.4208961859142883;
-            daa[15*20+1]=  1.9202994262316166;
-            daa[15*20+2]=  6.1234512396801764;
-            daa[15*20+3]=  2.2161944596741829;
-            daa[15*20+4]=  3.6366815408744255;
-            daa[15*20+5]=  2.3193703643237220;
-            daa[15*20+6]=  1.8273535587773553;
-            daa[15*20+7]=  3.0637776193717610;
-            daa[15*20+8]=  1.9699895187387506;
-            daa[15*20+9]=  0.6047491507504744;
-            daa[15*20+10]= 0.8953754669269811;
-            daa[15*20+11]= 1.9776630140912268;
-            daa[15*20+12]= 1.0657482318076852;
-            daa[15*20+13]= 1.1079144700606407;
-            daa[15*20+14]= 3.5465914843628927;
-            daa[16*20+0]=  5.2892514169776437;
-            daa[16*20+1]=  1.3363401740560601;
-            daa[16*20+2]=  3.8852506105922231;
-            daa[16*20+3]=  1.5066839872944762;
-            daa[16*20+4]=  1.7557065205837685;
-            daa[16*20+5]=  2.1576510103471440;
-            daa[16*20+6]=  1.5839981708584689;
-            daa[16*20+7]=  0.7147489676267383;
-            daa[16*20+8]=  1.6136654573285647;
-            daa[16*20+9]=  2.6344778384442731;
-            daa[16*20+10]= 1.0192004372506540;
-            daa[16*20+11]= 2.5513781312660280;
-            daa[16*20+12]= 3.3628488360462363;
-            daa[16*20+13]= 0.6882725908872254;
-            daa[16*20+14]= 1.9485376673137556;
-            daa[16*20+15]= 8.8479984061248178;
-            daa[17*20+0]=  0.5488578478106930;
-            daa[17*20+1]=  1.5170142153962840;
-            daa[17*20+2]=  0.1808525752605976;
-            daa[17*20+3]=  0.2496584188151770;
-            daa[17*20+4]=  1.6275179891253113;
-            daa[17*20+5]=  0.8959082681546182;
-            daa[17*20+6]=  0.4198391148111098;
-            daa[17*20+7]=  0.9349753595598769;
-            daa[17*20+8]=  0.6301954684360302;
-            daa[17*20+9]=  0.5604648274060783;
-            daa[17*20+10]= 1.5183114434679339;
-            daa[17*20+11]= 0.5851920879490173;
-            daa[17*20+12]= 1.4680478689711018;
-            daa[17*20+13]= 3.3448437239772266;
-            daa[17*20+14]= 0.4326058001438786;
-            daa[17*20+15]= 0.6791126595939816;
-            daa[17*20+16]= 0.4514203099376473;
-            daa[18*20+0]=  0.5411769916657778;
-            daa[18*20+1]=  0.8912614404565405;
-            daa[18*20+2]=  1.0894926581511342;
-            daa[18*20+3]=  0.7447620891784513;
-            daa[18*20+4]=  2.1579775140421025;
-            daa[18*20+5]=  0.9183596801412757;
-            daa[18*20+6]=  0.5818111331782764;
-            daa[18*20+7]=  0.3374467649724478;
-            daa[18*20+8]=  7.7587442309146040;
-            daa[18*20+9]=  0.8626796044156272;
-            daa[18*20+10]= 1.2452243224541324;
-            daa[18*20+11]= 0.7835447533710449;
-            daa[18*20+12]= 1.0899165770956820;
-            daa[18*20+13]= 10.384852333133459;
-            daa[18*20+14]= 0.4819109019647465;
-            daa[18*20+15]= 0.9547229305958682;
-            daa[18*20+16]= 0.8564314184691215;
-            daa[18*20+17]= 4.5377235790405388;
-            daa[19*20+0]=  4.6501894691803214;
-            daa[19*20+1]=  0.7807017855806767;
-            daa[19*20+2]=  0.4586061981719967;
-            daa[19*20+3]=  0.4594535241660911;
-            daa[19*20+4]=  2.2627456996290891;
-            daa[19*20+5]=  0.6366932501396869;
-            daa[19*20+6]=  0.8940572875547330;
-            daa[19*20+7]=  0.6193321034173915;
-            daa[19*20+8]=  0.5333220944030346;
-            daa[19*20+9]=  14.872933461519061;
-            daa[19*20+10]= 3.5458093276667237;
-            daa[19*20+11]= 0.7801080335991272;
-            daa[19*20+12]= 4.0584577156753401;
-            daa[19*20+13]= 1.7039730522675411;
-            daa[19*20+14]= 0.5985498912985666;
-            daa[19*20+15]= 0.9305232113028208;
-            daa[19*20+16]= 3.4242218450865543;
-            daa[19*20+17]= 0.5658969249032649;
-            daa[19*20+18]= 1.0000000000000000;
-            
-            f[0]=  0.0770764620135024;
-            f[1]=  0.0500819370772208;
-            f[2]=  0.0462377395993731;
-            f[3]=  0.0537929860758246;
-            f[4]=  0.0144533387583345;
-            f[5]=  0.0408923608974345;
-            f[6]=  0.0633579339160905;
-            f[7]=  0.0655672355884439;
-            f[8]=  0.0218802687005936;
-            f[9]=  0.0591969699027449;
-            f[10]= 0.0976461276528445;
-            f[11]= 0.0592079410822730;
-            f[12]= 0.0220695876653368;
-            f[13]= 0.0413508521834260;
-            f[14]= 0.0476871596856874;
-            f[15]= 0.0707295165111524;
-            f[16]= 0.0567759161524817;
-            f[17]= 0.0127019797647213;
-            f[18]= 0.0323746050281867;
-            f[19]= 0.0669190817443274;
-          }
-          break;
-        case PLL_BLOSUM62:
-          {
-            daa[1*20+0]= 0.735790389698;  daa[2*20+0]= 0.485391055466;  daa[2*20+1]= 1.297446705134;  
-            daa[3*20+0]= 0.543161820899;  
-            daa[3*20+1]= 0.500964408555;  daa[3*20+2]= 3.180100048216;  daa[4*20+0]= 1.45999531047;   
-            daa[4*20+1]= 0.227826574209;  
-            daa[4*20+2]= 0.397358949897;  daa[4*20+3]= 0.240836614802;  daa[5*20+0]= 1.199705704602;  
-            daa[5*20+1]= 3.020833610064;  
-            daa[5*20+2]= 1.839216146992;  daa[5*20+3]= 1.190945703396;  daa[5*20+4]= 0.32980150463;   
-            daa[6*20+0]= 1.1709490428;    
-            daa[6*20+1]= 1.36057419042;   daa[6*20+2]= 1.24048850864;   daa[6*20+3]= 3.761625208368;  
-            daa[6*20+4]= 0.140748891814;  
-            daa[6*20+5]= 5.528919177928;  daa[7*20+0]= 1.95588357496;   daa[7*20+1]= 0.418763308518;  
-            daa[7*20+2]= 1.355872344485;  
-            daa[7*20+3]= 0.798473248968;  daa[7*20+4]= 0.418203192284;  daa[7*20+5]= 0.609846305383;  
-            daa[7*20+6]= 0.423579992176;  
-            daa[8*20+0]= 0.716241444998;  daa[8*20+1]= 1.456141166336;  daa[8*20+2]= 2.414501434208;  
-            daa[8*20+3]= 0.778142664022;  
-            daa[8*20+4]= 0.354058109831;  daa[8*20+5]= 2.43534113114;   daa[8*20+6]= 1.626891056982;  
-            daa[8*20+7]= 0.539859124954;  
-            daa[9*20+0]= 0.605899003687;  daa[9*20+1]= 0.232036445142;  daa[9*20+2]= 0.283017326278;  
-            daa[9*20+3]= 0.418555732462;  
-            daa[9*20+4]= 0.774894022794;  daa[9*20+5]= 0.236202451204;  daa[9*20+6]= 0.186848046932;  
-            daa[9*20+7]= 0.189296292376;  
-            daa[9*20+8]= 0.252718447885;  daa[10*20+0]= 0.800016530518; daa[10*20+1]= 0.622711669692; 
-            daa[10*20+2]= 0.211888159615; 
-            daa[10*20+3]= 0.218131577594; daa[10*20+4]= 0.831842640142; daa[10*20+5]= 0.580737093181; 
-            daa[10*20+6]= 0.372625175087; 
-            daa[10*20+7]= 0.217721159236; daa[10*20+8]= 0.348072209797; daa[10*20+9]= 3.890963773304; 
-            daa[11*20+0]= 1.295201266783; 
-            daa[11*20+1]= 5.411115141489; daa[11*20+2]= 1.593137043457; daa[11*20+3]= 1.032447924952; 
-            daa[11*20+4]= 0.285078800906; 
-            daa[11*20+5]= 3.945277674515; daa[11*20+6]= 2.802427151679; daa[11*20+7]= 0.752042440303; 
-            daa[11*20+8]= 1.022507035889; 
-            daa[11*20+9]= 0.406193586642; daa[11*20+10]= 0.445570274261;daa[12*20+0]= 1.253758266664; 
-            daa[12*20+1]= 0.983692987457; 
-            daa[12*20+2]= 0.648441278787; daa[12*20+3]= 0.222621897958; daa[12*20+4]= 0.76768882348;  
-            daa[12*20+5]= 2.494896077113; 
-            daa[12*20+6]= 0.55541539747;  daa[12*20+7]= 0.459436173579; daa[12*20+8]= 0.984311525359; 
-            daa[12*20+9]= 3.364797763104; 
-            daa[12*20+10]= 6.030559379572;daa[12*20+11]= 1.073061184332;daa[13*20+0]= 0.492964679748; 
-            daa[13*20+1]= 0.371644693209; 
-            daa[13*20+2]= 0.354861249223; daa[13*20+3]= 0.281730694207; daa[13*20+4]= 0.441337471187; 
-            daa[13*20+5]= 0.14435695975;  
-            daa[13*20+6]= 0.291409084165; daa[13*20+7]= 0.368166464453; daa[13*20+8]= 0.714533703928; 
-            daa[13*20+9]= 1.517359325954; 
-            daa[13*20+10]= 2.064839703237;daa[13*20+11]= 0.266924750511;daa[13*20+12]= 1.77385516883; 
-            daa[14*20+0]= 1.173275900924; 
-            daa[14*20+1]= 0.448133661718; daa[14*20+2]= 0.494887043702; daa[14*20+3]= 0.730628272998; 
-            daa[14*20+4]= 0.356008498769; 
-            daa[14*20+5]= 0.858570575674; daa[14*20+6]= 0.926563934846; daa[14*20+7]= 0.504086599527; daa[14*20+8]= 0.527007339151; 
-            daa[14*20+9]= 0.388355409206; daa[14*20+10]= 0.374555687471;daa[14*20+11]= 1.047383450722;daa[14*20+12]= 0.454123625103;
-            daa[14*20+13]= 0.233597909629;daa[15*20+0]= 4.325092687057; daa[15*20+1]= 1.12278310421;  daa[15*20+2]= 2.904101656456; 
-            daa[15*20+3]= 1.582754142065; daa[15*20+4]= 1.197188415094; daa[15*20+5]= 1.934870924596; daa[15*20+6]= 1.769893238937; 
-            daa[15*20+7]= 1.509326253224; daa[15*20+8]= 1.11702976291;  daa[15*20+9]= 0.35754441246;  daa[15*20+10]= 0.352969184527;
-            daa[15*20+11]= 1.752165917819;daa[15*20+12]= 0.918723415746;daa[15*20+13]= 0.540027644824;daa[15*20+14]= 1.169129577716;
-            daa[16*20+0]= 1.729178019485; daa[16*20+1]= 0.914665954563; daa[16*20+2]= 1.898173634533; daa[16*20+3]= 0.934187509431; 
-            daa[16*20+4]= 1.119831358516; daa[16*20+5]= 1.277480294596; daa[16*20+6]= 1.071097236007; daa[16*20+7]= 0.641436011405; 
-            daa[16*20+8]= 0.585407090225; daa[16*20+9]= 1.17909119726;  daa[16*20+10]= 0.915259857694;daa[16*20+11]= 1.303875200799;
-            daa[16*20+12]= 1.488548053722;daa[16*20+13]= 0.488206118793;daa[16*20+14]= 1.005451683149;daa[16*20+15]= 5.15155629227; 
-            daa[17*20+0]= 0.465839367725; daa[17*20+1]= 0.426382310122; daa[17*20+2]= 0.191482046247; daa[17*20+3]= 0.145345046279; 
-            daa[17*20+4]= 0.527664418872; daa[17*20+5]= 0.758653808642; daa[17*20+6]= 0.407635648938; daa[17*20+7]= 0.508358924638; 
-            daa[17*20+8]= 0.30124860078;  daa[17*20+9]= 0.34198578754;  daa[17*20+10]= 0.6914746346;  daa[17*20+11]= 0.332243040634;
-            daa[17*20+12]= 0.888101098152;daa[17*20+13]= 2.074324893497;daa[17*20+14]= 0.252214830027;daa[17*20+15]= 0.387925622098;
-            daa[17*20+16]= 0.513128126891;daa[18*20+0]= 0.718206697586; daa[18*20+1]= 0.720517441216; daa[18*20+2]= 0.538222519037; 
-            daa[18*20+3]= 0.261422208965; daa[18*20+4]= 0.470237733696; daa[18*20+5]= 0.95898974285;  daa[18*20+6]= 0.596719300346; 
-            daa[18*20+7]= 0.308055737035; daa[18*20+8]= 4.218953969389; daa[18*20+9]= 0.674617093228; daa[18*20+10]= 0.811245856323;
-            daa[18*20+11]= 0.7179934869;  daa[18*20+12]= 0.951682162246;daa[18*20+13]= 6.747260430801;daa[18*20+14]= 0.369405319355;
-            daa[18*20+15]= 0.796751520761;daa[18*20+16]= 0.801010243199;daa[18*20+17]= 4.054419006558;daa[19*20+0]= 2.187774522005; 
-            daa[19*20+1]= 0.438388343772; daa[19*20+2]= 0.312858797993; daa[19*20+3]= 0.258129289418; daa[19*20+4]= 1.116352478606; 
-            daa[19*20+5]= 0.530785790125; daa[19*20+6]= 0.524253846338; daa[19*20+7]= 0.25334079019;  daa[19*20+8]= 0.20155597175;  
-            daa[19*20+9]= 8.311839405458; daa[19*20+10]= 2.231405688913;daa[19*20+11]= 0.498138475304;daa[19*20+12]= 2.575850755315;
-            daa[19*20+13]= 0.838119610178;daa[19*20+14]= 0.496908410676;daa[19*20+15]= 0.561925457442;daa[19*20+16]= 2.253074051176;
-            daa[19*20+17]= 0.266508731426;daa[19*20+18]= 1;             
-            
-            f[0]= 0.074;                 f[1]= 0.052;                 f[2]= 0.045;                 f[3]= 0.054;                 
-            f[4]= 0.025;                 f[5]= 0.034;                 f[6]= 0.054;                 f[7]= 0.074;                 
-            f[8]= 0.026;                 f[9]= 0.068;                 f[10]= 0.099;                f[11]= 0.058;                
-            f[12]= 0.025;                f[13]= 0.047;                f[14]= 0.039;                f[15]= 0.057;                
-            f[16]= 0.051;                f[17]= 0.013;                f[18]= 0.032;                f[19]= 0.073;
-          }
-          break;
-        case PLL_MTMAM:
-          {
-            daa[1*20+0]= 32;              daa[2*20+0]= 2;    daa[2*20+1]= 4;               daa[3*20+0]= 11;
-            daa[3*20+1]= 0;               daa[3*20+2]= 864;  daa[4*20+0]= 0;               daa[4*20+1]= 186;
-            daa[4*20+2]= 0;               daa[4*20+3]= 0;    daa[5*20+0]= 0;               daa[5*20+1]= 246;
-            daa[5*20+2]= 8;               daa[5*20+3]= 49;   daa[5*20+4]= 0;               daa[6*20+0]= 0;
-            daa[6*20+1]= 0;               daa[6*20+2]= 0;    daa[6*20+3]= 569;             daa[6*20+4]= 0;
-            daa[6*20+5]= 274;             daa[7*20+0]= 78;   daa[7*20+1]= 18;              daa[7*20+2]= 47;
-            daa[7*20+3]= 79;              daa[7*20+4]= 0;    daa[7*20+5]= 0;               daa[7*20+6]= 22;
-            daa[8*20+0]= 8;               daa[8*20+1]= 232;  daa[8*20+2]= 458;             daa[8*20+3]= 11;
-            daa[8*20+4]= 305;             daa[8*20+5]= 550;  daa[8*20+6]= 22;              daa[8*20+7]= 0;
-            daa[9*20+0]= 75;              daa[9*20+1]= 0;    daa[9*20+2]= 19;              daa[9*20+3]= 0;
-            daa[9*20+4]= 41;              daa[9*20+5]= 0;    daa[9*20+6]= 0;               daa[9*20+7]= 0;
-            daa[9*20+8]= 0;               daa[10*20+0]= 21;  daa[10*20+1]= 6;              daa[10*20+2]= 0;
-            daa[10*20+3]= 0;              daa[10*20+4]= 27;  daa[10*20+5]= 20;             daa[10*20+6]= 0;
-            daa[10*20+7]= 0;              daa[10*20+8]= 26;  daa[10*20+9]= 232;            daa[11*20+0]= 0;
-            daa[11*20+1]= 50;             daa[11*20+2]= 408; daa[11*20+3]= 0;              daa[11*20+4]= 0;
-            daa[11*20+5]= 242;            daa[11*20+6]= 215; daa[11*20+7]= 0;              daa[11*20+8]= 0;
-            daa[11*20+9]= 6;              daa[11*20+10]= 4;  daa[12*20+0]= 76;             daa[12*20+1]= 0;
-            daa[12*20+2]= 21;             daa[12*20+3]= 0;   daa[12*20+4]= 0;              daa[12*20+5]= 22;
-            daa[12*20+6]= 0;              daa[12*20+7]= 0;   daa[12*20+8]= 0;              daa[12*20+9]= 378;
-            daa[12*20+10]= 609;           daa[12*20+11]= 59; daa[13*20+0]= 0;              daa[13*20+1]= 0;
-            daa[13*20+2]= 6;              daa[13*20+3]= 5;   daa[13*20+4]= 7;              daa[13*20+5]= 0;
-            daa[13*20+6]= 0;              daa[13*20+7]= 0;   daa[13*20+8]= 0;              daa[13*20+9]= 57;
-            daa[13*20+10]= 246;           daa[13*20+11]= 0;  daa[13*20+12]= 11;            daa[14*20+0]= 53;
-            daa[14*20+1]= 9;              daa[14*20+2]= 33;  daa[14*20+3]= 2;              daa[14*20+4]= 0;
-            daa[14*20+5]= 51;             daa[14*20+6]= 0;   daa[14*20+7]= 0;              daa[14*20+8]= 53;
-            daa[14*20+9]= 5;              daa[14*20+10]= 43; daa[14*20+11]= 18;            daa[14*20+12]= 0;
-            daa[14*20+13]= 17;            daa[15*20+0]= 342; daa[15*20+1]= 3;              daa[15*20+2]= 446;
-            daa[15*20+3]= 16;             daa[15*20+4]= 347; daa[15*20+5]= 30;             daa[15*20+6]= 21;
-            daa[15*20+7]= 112;            daa[15*20+8]= 20;  daa[15*20+9]= 0;              daa[15*20+10]= 74;
-            daa[15*20+11]= 65;            daa[15*20+12]= 47; daa[15*20+13]= 90;            daa[15*20+14]= 202;
-            daa[16*20+0]= 681;            daa[16*20+1]= 0;   daa[16*20+2]= 110;            daa[16*20+3]= 0;
-            daa[16*20+4]= 114;            daa[16*20+5]= 0;   daa[16*20+6]= 4;              daa[16*20+7]= 0;
-            daa[16*20+8]= 1;              daa[16*20+9]= 360; daa[16*20+10]= 34;            daa[16*20+11]= 50;
-            daa[16*20+12]= 691;           daa[16*20+13]= 8;  daa[16*20+14]= 78;            daa[16*20+15]= 614;
-            daa[17*20+0]= 5;              daa[17*20+1]= 16;  daa[17*20+2]= 6;              daa[17*20+3]= 0;
-            daa[17*20+4]= 65;             daa[17*20+5]= 0;   daa[17*20+6]= 0;              daa[17*20+7]= 0;
-            daa[17*20+8]= 0;              daa[17*20+9]= 0;   daa[17*20+10]= 12;            daa[17*20+11]= 0;
-            daa[17*20+12]= 13;            daa[17*20+13]= 0;  daa[17*20+14]= 7;             daa[17*20+15]= 17;
-            daa[17*20+16]= 0;             daa[18*20+0]= 0;   daa[18*20+1]= 0;              daa[18*20+2]= 156;
-            daa[18*20+3]= 0;              daa[18*20+4]= 530; daa[18*20+5]= 54;             daa[18*20+6]= 0;
-            daa[18*20+7]= 1;              daa[18*20+8]= 1525;daa[18*20+9]= 16;             daa[18*20+10]= 25;
-            daa[18*20+11]= 67;            daa[18*20+12]= 0;  daa[18*20+13]= 682;           daa[18*20+14]= 8;
-            daa[18*20+15]= 107;           daa[18*20+16]= 0;  daa[18*20+17]= 14;            daa[19*20+0]= 398;
-            daa[19*20+1]= 0;              daa[19*20+2]= 0;   daa[19*20+3]= 10;             daa[19*20+4]= 0;
-            daa[19*20+5]= 33;             daa[19*20+6]= 20;  daa[19*20+7]= 5;              daa[19*20+8]= 0;
-            daa[19*20+9]= 2220;           daa[19*20+10]= 100;daa[19*20+11]= 0;             daa[19*20+12]= 832;
-            daa[19*20+13]= 6;             daa[19*20+14]= 0;  daa[19*20+15]= 0;             daa[19*20+16]= 237;
-            daa[19*20+17]= 0;             daa[19*20+18]= 0;       
-            
-            f[0]= 0.06920;  f[1]=  0.01840;  f[2]= 0.04000;  f[3]= 0.018600;
-            f[4]= 0.00650;  f[5]=  0.02380;  f[6]= 0.02360;  f[7]= 0.055700;
-            f[8]= 0.02770;  f[9]=  0.09050;  f[10]=0.16750;  f[11]= 0.02210;
-            f[12]=0.05610;  f[13]= 0.06110;  f[14]=0.05360;  f[15]= 0.07250;
-            f[16]=0.08700;  f[17]= 0.02930;  f[18]=0.03400;  f[19]= 0.04280;
-          }
-          break;
-        case PLL_LG:
-          {
-            daa[1*20+0] = 0.425093;
-
-            daa[2*20+0] = 0.276818; daa[2*20+1] = 0.751878;
-
-            daa[3*20+0] = 0.395144; daa[3*20+1] = 0.123954; daa[3*20+2] = 5.076149;
-            
-            daa[4*20+0] = 2.489084; daa[4*20+1] = 0.534551; daa[4*20+2] = 0.528768; daa[4*20+3] = 0.062556;
-                                                                 
-            daa[5*20+0] = 0.969894; daa[5*20+1] = 2.807908; daa[5*20+2] = 1.695752; daa[5*20+3] = 0.523386; daa[5*20+4] = 0.084808;
-
-            daa[6*20+0] = 1.038545; daa[6*20+1] = 0.363970; daa[6*20+2] = 0.541712; daa[6*20+3] = 5.243870; daa[6*20+4] = 0.003499; daa[6*20+5] = 4.128591;
-
-            daa[7*20+0] = 2.066040; daa[7*20+1] = 0.390192; daa[7*20+2] = 1.437645; daa[7*20+3] = 0.844926; daa[7*20+4] = 0.569265; daa[7*20+5] = 0.267959; daa[7*20+6] = 0.348847;
- 
-            daa[8*20+0] = 0.358858; daa[8*20+1] = 2.426601; daa[8*20+2] = 4.509238; daa[8*20+3] = 0.927114; daa[8*20+4] = 0.640543; daa[8*20+5] = 4.813505; daa[8*20+6] = 0.423881; 
-            daa[8*20+7] = 0.311484;
-
-            daa[9*20+0] = 0.149830; daa[9*20+1] = 0.126991; daa[9*20+2] = 0.191503; daa[9*20+3] = 0.010690; daa[9*20+4] = 0.320627; daa[9*20+5] = 0.072854; daa[9*20+6] = 0.044265; 
-            daa[9*20+7] = 0.008705; daa[9*20+8] = 0.108882; 
-
-            daa[10*20+0] = 0.395337; daa[10*20+1] = 0.301848; daa[10*20+2] = 0.068427; daa[10*20+3] = 0.015076; daa[10*20+4] = 0.594007; daa[10*20+5] = 0.582457; daa[10*20+6] = 0.069673; 
-            daa[10*20+7] = 0.044261; daa[10*20+8] = 0.366317; daa[10*20+9] = 4.145067 ;
-
-            daa[11*20+0] = 0.536518; daa[11*20+1] = 6.326067; daa[11*20+2] = 2.145078; daa[11*20+3] = 0.282959; daa[11*20+4] = 0.013266; daa[11*20+5] = 3.234294; daa[11*20+6] = 1.807177; 
-            daa[11*20+7] = 0.296636; daa[11*20+8] = 0.697264; daa[11*20+9] = 0.159069; daa[11*20+10] = 0.137500;
-
-
-            daa[12*20+0] = 1.124035; daa[12*20+1] = 0.484133; daa[12*20+2] = 0.371004; daa[12*20+3] = 0.025548; daa[12*20+4] = 0.893680; daa[12*20+5] = 1.672569; daa[12*20+6] = 0.173735; 
-            daa[12*20+7] = 0.139538; daa[12*20+8] = 0.442472; daa[12*20+9] = 4.273607; daa[12*20+10] = 6.312358; daa[12*20+11] = 0.656604;
-
-            daa[13*20+0] = 0.253701; daa[13*20+1] = 0.052722;daa[13*20+2] = 0.089525; daa[13*20+3] = 0.017416; daa[13*20+4] = 1.105251; daa[13*20+5] = 0.035855; daa[13*20+6] = 0.018811; 
-            daa[13*20+7] = 0.089586; daa[13*20+8] = 0.682139; daa[13*20+9] = 1.112727; daa[13*20+10] = 2.592692; daa[13*20+11] = 0.023918; daa[13*20+12] = 1.798853;
-
-            daa[14*20+0] = 1.177651; daa[14*20+1] = 0.332533;daa[14*20+2] = 0.161787; daa[14*20+3] = 0.394456; daa[14*20+4] = 0.075382; daa[14*20+5] = 0.624294; daa[14*20+6] = 0.419409; 
-            daa[14*20+7] = 0.196961; daa[14*20+8] = 0.508851; daa[14*20+9] = 0.078281; daa[14*20+10] = 0.249060; daa[14*20+11] = 0.390322; daa[14*20+12] = 0.099849; 
-            daa[14*20+13] = 0.094464;
- 
-            daa[15*20+0] = 4.727182; daa[15*20+1] = 0.858151;daa[15*20+2] = 4.008358; daa[15*20+3] = 1.240275; daa[15*20+4] = 2.784478; daa[15*20+5] = 1.223828; daa[15*20+6] = 0.611973; 
-            daa[15*20+7] = 1.739990; daa[15*20+8] = 0.990012; daa[15*20+9] = 0.064105; daa[15*20+10] = 0.182287; daa[15*20+11] = 0.748683; daa[15*20+12] = 0.346960; 
-            daa[15*20+13] = 0.361819; daa[15*20+14] = 1.338132;
- 
-            daa[16*20+0] = 2.139501; daa[16*20+1] = 0.578987;daa[16*20+2] = 2.000679; daa[16*20+3] = 0.425860; daa[16*20+4] = 1.143480; daa[16*20+5] = 1.080136; daa[16*20+6] = 0.604545; 
-            daa[16*20+7] = 0.129836; daa[16*20+8] = 0.584262; daa[16*20+9] = 1.033739; daa[16*20+10] = 0.302936; daa[16*20+11] = 1.136863; daa[16*20+12] = 2.020366; 
-            daa[16*20+13] = 0.165001; daa[16*20+14] = 0.571468; daa[16*20+15] = 6.472279;
-
-            daa[17*20+0] = 0.180717; daa[17*20+1] = 0.593607;daa[17*20+2] = 0.045376; daa[17*20+3] = 0.029890; daa[17*20+4] = 0.670128; daa[17*20+5] = 0.236199; daa[17*20+6] = 0.077852; 
-            daa[17*20+7] = 0.268491; daa[17*20+8] = 0.597054; daa[17*20+9] = 0.111660; daa[17*20+10] = 0.619632; daa[17*20+11] = 0.049906; daa[17*20+12] = 0.696175; 
-            daa[17*20+13] = 2.457121; daa[17*20+14] = 0.095131; daa[17*20+15] = 0.248862; daa[17*20+16] = 0.140825;
-
-            daa[18*20+0] = 0.218959; daa[18*20+1] = 0.314440;daa[18*20+2] = 0.612025; daa[18*20+3] = 0.135107; daa[18*20+4] = 1.165532; daa[18*20+5] = 0.257336; daa[18*20+6] = 0.120037; 
-            daa[18*20+7] = 0.054679; daa[18*20+8] = 5.306834; daa[18*20+9] = 0.232523; daa[18*20+10] = 0.299648; daa[18*20+11] = 0.131932; daa[18*20+12] = 0.481306; 
-            daa[18*20+13] = 7.803902; daa[18*20+14] = 0.089613; daa[18*20+15] = 0.400547; daa[18*20+16] = 0.245841; daa[18*20+17] = 3.151815;
-
-            daa[19*20+0] = 2.547870; daa[19*20+1] = 0.170887;daa[19*20+2] = 0.083688; daa[19*20+3] = 0.037967; daa[19*20+4] = 1.959291; daa[19*20+5] = 0.210332; daa[19*20+6] = 0.245034; 
-            daa[19*20+7] = 0.076701; daa[19*20+8] = 0.119013; daa[19*20+9] = 10.649107; daa[19*20+10] = 1.702745; daa[19*20+11] = 0.185202; daa[19*20+12] = 1.898718; 
-            daa[19*20+13] = 0.654683; daa[19*20+14] = 0.296501; daa[19*20+15] = 0.098369; daa[19*20+16] = 2.188158; daa[19*20+17] = 0.189510; daa[19*20+18] = 0.249313;
-            
-            f[0]  = 0.079066; f[1]  = 0.055941; f[2]  = 0.041977; f[3]  = 0.053052;
-	    f[4]  = 0.012937; f[5]  = 0.040767; f[6]  = 0.071586; f[7]  = 0.057337;
-	    f[8]  = 0.022355; f[9]  = 0.062157; f[10] = 0.099081; f[11] = 0.064600;
-	    f[12] = 0.022951; f[13] = 0.042302; f[14] = 0.044040; f[15] = 0.061197;
-	    f[16] = 0.053287; f[17] = 0.012066; f[18] = 0.034155; f[19] = 0.069146;       
-          }       
-          break;
-          case PLL_LG4M:
-          {
-            double 
-              rates[4][190] = 
-              {
-                {
-                  0.269343
-                  , 0.254612, 0.150988
-                  , 0.236821, 0.031863, 0.659648
-                  , 2.506547, 0.938594, 0.975736, 0.175533
-                  , 0.359080, 0.348288, 0.697708, 0.086573, 0.095967
-                  , 0.304674, 0.156000, 0.377704, 0.449140, 0.064706, 4.342595
-                  , 1.692015, 0.286638, 0.565095, 0.380358, 0.617945, 0.202058, 0.264342
-                  , 0.251974, 0.921633, 1.267609, 0.309692, 0.390429, 2.344059, 0.217750, 0.104842
-                  , 1.085220, 0.325624, 0.818658, 0.037814, 1.144150, 0.534567, 0.222793, 0.062682, 0.567431
-                  , 0.676353, 0.602366, 0.217027, 0.007533, 1.595775, 0.671143, 0.158424, 0.070463, 0.764255, 8.226528
-                  , 0.179155, 0.971338, 1.343718, 0.133744, 0.122468, 0.983857, 0.994128, 0.220916, 0.410581, 0.387487, 0.181110
-                  , 1.636817, 0.515217, 0.670461, 0.071252, 1.534848, 5.288642, 0.255628, 0.094198, 0.257229, 25.667158, 6.819689, 1.591212
-                  , 0.235498, 0.123932, 0.099793, 0.030425, 0.897279, 0.112229, 0.022529, 0.047488, 0.762914, 1.344259, 0.865691, 0.038921, 2.030833
-                  , 1.265605, 0.040163, 0.173354, 0.027579, 0.259961, 0.580374, 0.088041, 0.145595, 0.143676, 0.298859, 1.020117, 0.000714, 0.190019, 0.093964
-                  , 5.368405, 0.470952, 5.267140, 0.780505, 4.986071, 0.890554, 0.377949, 1.755515, 0.786352, 0.527246, 0.667783, 0.659948, 0.731921, 0.837669, 1.355630
-                  , 1.539394, 0.326789, 1.688169, 0.283738, 1.389282, 0.329821, 0.231770, 0.117017, 0.449977, 3.531600, 0.721586, 0.497588, 2.691697, 0.152088, 0.698040, 16.321298
-                  , 0.140944, 0.375611, 0.025163, 0.002757, 0.801456, 0.257253, 0.103678, 0.132995, 0.345834, 0.377156, 0.839647, 0.176970, 0.505682, 1.670170, 0.091298, 0.210096, 0.013165
-                  , 0.199836, 0.146857, 0.806275, 0.234246, 1.436970, 0.319669, 0.010076, 0.036859, 3.503317, 0.598632, 0.738969, 0.154436, 0.579000, 4.245524, 0.074524, 0.454195, 0.232913, 1.178490
-                  , 9.435529, 0.285934, 0.395670, 0.130890, 6.097263, 0.516259, 0.503665, 0.222960, 0.149143, 13.666175, 2.988174, 0.162725, 5.973826, 0.843416, 0.597394, 0.701149, 4.680002, 0.300085, 0.416262
-                },
-                {
-                  0.133720
-                  , 0.337212, 0.749052
-                  , 0.110918, 0.105087, 4.773487
-                  , 3.993460, 0.188305, 1.590332, 0.304942
-                  , 0.412075, 2.585774, 1.906884, 0.438367, 0.242076
-                  , 0.435295, 0.198278, 0.296366, 7.470333, 0.008443, 3.295515
-                  , 7.837540, 0.164607, 0.431724, 0.153850, 1.799716, 0.269744, 0.242866
-                  , 0.203872, 2.130334, 9.374479, 1.080878, 0.152458, 12.299133, 0.279589, 0.089714
-                  , 0.039718, 0.024553, 0.135254, 0.014979, 0.147498, 0.033964, 0.005585, 0.007248, 0.022746
-                  , 0.075784, 0.080091, 0.084971, 0.014128, 0.308347, 0.500836, 0.022833, 0.022999, 0.161270, 1.511682
-                  , 0.177662, 10.373708, 1.036721, 0.038303, 0.043030, 2.181033, 0.321165, 0.103050, 0.459502, 0.021215, 0.078395
-                  , 0.420784, 0.192765, 0.329545, 0.008331, 0.883142, 1.403324, 0.168673, 0.160728, 0.612573, 1.520889, 7.763266, 0.307903
-                  , 0.071268, 0.019652, 0.088753, 0.013547, 0.566609, 0.071878, 0.020050, 0.041022, 0.625361, 0.382806, 1.763059, 0.044644, 1.551911
-                  , 0.959127, 1.496585, 0.377794, 0.332010, 0.318192, 1.386970, 0.915904, 0.224255, 2.611479, 0.029351, 0.068250, 1.542356, 0.047525, 0.182715
-                  , 11.721512, 0.359408, 2.399158, 0.219464, 9.104192, 0.767563, 0.235229, 3.621219, 0.971955, 0.033780, 0.043035, 0.236929, 0.319964, 0.124977, 0.840651
-                  , 2.847068, 0.218463, 1.855386, 0.109808, 4.347048, 0.765848, 0.164569, 0.312024, 0.231569, 0.356327, 0.159597, 0.403210, 1.135162, 0.106903, 0.269190, 9.816481
-                  , 0.030203, 0.387292, 0.118878, 0.067287, 0.190240, 0.122113, 0.007023, 0.137411, 0.585141, 0.020634, 0.228824, 0.000122, 0.474862, 3.135128, 0.030313, 0.093830, 0.119152
-                  , 0.067183, 0.130101, 0.348730, 0.061798, 0.301198, 0.095382, 0.095764, 0.044628, 2.107384, 0.046105, 0.100117, 0.017073, 0.192383, 8.367641, 0.000937, 0.137416, 0.044722, 4.179782
-                  , 0.679398, 0.041567, 0.092408, 0.023701, 1.271187, 0.115566, 0.055277, 0.086988, 0.060779, 8.235167, 0.609420, 0.061764, 0.581962, 0.184187, 0.080246, 0.098033, 1.438350, 0.023439, 0.039124
-                },          
-                {
-                  0.421017
-                  , 0.316236, 0.693340
-                  , 0.285984, 0.059926, 6.158219
-                  , 4.034031, 1.357707, 0.708088, 0.063669
-                  , 0.886972, 2.791622, 1.701830, 0.484347, 0.414286
-                  , 0.760525, 0.233051, 0.378723, 4.032667, 0.081977, 4.940411
-                  , 0.754103, 0.402894, 2.227443, 1.102689, 0.416576, 0.459376, 0.508409
-                  , 0.571422, 2.319453, 5.579973, 0.885376, 1.439275, 4.101979, 0.576745, 0.428799
-                  , 0.162152, 0.085229, 0.095692, 0.006129, 0.490937, 0.104843, 0.045514, 0.004705, 0.098934
-                  , 0.308006, 0.287051, 0.056994, 0.007102, 0.958988, 0.578990, 0.067119, 0.024403, 0.342983, 3.805528
-                  , 0.390161, 7.663209, 1.663641, 0.105129, 0.135029, 3.364474, 0.652618, 0.457702, 0.823674, 0.129858, 0.145630
-                  , 1.042298, 0.364551, 0.293222, 0.037983, 1.486520, 1.681752, 0.192414, 0.070498, 0.222626, 4.529623, 4.781730, 0.665308
-                  , 0.362476, 0.073439, 0.129245, 0.020078, 1.992483, 0.114549, 0.023272, 0.064490, 1.491794, 1.113437, 2.132006, 0.041677, 1.928654
-                  , 1.755491, 0.087050, 0.099325, 0.163817, 0.242851, 0.322939, 0.062943, 0.198698, 0.192904, 0.062948, 0.180283, 0.059655, 0.129323, 0.065778
-                  , 3.975060, 0.893398, 5.496314, 1.397313, 3.575120, 1.385297, 0.576191, 1.733288, 1.021255, 0.065131, 0.129115, 0.600308, 0.387276, 0.446001, 1.298493
-                  , 2.565079, 0.534056, 2.143993, 0.411388, 2.279084, 0.893006, 0.528209, 0.135731, 0.518741, 0.972662, 0.280700, 0.890086, 1.828755, 0.189028, 0.563778, 7.788147
-                  , 0.283631, 0.497926, 0.075454, 0.043794, 1.335322, 0.308605, 0.140137, 0.150797, 1.409726, 0.119868, 0.818331, 0.080591, 1.066017, 3.754687, 0.073415, 0.435046, 0.197272
-                  , 0.242513, 0.199157, 0.472207, 0.085937, 2.039787, 0.262751, 0.084578, 0.032247, 7.762326, 0.153966, 0.299828, 0.117255, 0.438215, 14.506235, 0.089180, 0.352766, 0.215417, 5.054245
-                  , 2.795818, 0.107130, 0.060909, 0.029724, 2.986426, 0.197267, 0.196977, 0.044327, 0.116751, 7.144311, 1.848622, 0.118020, 1.999696, 0.705747, 0.272763, 0.096935, 1.820982, 0.217007, 0.172975
-                },
-                {
-                  0.576160
-                  , 0.567606, 0.498643
-                  , 0.824359, 0.050698, 3.301401
-                  , 0.822724, 4.529235, 1.291808, 0.101930
-                  , 1.254238, 2.169809, 1.427980, 0.449474, 0.868679
-                  , 1.218615, 0.154502, 0.411471, 3.172277, 0.050239, 2.138661
-                  , 1.803443, 0.604673, 2.125496, 1.276384, 1.598679, 0.502653, 0.479490
-                  , 0.516862, 2.874265, 4.845769, 0.719673, 3.825677, 4.040275, 0.292773, 0.596643
-                  , 0.180898, 0.444586, 0.550969, 0.023542, 2.349573, 0.370160, 0.142187, 0.016618, 0.500788
-                  , 0.452099, 0.866322, 0.201033, 0.026731, 2.813990, 1.645178, 0.135556, 0.072152, 1.168817, 5.696116
-                  , 0.664186, 2.902886, 2.101971, 0.127988, 0.200218, 2.505933, 0.759509, 0.333569, 0.623100, 0.547454, 0.363656
-                  , 0.864415, 0.835049, 0.632649, 0.079201, 2.105931, 1.633544, 0.216462, 0.252419, 0.665406, 7.994105, 11.751178, 1.096842
-                  , 0.324478, 0.208947, 0.280339, 0.041683, 4.788477, 0.107022, 0.067711, 0.171320, 3.324779, 2.965328, 5.133843, 0.084856, 4.042591
-                  , 1.073043, 0.173826, 0.041985, 0.270336, 0.121299, 0.351384, 0.228565, 0.225318, 0.376089, 0.058027, 0.390354, 0.214230, 0.058954, 0.126299
-                  , 3.837562, 0.884342, 4.571911, 0.942751, 6.592827, 1.080063, 0.465397, 3.137614, 1.119667, 0.362516, 0.602355, 0.716940, 0.506796, 1.444484, 1.432558
-                  , 2.106026, 0.750016, 2.323325, 0.335915, 1.654673, 1.194017, 0.617231, 0.318671, 0.801030, 4.455842, 0.580191, 1.384210, 3.522468, 0.473128, 0.432718, 5.716300
-                  , 0.163720, 0.818102, 0.072322, 0.068275, 3.305436, 0.373790, 0.054323, 0.476587, 1.100360, 0.392946, 1.703323, 0.085720, 1.725516, 5.436253, 0.053108, 0.498594, 0.231832
-                  , 0.241167, 0.302440, 1.055095, 0.246940, 9.741942, 0.249895, 0.129973, 0.052363, 11.542498, 1.047449, 1.319667, 0.139770, 1.330225, 26.562270, 0.046986, 0.737653, 0.313460, 5.165098
-                  , 1.824586, 0.435795, 0.179086, 0.091739, 3.609570, 0.649507, 0.656681, 0.225234, 0.473437, 19.897252, 3.001995, 0.452926, 3.929598, 1.692159, 0.370204, 0.373501, 3.329822, 0.326593, 0.860743
-                }
-              };
-            
-            double
-              freqs[4][20] = 
-              {{0.082276,0.055172,0.043853,0.053484,0.018957,0.028152,0.046679,0.157817,0.033297,0.028284,0.054284,0.025275,0.023665,0.041874,0.063071,0.066501,0.065424,0.023837,0.038633,0.049465},
-               {0.120900,0.036460,0.026510,0.040410,0.015980,0.021132,0.025191,0.036369,0.015884,0.111029,0.162852,0.024820,0.028023,0.074058,0.012065,0.041963,0.039072,0.012666,0.040478,0.114137},
-               {0.072639,0.051691,0.038642,0.055580,0.009829,0.031374,0.048731,0.065283,0.023791,0.086640,0.120847,0.052177,0.026728,0.032589,0.039238,0.046748,0.053361,0.008024,0.037426,0.098662},
-               {0.104843,0.078835,0.043513,0.090498,0.002924,0.066163,0.151640,0.038843,0.022556,0.018383,0.038687,0.104462,0.010166,0.009089,0.066950,0.053667,0.049486,0.004409,0.012924,0.031963}};
-            
-            int 
-              i, 
-              j, 
-              r = 0;
-            
-            for(i = 1; i < 20; i++)
-              for(j = 0; j < i; j++)
-                {
-                  daa[i * 20 + j] = rates[lg4_index][r];
-                  r++;
-                }
-            
-            assert(r == 190);
-            
-            for(i = 0; i < 20; i++)
-              f[i] = freqs[lg4_index][i];         
-            
-          }
-          break;
-      	case PLL_LG4X:
-			{
-			  double
-			  rates[4][190] =
-				  {
-				  {
-				  0.295719,
-				  0.067388, 0.448317,
-				  0.253712, 0.457483, 2.358429,
-				  1.029289, 0.576016, 0.251987, 0.189008,
-				  0.107964, 1.741924, 0.216561, 0.599450, 0.029955,
-				  0.514644, 0.736017, 0.503084, 109.901504, 0.084794, 4.117654,
-				  10.868848, 0.704334, 0.435271, 1.070052, 1.862626, 0.246260, 1.202023,
-				  0.380498, 5.658311, 4.873453, 5.229858, 0.553477, 6.508329, 1.634845, 0.404968,
-				  0.084223, 0.123387, 0.090748, 0.052764, 0.151733, 0.054187, 0.060194, 0.048984, 0.204296,
-				  0.086976, 0.221777, 0.033310, 0.021407, 0.230320, 0.195703, 0.069359, 0.069963, 0.504221, 1.495537,
-				  0.188789, 93.433377, 0.746537, 0.621146, 0.096955, 1.669092, 2.448827, 0.256662, 1.991533, 0.091940, 0.122332,
-				  0.286389, 0.382175, 0.128905, 0.081091, 0.352526, 0.810168, 0.232297, 0.228519, 0.655465, 1.994320, 3.256485, 0.457430,
-				  0.155567, 0.235965, 0.127321, 0.205164, 0.590018, 0.066081, 0.064822, 0.241077, 6.799829, 0.754940, 2.261319, 0.163849, 1.559944,
-				  1.671061, 6.535048, 0.904011, 5.164456, 0.386853, 2.437439, 3.537387, 4.320442, 11.291065, 0.170343, 0.848067, 5.260446, 0.426508, 0.438856,
-				  2.132922, 0.525521, 0.939733, 0.747330, 1.559564, 0.165666, 0.435384, 3.656545, 0.961142, 0.050315, 0.064441, 0.360946, 0.132547, 0.306683, 4.586081,
-				  0.529591, 0.303537, 0.435450, 0.308078, 0.606648, 0.106333, 0.290413, 0.290216, 0.448965, 0.372166, 0.102493, 0.389413, 0.498634, 0.109129, 2.099355, 3.634276,
-				  0.115551, 0.641259, 0.046646, 0.260889, 0.587531, 0.093417, 0.280695, 0.307466, 6.227274, 0.206332, 0.459041, 0.033291, 0.559069, 18.392863, 0.411347, 0.101797, 0.034710,
-				  0.102453, 0.289466, 0.262076, 0.185083, 0.592318, 0.035149, 0.105999, 0.096556, 20.304886, 0.097050, 0.133091, 0.115301, 0.264728, 66.647302, 0.476350, 0.148995, 0.063603, 20.561407,
-				  0.916683, 0.102065, 0.043986, 0.080708, 0.885230, 0.072549, 0.206603, 0.306067, 0.205944, 5.381403, 0.561215, 0.112593, 0.693307, 0.400021, 0.584622, 0.089177, 0.755865, 0.133790, 0.154902
-				  },
-				  {
-				  0.066142,
-				  0.590377, 0.468325,
-				  0.069930, 0.013688, 2.851667,
-				  9.850951, 0.302287, 3.932151, 0.146882,
-				  1.101363, 1.353957, 8.159169, 0.249672, 0.582670,
-				  0.150375, 0.028386, 0.219934, 0.560142, 0.005035, 3.054085,
-				  0.568586, 0.037750, 0.421974, 0.046719, 0.275844, 0.129551, 0.037250,
-				  0.051668, 0.262130, 2.468752, 0.106259, 0.098208, 4.210126, 0.029788, 0.013513,
-				  0.127170, 0.016923, 0.344765, 0.003656, 0.445038, 0.165753, 0.008541, 0.002533, 0.031779,
-				  0.292429, 0.064289, 0.210724, 0.004200, 1.217010, 1.088704, 0.014768, 0.005848, 0.064558, 7.278994,
-				  0.071458, 0.855973, 1.172204, 0.014189, 0.033969, 1.889645, 0.125869, 0.031390, 0.065585, 0.029917, 0.042762,
-				  1.218562, 0.079621, 0.763553, 0.009876, 1.988516, 3.344809, 0.056702, 0.021612, 0.079927, 7.918203, 14.799537, 0.259400,
-				  0.075144, 0.011169, 0.082464, 0.002656, 0.681161, 0.111063, 0.004186, 0.004854, 0.095591, 0.450964, 1.506485, 0.009457, 1.375871,
-				  7.169085, 0.161937, 0.726566, 0.040244, 0.825960, 2.067758, 0.110993, 0.129497, 0.196886, 0.169797, 0.637893, 0.090576, 0.457399, 0.143327,
-				  30.139501, 0.276530, 11.149790, 0.267322, 18.762977, 3.547017, 0.201148, 0.976631, 0.408834, 0.104288, 0.123793, 0.292108, 0.598048, 0.328689, 3.478333,
-				  13.461692, 0.161053, 4.782635, 0.053740, 11.949233, 2.466507, 0.139705, 0.053397, 0.126088, 1.578530, 0.641351, 0.297913, 4.418398, 0.125011, 2.984862, 13.974326,
-				  0.021372, 0.081472, 0.058046, 0.006597, 0.286794, 0.188236, 0.009201, 0.019475, 0.037226, 0.015909, 0.154810, 0.017172, 0.239749, 0.562720, 0.061299, 0.154326, 0.060703,
-				  0.045779, 0.036742, 0.498072, 0.027639, 0.534219, 0.203493, 0.012095, 0.004964, 0.452302, 0.094365, 0.140750, 0.021976, 0.168432, 1.414883, 0.077470, 0.224675, 0.123480, 0.447011,
-				  4.270235, 0.030342, 0.258487, 0.012745, 4.336817, 0.281953, 0.043812, 0.015539, 0.016212, 16.179952, 3.416059, 0.032578, 2.950318, 0.227807, 1.050562, 0.112000, 5.294490, 0.033381, 0.045528
-				  },
-				  {
-				  0.733336,
-				  0.558955, 0.597671,
-				  0.503360, 0.058964, 5.581680,
-				  4.149599, 2.863355, 1.279881, 0.225860,
-				  1.415369, 2.872594, 1.335650, 0.434096, 1.043232,
-				  1.367574, 0.258365, 0.397108, 2.292917, 0.209978, 4.534772,
-				  1.263002, 0.366868, 1.840061, 1.024707, 0.823594, 0.377181, 0.496780,
-				  0.994098, 2.578946, 5.739035, 0.821921, 3.039380, 4.877840, 0.532488, 0.398817,
-				  0.517204, 0.358350, 0.284730, 0.027824, 1.463390, 0.370939, 0.232460, 0.008940, 0.349195,
-				  0.775054, 0.672023, 0.109781, 0.021443, 1.983693, 1.298542, 0.169219, 0.043707, 0.838324, 5.102837,
-				  0.763094, 5.349861, 1.612642, 0.088850, 0.397640, 3.509873, 0.755219, 0.436013, 0.888693, 0.561690, 0.401070,
-				  1.890137, 0.691594, 0.466979, 0.060820, 2.831098, 2.646440, 0.379926, 0.087640, 0.488389, 7.010411, 8.929538, 1.357738,
-				  0.540460, 0.063347, 0.141582, 0.018288, 4.102068, 0.087872, 0.020447, 0.064863, 1.385133, 3.054968, 5.525874, 0.043394, 3.135353,
-				  0.200122, 0.032875, 0.019509, 0.042687, 0.059723, 0.072299, 0.023282, 0.036426, 0.050226, 0.039318, 0.067505, 0.023126, 0.012695, 0.015631,
-				  4.972745, 0.821562, 4.670980, 1.199607, 5.901348, 1.139018, 0.503875, 1.673207, 0.962470, 0.204155, 0.273372, 0.567639, 0.570771, 0.458799, 0.233109,
-				  1.825593, 0.580847, 1.967383, 0.420710, 2.034980, 0.864479, 0.577513, 0.124068, 0.502294, 2.653232, 0.437116, 1.048288, 2.319555, 0.151684, 0.077004, 8.113282,
-				  0.450842, 0.661866, 0.088064, 0.037642, 2.600668, 0.390688, 0.109318, 0.218118, 1.065585, 0.564368, 1.927515, 0.120994, 1.856122, 4.154750, 0.011074, 0.377578, 0.222293,
-				  0.526135, 0.265730, 0.581928, 0.141233, 5.413080, 0.322761, 0.153776, 0.039217, 8.351808, 0.854294, 0.940458, 0.180650, 0.975427, 11.429924, 0.026268, 0.429221, 0.273138, 4.731579,
-				  3.839269, 0.395134, 0.145401, 0.090101, 4.193725, 0.625409, 0.696533, 0.104335, 0.377304, 15.559906, 2.508169, 0.449074, 3.404087, 1.457957, 0.052132, 0.260296, 2.903836, 0.564762, 0.681215
-				  },
-				  {
-				  0.658412,
-				  0.566269, 0.540749,
-				  0.854111, 0.058015, 3.060574,
-				  0.884454, 5.851132, 1.279257, 0.160296,
-				  1.309554, 2.294145, 1.438430, 0.482619, 0.992259,
-				  1.272639, 0.182966, 0.431464, 2.992763, 0.086318, 2.130054,
-				  1.874713, 0.684164, 2.075952, 1.296206, 2.149634, 0.571406, 0.507160,
-				  0.552007, 3.192521, 4.840271, 0.841829, 5.103188, 4.137385, 0.351381, 0.679853,
-				  0.227683, 0.528161, 0.644656, 0.031467, 3.775817, 0.437589, 0.189152, 0.025780, 0.665865,
-				  0.581512, 1.128882, 0.266076, 0.048542, 3.954021, 2.071689, 0.217780, 0.082005, 1.266791, 8.904999,
-				  0.695190, 3.010922, 2.084975, 0.132774, 0.190734, 2.498630, 0.767361, 0.326441, 0.680174, 0.652629, 0.440178,
-				  0.967985, 1.012866, 0.720060, 0.133055, 1.776095, 1.763546, 0.278392, 0.343977, 0.717301, 10.091413, 14.013035, 1.082703,
-				  0.344015, 0.227296, 0.291854, 0.056045, 4.495841, 0.116381, 0.092075, 0.195877, 4.001286, 2.671718, 5.069337, 0.091278, 4.643214,
-				  0.978992, 0.156635, 0.028961, 0.209188, 0.264277, 0.296578, 0.177263, 0.217424, 0.362942, 0.086367, 0.539010, 0.172734, 0.121821, 0.161015,
-				  3.427163, 0.878405, 4.071574, 0.925172, 7.063879, 1.033710, 0.451893, 3.057583, 1.189259, 0.359932, 0.742569, 0.693405, 0.584083, 1.531223, 1.287474,
-				  2.333253, 0.802754, 2.258357, 0.360522, 2.221150, 1.283423, 0.653836, 0.377558, 0.964545, 4.797423, 0.780580, 1.422571, 4.216178, 0.599244, 0.444362, 5.231362,
-				  0.154701, 0.830884, 0.073037, 0.094591, 3.017954, 0.312579, 0.074620, 0.401252, 1.350568, 0.336801, 1.331875, 0.068958, 1.677263, 5.832025, 0.076328, 0.548763, 0.208791,
-				  0.221089, 0.431617, 1.238426, 0.313945, 8.558815, 0.305772, 0.181992, 0.072258, 12.869737, 1.021885, 1.531589, 0.163829, 1.575754, 33.873091, 0.079916, 0.831890, 0.307846, 5.910440,
-				  2.088785, 0.456530, 0.199728, 0.118104, 4.310199, 0.681277, 0.752277, 0.241015, 0.531100, 23.029406, 4.414850, 0.481711, 5.046403, 1.914768, 0.466823, 0.382271, 3.717971, 0.282540, 0.964421
-				  }
-				  };
-			  double
-			  freqs[4][20] =
-				  {{0.147383 , 0.017579 , 0.058208 , 0.017707 , 0.026331 , 0.041582 , 0.017494 , 0.027859 , 0.011849 , 0.076971 ,
-				  0.147823 , 0.019535 , 0.037132 , 0.029940 , 0.008059 , 0.088179 , 0.089653 , 0.006477 , 0.032308 , 0.097931},
-				  {0.063139 , 0.066357 , 0.011586 , 0.066571 , 0.010800 , 0.009276 , 0.053984 , 0.146986 , 0.034214 , 0.088822 ,
-				  0.098196 , 0.032390 , 0.021263 , 0.072697 , 0.016761 , 0.020711 , 0.020797 , 0.025463 , 0.045615 , 0.094372},
-				  {0.062457 , 0.066826 , 0.049332 , 0.065270 , 0.006513 , 0.041231 , 0.058965 , 0.080852 , 0.028024 , 0.037024 ,
-				  0.075925 , 0.064131 , 0.019620 , 0.028710 , 0.104579 , 0.056388 , 0.062027 , 0.008241 , 0.033124 , 0.050760},
-				  {0.106471 , 0.074171 , 0.044513 , 0.096390 , 0.002148 , 0.066733 , 0.158908 , 0.037625 , 0.020691 , 0.014608 ,
-				  0.028797 , 0.105352 , 0.007864 , 0.007477 , 0.083595 , 0.055726 , 0.047711 , 0.003975 , 0.010088 , 0.027159}};
-			  int
-			  i,
-			  j,
-			  r = 0;
-			  for(i = 1; i < 20; i++)
-				  for(j = 0; j < i; j++)
-				  {
-					  daa[i * 20 + j] = rates[lg4_index][r];
-					  r++;
-				  }
-			  assert(r == 190);
-			  for(i = 0; i < 20; i++)
-				  f[i] = freqs[lg4_index][i];
-		  }
-		  break;
-        case PLL_MTART:
-          {
-           
-
-            daa[1*20+0]=   0.2;
-            daa[2*20+0]=   0.2;
-           daa[2*20+1]=   0.2;
-           daa[3*20+0]=   1;
-           daa[3*20+1]=   4;
-           daa[3*20+2]=   500;
-           daa[4*20+0]=   254;
-           daa[4*20+1]=   36;
-           daa[4*20+2]=   98;
-           daa[4*20+3]=   11;
-           daa[5*20+0]=   0.2;
-           daa[5*20+1]=   154;
-           daa[5*20+2]=   262;
-           daa[5*20+3]=   0.2;
-           daa[5*20+4]=   0.2;
-           daa[6*20+0]=   0.2;
-           daa[6*20+1]=   0.2;
-           daa[6*20+2]=   183;
-           daa[6*20+3]=   862;
-           daa[6*20+4]=   0.2;
-           daa[6*20+5]=   262;
-           daa[7*20+0]=   200;
-           daa[7*20+1]=   0.2;
-           daa[7*20+2]=   121;
-           daa[7*20+3]=   12;
-           daa[7*20+4]=   81;
-           daa[7*20+5]=   3;
-           daa[7*20+6]=   44;
-           daa[8*20+0]=   0.2;
-           daa[8*20+1]=   41;
-           daa[8*20+2]=   180;
-           daa[8*20+3]=   0.2;
-           daa[8*20+4]=   12;
-           daa[8*20+5]=   314;
-           daa[8*20+6]=   15;
-           daa[8*20+7]=   0.2;
-           daa[9*20+0]=   26;
-           daa[9*20+1]=   2;
-           daa[9*20+2]=   21;
-           daa[9*20+3]=   7;
-           daa[9*20+4]=   63;
-           daa[9*20+5]=   11;
-           daa[9*20+6]=   7;
-           daa[9*20+7]=   3;
-           daa[9*20+8]=   0.2;
-           daa[10*20+0]=  4;
-           daa[10*20+1]=  2;
-           daa[10*20+2]=  13;
-           daa[10*20+3]=  1;
-           daa[10*20+4]=  79;
-           daa[10*20+5]=  16;
-           daa[10*20+6]=  2;
-           daa[10*20+7]=  1;
-           daa[10*20+8]=  6;
-           daa[10*20+9]=  515;
-           daa[11*20+0]=  0.2;
-           daa[11*20+1]=  209;
-           daa[11*20+2]=  467;
-           daa[11*20+3]=  2;
-           daa[11*20+4]=  0.2;
-           daa[11*20+5]=  349;
-           daa[11*20+6]=  106;
-           daa[11*20+7]=  0.2;
-           daa[11*20+8]=  0.2;
-           daa[11*20+9]=  3;
-           daa[11*20+10]= 4;
-           daa[12*20+0]=  121;
-           daa[12*20+1]=  5;
-           daa[12*20+2]=  79;
-           daa[12*20+3]=  0.2;
-           daa[12*20+4]=  312;
-           daa[12*20+5]=  67;
-           daa[12*20+6]=  0.2;
-           daa[12*20+7]=  56;
-           daa[12*20+8]=  0.2;
-           daa[12*20+9]=  515;
-           daa[12*20+10]= 885;
-           daa[12*20+11]= 106;
-           daa[13*20+0]=  13;
-           daa[13*20+1]=  5;
-           daa[13*20+2]=  20;
-           daa[13*20+3]=  0.2;
-           daa[13*20+4]=  184;
-           daa[13*20+5]=  0.2;
-           daa[13*20+6]=  0.2;
-           daa[13*20+7]=  1;
-           daa[13*20+8]=  14;
-           daa[13*20+9]=  118;
-           daa[13*20+10]= 263;
-           daa[13*20+11]= 11;
-           daa[13*20+12]= 322;
-           daa[14*20+0]=  49;
-           daa[14*20+1]=  0.2;
-           daa[14*20+2]=  17;
-           daa[14*20+3]=  0.2;
-           daa[14*20+4]=  0.2;
-           daa[14*20+5]=  39;
-           daa[14*20+6]=  8;
-           daa[14*20+7]=  0.2;
-           daa[14*20+8]=  1;
-           daa[14*20+9]=  0.2;
-           daa[14*20+10]= 12;
-           daa[14*20+11]= 17;
-           daa[14*20+12]= 5;
-           daa[14*20+13]= 15;
-           daa[15*20+0]=  673;
-           daa[15*20+1]=  3;
-           daa[15*20+2]=  398;
-           daa[15*20+3]=  44;
-           daa[15*20+4]=  664;
-           daa[15*20+5]=  52;
-           daa[15*20+6]=  31;
-           daa[15*20+7]=  226;
-           daa[15*20+8]=  11;
-           daa[15*20+9]=  7;
-           daa[15*20+10]= 8;
-           daa[15*20+11]= 144;
-           daa[15*20+12]= 112;
-           daa[15*20+13]= 36;
-           daa[15*20+14]= 87;
-           daa[16*20+0]=  244;
-           daa[16*20+1]=  0.2;
-           daa[16*20+2]=  166;
-           daa[16*20+3]=  0.2;
-           daa[16*20+4]=  183;
-           daa[16*20+5]=  44;
-           daa[16*20+6]=  43;
-           daa[16*20+7]=  0.2;
-           daa[16*20+8]=  19;
-           daa[16*20+9]=  204;
-           daa[16*20+10]= 48;
-           daa[16*20+11]= 70;
-           daa[16*20+12]= 289;
-           daa[16*20+13]= 14;
-           daa[16*20+14]= 47;
-           daa[16*20+15]= 660;
-           daa[17*20+0]=  0.2;
-           daa[17*20+1]=  0.2;
-           daa[17*20+2]=  8;
-           daa[17*20+3]=  0.2;
-           daa[17*20+4]=  22;
-           daa[17*20+5]=  7;
-           daa[17*20+6]=  11;
-           daa[17*20+7]=  2;
-           daa[17*20+8]=  0.2;
-           daa[17*20+9]=  0.2;
-           daa[17*20+10]= 21;
-           daa[17*20+11]= 16;
-           daa[17*20+12]= 71;
-           daa[17*20+13]= 54;
-           daa[17*20+14]= 0.2;
-           daa[17*20+15]= 2;
-           daa[17*20+16]= 0.2;
-           daa[18*20+0]=  1;
-           daa[18*20+1]=  4;
-           daa[18*20+2]=  251;
-           daa[18*20+3]=  0.2;
-           daa[18*20+4]=  72;
-           daa[18*20+5]=  87;
-           daa[18*20+6]=  8;
-           daa[18*20+7]=  9;
-           daa[18*20+8]=  191;
-           daa[18*20+9]=  12;
-           daa[18*20+10]= 20;
-           daa[18*20+11]= 117;
-           daa[18*20+12]= 71;
-           daa[18*20+13]= 792;
-           daa[18*20+14]= 18;
-           daa[18*20+15]= 30;
-           daa[18*20+16]= 46;
-           daa[18*20+17]= 38;
-           daa[19*20+0]=  340;
-           daa[19*20+1]=  0.2;
-           daa[19*20+2]=  23;
-           daa[19*20+3]=  0.2;
-           daa[19*20+4]=  350;
-           daa[19*20+5]=  0.2;
-           daa[19*20+6]=  14;
-           daa[19*20+7]=  3;
-           daa[19*20+8]=  0.2;
-           daa[19*20+9]=  1855;
-           daa[19*20+10]= 85;
-           daa[19*20+11]= 26;
-           daa[19*20+12]= 281;
-           daa[19*20+13]= 52;
-           daa[19*20+14]= 32;
-           daa[19*20+15]= 61;
-           daa[19*20+16]= 544;
-           daa[19*20+17]= 0.2;
-           daa[19*20+18]= 2;
-           
-           f[0]=  0.054116;
-           f[1]=  0.018227;
-           f[2]=  0.039903;
-           f[3]=  0.020160;
-           f[4]=  0.009709;
-           f[5]=  0.018781;
-           f[6]=  0.024289;
-           f[7]=  0.068183;
-           f[8]=  0.024518;
-           f[9]=  0.092638;
-           f[10]= 0.148658;
-           f[11]= 0.021718;
-           f[12]= 0.061453;
-           f[13]= 0.088668;
-           f[14]= 0.041826;
-           f[15]= 0.091030;
-           f[16]= 0.049194;
-           f[17]= 0.029786;
-           f[18]= 0.039443;
-           f[19]= 0.057700;
-          }
-          break;
-        case PLL_MTZOA:
-          {
-           daa[1*20+0]=   3.3;
-           daa[2*20+0]=   1.7;
-           daa[2*20+1]=   33.6;
-           daa[3*20+0]=   16.1;
-           daa[3*20+1]=   3.2;
-           daa[3*20+2]=   617.0;
-           daa[4*20+0]=   272.5;
-           daa[4*20+1]=   61.1;
-           daa[4*20+2]=   94.6;
-           daa[4*20+3]=   9.5;
-           daa[5*20+0]=   7.3;
-           daa[5*20+1]=   231.0;
-           daa[5*20+2]=   190.3;
-           daa[5*20+3]=   19.3;
-           daa[5*20+4]=   49.1;
-           daa[6*20+0]=   17.1;
-           daa[6*20+1]=   6.4;
-           daa[6*20+2]=   174.0;
-           daa[6*20+3]=   883.6;
-           daa[6*20+4]=   3.4;
-           daa[6*20+5]=   349.4;
-           daa[7*20+0]=   289.3;
-           daa[7*20+1]=   7.2;
-           daa[7*20+2]=   99.3;
-           daa[7*20+3]=   26.0;
-           daa[7*20+4]=   82.4;
-           daa[7*20+5]=   8.9;
-           daa[7*20+6]=   43.1;
-           daa[8*20+0]=   2.3;
-           daa[8*20+1]=   61.7;
-           daa[8*20+2]=   228.9;
-           daa[8*20+3]=   55.6;
-           daa[8*20+4]=   37.5;
-           daa[8*20+5]=   421.8;
-           daa[8*20+6]=   14.9;
-           daa[8*20+7]=   7.4;
-           daa[9*20+0]=   33.2;
-           daa[9*20+1]=   0.2;
-           daa[9*20+2]=   24.3;
-           daa[9*20+3]=   1.5;
-           daa[9*20+4]=   48.8;
-           daa[9*20+5]=   0.2;
-           daa[9*20+6]=   7.3;
-           daa[9*20+7]=   3.4;
-           daa[9*20+8]=   1.6;
-           daa[10*20+0]=  15.6;
-           daa[10*20+1]=  4.1;
-           daa[10*20+2]=  7.9;
-           daa[10*20+3]=  0.5;
-           daa[10*20+4]=  59.7;
-           daa[10*20+5]=  23.0;
-           daa[10*20+6]=  1.0;
-           daa[10*20+7]=  3.5;
-           daa[10*20+8]=  6.6;
-           daa[10*20+9]=  425.2;
-           daa[11*20+0]=  0.2;
-           daa[11*20+1]=  292.3;
-           daa[11*20+2]=  413.4;
-           daa[11*20+3]=  0.2;
-           daa[11*20+4]=  0.2;
-           daa[11*20+5]=  334.0;
-           daa[11*20+6]=  163.2;
-           daa[11*20+7]=  10.1;
-           daa[11*20+8]=  23.9;
-           daa[11*20+9]=  8.4;
-           daa[11*20+10]= 6.7;
-           daa[12*20+0]=  136.5;
-           daa[12*20+1]=  3.8;
-           daa[12*20+2]=  73.7;
-           daa[12*20+3]=  0.2;
-           daa[12*20+4]=  264.8;
-           daa[12*20+5]=  83.9;
-           daa[12*20+6]=  0.2;
-           daa[12*20+7]=  52.2;
-           daa[12*20+8]=  7.1;
-           daa[12*20+9]=  449.7;
-           daa[12*20+10]= 636.3;
-           daa[12*20+11]= 83.0;
-           daa[13*20+0]=  26.5;
-           daa[13*20+1]=  0.2;
-           daa[13*20+2]=  12.9;
-           daa[13*20+3]=  2.0;
-           daa[13*20+4]=  167.8;
-           daa[13*20+5]=  9.5;
-           daa[13*20+6]=  0.2;
-           daa[13*20+7]=  5.8;
-           daa[13*20+8]=  13.1;
-           daa[13*20+9]=  90.3;
-           daa[13*20+10]= 234.2;
-           daa[13*20+11]= 16.3;
-           daa[13*20+12]= 215.6;
-           daa[14*20+0]=  61.8;
-           daa[14*20+1]=  7.5;
-           daa[14*20+2]=  22.6;
-           daa[14*20+3]=  0.2;
-           daa[14*20+4]=  8.1;
-           daa[14*20+5]=  52.2;
-           daa[14*20+6]=  20.6;
-           daa[14*20+7]=  1.3;
-           daa[14*20+8]=  15.6;
-           daa[14*20+9]=  2.6;
-           daa[14*20+10]= 11.4;
-           daa[14*20+11]= 24.3;
-           daa[14*20+12]= 5.4;
-           daa[14*20+13]= 10.5;
-           daa[15*20+0]=  644.9;
-           daa[15*20+1]=  11.8;
-           daa[15*20+2]=  420.2;
-           daa[15*20+3]=  51.4;
-           daa[15*20+4]=  656.3;
-           daa[15*20+5]=  96.4;
-           daa[15*20+6]=  38.4;
-           daa[15*20+7]=  257.1;
-           daa[15*20+8]=  23.1;
-           daa[15*20+9]=  7.2;
-           daa[15*20+10]= 15.2;
-           daa[15*20+11]= 144.9;
-           daa[15*20+12]= 95.3;
-           daa[15*20+13]= 32.2;
-           daa[15*20+14]= 79.7;
-           daa[16*20+0]=  378.1;
-           daa[16*20+1]=  3.2;
-           daa[16*20+2]=  184.6;
-           daa[16*20+3]=  2.3;
-           daa[16*20+4]=  199.0;
-           daa[16*20+5]=  39.4;
-           daa[16*20+6]=  34.5;
-           daa[16*20+7]=  5.2;
-           daa[16*20+8]=  19.4;
-           daa[16*20+9]=  222.3;
-           daa[16*20+10]= 50.0;
-           daa[16*20+11]= 75.5;
-           daa[16*20+12]= 305.1;
-           daa[16*20+13]= 19.3;
-           daa[16*20+14]= 56.9;
-           daa[16*20+15]= 666.3;
-           daa[17*20+0]=  3.1;
-           daa[17*20+1]=  16.9;
-           daa[17*20+2]=  6.4;
-           daa[17*20+3]=  0.2;
-           daa[17*20+4]=  36.1;
-           daa[17*20+5]=  6.1;
-           daa[17*20+6]=  3.5;
-           daa[17*20+7]=  12.3;
-           daa[17*20+8]=  4.5;
-           daa[17*20+9]=  9.7;
-           daa[17*20+10]= 27.2;
-           daa[17*20+11]= 6.6;
-           daa[17*20+12]= 48.7;
-           daa[17*20+13]= 58.2;
-           daa[17*20+14]= 1.3;
-           daa[17*20+15]= 10.3;
-           daa[17*20+16]= 3.6;
-           daa[18*20+0]=  2.1;
-           daa[18*20+1]=  13.8;
-           daa[18*20+2]=  141.6;
-           daa[18*20+3]=  13.9;
-           daa[18*20+4]=  76.7;
-           daa[18*20+5]=  52.3;
-           daa[18*20+6]=  10.0;
-           daa[18*20+7]=  4.3;
-           daa[18*20+8]=  266.5;
-           daa[18*20+9]=  13.1;
-           daa[18*20+10]= 5.7;
-           daa[18*20+11]= 45.0;
-           daa[18*20+12]= 41.4;
-           daa[18*20+13]= 590.5;
-           daa[18*20+14]= 4.2;
-           daa[18*20+15]= 29.7;
-           daa[18*20+16]= 29.0;
-           daa[18*20+17]= 79.8;
-           daa[19*20+0]=  321.9;
-           daa[19*20+1]=  5.1;
-           daa[19*20+2]=  7.1;
-           daa[19*20+3]=  3.7;
-           daa[19*20+4]=  243.8;
-           daa[19*20+5]=  9.0;
-           daa[19*20+6]=  16.3;
-           daa[19*20+7]=  23.7;
-           daa[19*20+8]=  0.3;
-           daa[19*20+9]=  1710.6;
-           daa[19*20+10]= 126.1;
-           daa[19*20+11]= 11.1;
-           daa[19*20+12]= 279.6;
-           daa[19*20+13]= 59.6;
-           daa[19*20+14]= 17.9;
-           daa[19*20+15]= 49.5;
-           daa[19*20+16]= 396.4;
-           daa[19*20+17]= 13.7;
-           daa[19*20+18]= 15.6;
-           
-           f[0]=  0.069;
-           f[1]=  0.021;
-           f[2]=  0.030;
-           f[3]=  0.020;
-           f[4]=  0.010;
-           f[5]=  0.019;
-           f[6]=  0.025;
-           f[7]=  0.072;
-           f[8]=  0.027;
-           f[9]=  0.085;
-           f[10]= 0.157;
-           f[11]= 0.019;
-           f[12]= 0.051;
-           f[13]= 0.082;
-           f[14]= 0.045;
-           f[15]= 0.081;
-           f[16]= 0.056;
-           f[17]= 0.028;
-           f[18]= 0.037;
-           f[19]= 0.066;
-          }
-          break;
-        case PLL_PMB:
-          {
-           daa[1*20+0]=   0.674995699;
-           daa[2*20+0]=   0.589645178;
-           daa[2*20+1]=   1.189067034;
-           daa[3*20+0]=   0.462499504;
-           daa[3*20+1]=   0.605460903;
-           daa[3*20+2]=   3.573373315;
-           daa[4*20+0]=   1.065445546;
-           daa[4*20+1]=   0.31444833;
-           daa[4*20+2]=   0.589852457;
-           daa[4*20+3]=   0.246951424;
-           daa[5*20+0]=   1.111766964;
-           daa[5*20+1]=   2.967840934;
-           daa[5*20+2]=   2.299755865;
-           daa[5*20+3]=   1.686058219;
-           daa[5*20+4]=   0.245163782;
-           daa[6*20+0]=   1.046334652;
-           daa[6*20+1]=   1.201770702;
-           daa[6*20+2]=   1.277836748;
-           daa[6*20+3]=   4.399995525;
-           daa[6*20+4]=   0.091071867;
-           daa[6*20+5]=   4.15967899;
-           daa[7*20+0]=   1.587964372;
-           daa[7*20+1]=   0.523770553;
-           daa[7*20+2]=   1.374854049;
-           daa[7*20+3]=   0.734992057;
-           daa[7*20+4]=   0.31706632;
-           daa[7*20+5]=   0.596789898;
-           daa[7*20+6]=   0.463812837;
-           daa[8*20+0]=   0.580830874;
-           daa[8*20+1]=   1.457127446;
-           daa[8*20+2]=   2.283037894;
-           daa[8*20+3]=   0.839348444;
-           daa[8*20+4]=   0.411543728;
-           daa[8*20+5]=   1.812173605;
-           daa[8*20+6]=   0.877842609;
-           daa[8*20+7]=   0.476331437;
-           daa[9*20+0]=   0.464590585;
-           daa[9*20+1]=   0.35964586;
-           daa[9*20+2]=   0.426069419;
-           daa[9*20+3]=   0.266775558;
-           daa[9*20+4]=   0.417547309;
-           daa[9*20+5]=   0.315256838;
-           daa[9*20+6]=   0.30421529;
-           daa[9*20+7]=   0.180198883;
-           daa[9*20+8]=   0.285186418;
-           daa[10*20+0]=  0.804404505;
-           daa[10*20+1]=  0.520701585;
-           daa[10*20+2]=  0.41009447;
-           daa[10*20+3]=  0.269124919;
-           daa[10*20+4]=  0.450795211;
-           daa[10*20+5]=  0.625792937;
-           daa[10*20+6]=  0.32078471;
-           daa[10*20+7]=  0.259854426;
-           daa[10*20+8]=  0.363981358;
-           daa[10*20+9]=  4.162454693;
-           daa[11*20+0]=  0.831998835;
-           daa[11*20+1]=  4.956476453;
-           daa[11*20+2]=  2.037575629;
-           daa[11*20+3]=  1.114178954;
-           daa[11*20+4]=  0.274163536;
-           daa[11*20+5]=  3.521346591;
-           daa[11*20+6]=  2.415974716;
-           daa[11*20+7]=  0.581001076;
-           daa[11*20+8]=  0.985885486;
-           daa[11*20+9]=  0.374784947;
-           daa[11*20+10]= 0.498011337;
-           daa[12*20+0]=  1.546725076;
-           daa[12*20+1]=  0.81346254;
-           daa[12*20+2]=  0.737846301;
-           daa[12*20+3]=  0.341932741;
-           daa[12*20+4]=  0.618614612;
-           daa[12*20+5]=  2.067388546;
-           daa[12*20+6]=  0.531773639;
-           daa[12*20+7]=  0.465349326;
-           daa[12*20+8]=  0.380925433;
-           daa[12*20+9]=  3.65807012;
-           daa[12*20+10]= 5.002338375;
-           daa[12*20+11]= 0.661095832;
-           daa[13*20+0]=  0.546169219;
-           daa[13*20+1]=  0.303437244;
-           daa[13*20+2]=  0.425193716;
-           daa[13*20+3]=  0.219005213;
-           daa[13*20+4]=  0.669206193;
-           daa[13*20+5]=  0.406042546;
-           daa[13*20+6]=  0.224154698;
-           daa[13*20+7]=  0.35402891;
-           daa[13*20+8]=  0.576231691;
-           daa[13*20+9]=  1.495264661;
-           daa[13*20+10]= 2.392638293;
-           daa[13*20+11]= 0.269496317;
-           daa[13*20+12]= 2.306919847;
-           daa[14*20+0]=  1.241586045;
-           daa[14*20+1]=  0.65577338;
-           daa[14*20+2]=  0.711495595;
-           daa[14*20+3]=  0.775624818;
-           daa[14*20+4]=  0.198679914;
-           daa[14*20+5]=  0.850116543;
-           daa[14*20+6]=  0.794584081;
-           daa[14*20+7]=  0.588254139;
-           daa[14*20+8]=  0.456058589;
-           daa[14*20+9]=  0.366232942;
-           daa[14*20+10]= 0.430073179;
-           daa[14*20+11]= 1.036079005;
-           daa[14*20+12]= 0.337502282;
-           daa[14*20+13]= 0.481144863;
-           daa[15*20+0]=  3.452308792;
-           daa[15*20+1]=  0.910144334;
-           daa[15*20+2]=  2.572577221;
-           daa[15*20+3]=  1.440896785;
-           daa[15*20+4]=  0.99870098;
-           daa[15*20+5]=  1.348272505;
-           daa[15*20+6]=  1.205509425;
-           daa[15*20+7]=  1.402122097;
-           daa[15*20+8]=  0.799966711;
-           daa[15*20+9]=  0.530641901;
-           daa[15*20+10]= 0.402471997;
-           daa[15*20+11]= 1.234648153;
-           daa[15*20+12]= 0.945453716;
-           daa[15*20+13]= 0.613230817;
-           daa[15*20+14]= 1.217683028;
-           daa[16*20+0]=  1.751412803;
-           daa[16*20+1]=  0.89517149;
-           daa[16*20+2]=  1.823161023;
-           daa[16*20+3]=  0.994227284;
-           daa[16*20+4]=  0.847312432;
-           daa[16*20+5]=  1.320626678;
-           daa[16*20+6]=  0.949599791;
-           daa[16*20+7]=  0.542185658;
-           daa[16*20+8]=  0.83039281;
-           daa[16*20+9]=  1.114132523;
-           daa[16*20+10]= 0.779827336;
-           daa[16*20+11]= 1.290709079;
-           daa[16*20+12]= 1.551488041;
-           daa[16*20+13]= 0.718895136;
-           daa[16*20+14]= 0.780913179;
-           daa[16*20+15]= 4.448982584;
-           daa[17*20+0]=  0.35011051;
-           daa[17*20+1]=  0.618778365;
-           daa[17*20+2]=  0.422407388;
-           daa[17*20+3]=  0.362495245;
-           daa[17*20+4]=  0.445669347;
-           daa[17*20+5]=  0.72038474;
-           daa[17*20+6]=  0.261258229;
-           daa[17*20+7]=  0.37874827;
-           daa[17*20+8]=  0.72436751;
-           daa[17*20+9]=  0.516260502;
-           daa[17*20+10]= 0.794797115;
-           daa[17*20+11]= 0.43340962;
-           daa[17*20+12]= 0.768395107;
-           daa[17*20+13]= 3.29519344;
-           daa[17*20+14]= 0.499869138;
-           daa[17*20+15]= 0.496334956;
-           daa[17*20+16]= 0.38372361;
-           daa[18*20+0]=  0.573154753;
-           daa[18*20+1]=  0.628599063;
-           daa[18*20+2]=  0.720013799;
-           daa[18*20+3]=  0.436220437;
-           daa[18*20+4]=  0.55626163;
-           daa[18*20+5]=  0.728970584;
-           daa[18*20+6]=  0.50720003;
-           daa[18*20+7]=  0.284727562;
-           daa[18*20+8]=  2.210952064;
-           daa[18*20+9]=  0.570562395;
-           daa[18*20+10]= 0.811019594;
-           daa[18*20+11]= 0.664884513;
-           daa[18*20+12]= 0.93253606;
-           daa[18*20+13]= 5.894735673;
-           daa[18*20+14]= 0.433748126;
-           daa[18*20+15]= 0.593795813;
-           daa[18*20+16]= 0.523549536;
-           daa[18*20+17]= 2.996248013;
-           daa[19*20+0]=  2.063050067;
-           daa[19*20+1]=  0.388680158;
-           daa[19*20+2]=  0.474418852;
-           daa[19*20+3]=  0.275658381;
-           daa[19*20+4]=  0.998911631;
-           daa[19*20+5]=  0.634408285;
-           daa[19*20+6]=  0.527640634;
-           daa[19*20+7]=  0.314700907;
-           daa[19*20+8]=  0.305792277;
-           daa[19*20+9]=  8.002789424;
-           daa[19*20+10]= 2.113077156;
-           daa[19*20+11]= 0.526184203;
-           daa[19*20+12]= 1.737356217;
-           daa[19*20+13]= 0.983844803;
-           daa[19*20+14]= 0.551333603;
-           daa[19*20+15]= 0.507506011;
-           daa[19*20+16]= 1.89965079;
-           daa[19*20+17]= 0.429570747;
-           daa[19*20+18]= 0.716795463;
-           
-           f[0]=  0.076;
-           f[1]=  0.054;
-           f[2]=  0.038;
-           f[3]=  0.045;
-           f[4]=  0.028;
-           f[5]=  0.034;
-           f[6]=  0.053;
-           f[7]=  0.078;
-           f[8]=  0.030;
-           f[9]=  0.060;
-           f[10]= 0.096;
-           f[11]= 0.052;
-           f[12]= 0.022;
-           f[13]= 0.045;
-           f[14]= 0.042;
-           f[15]= 0.068;
-           f[16]= 0.056;
-           f[17]= 0.016;
-           f[18]= 0.036;
-           f[19]= 0.071;
-          }
-          break;
-        case PLL_HIVB:
-          {
-           daa[1*20+0]=   0.30750700;
-           daa[2*20+0]=   0.00500000;
-           daa[2*20+1]=   0.29554300;
-           daa[3*20+0]=   1.45504000;
-           daa[3*20+1]=   0.00500000;
-           daa[3*20+2]=   17.66120000;
-           daa[4*20+0]=   0.12375800;
-           daa[4*20+1]=   0.35172100;
-           daa[4*20+2]=   0.08606420;
-           daa[4*20+3]=   0.00500000;
-           daa[5*20+0]=   0.05511280;
-           daa[5*20+1]=   3.42150000;
-           daa[5*20+2]=   0.67205200;
-           daa[5*20+3]=   0.00500000;
-           daa[5*20+4]=   0.00500000;
-           daa[6*20+0]=   1.48135000;
-           daa[6*20+1]=   0.07492180;
-           daa[6*20+2]=   0.07926330;
-           daa[6*20+3]=   10.58720000;
-           daa[6*20+4]=   0.00500000;
-           daa[6*20+5]=   2.56020000;
-           daa[7*20+0]=   2.13536000;
-           daa[7*20+1]=   3.65345000;
-           daa[7*20+2]=   0.32340100;
-           daa[7*20+3]=   2.83806000;
-           daa[7*20+4]=   0.89787100;
-           daa[7*20+5]=   0.06191370;
-           daa[7*20+6]=   3.92775000;
-           daa[8*20+0]=   0.08476130;
-           daa[8*20+1]=   9.04044000;
-           daa[8*20+2]=   7.64585000;
-           daa[8*20+3]=   1.91690000;
-           daa[8*20+4]=   0.24007300;
-           daa[8*20+5]=   7.05545000;
-           daa[8*20+6]=   0.11974000;
-           daa[8*20+7]=   0.00500000;
-           daa[9*20+0]=   0.00500000;
-           daa[9*20+1]=   0.67728900;
-           daa[9*20+2]=   0.68056500;
-           daa[9*20+3]=   0.01767920;
-           daa[9*20+4]=   0.00500000;
-           daa[9*20+5]=   0.00500000;
-           daa[9*20+6]=   0.00609079;
-           daa[9*20+7]=   0.00500000;
-           daa[9*20+8]=   0.10311100;
-           daa[10*20+0]=  0.21525600;
-           daa[10*20+1]=  0.70142700;
-           daa[10*20+2]=  0.00500000;
-           daa[10*20+3]=  0.00876048;
-           daa[10*20+4]=  0.12977700;
-           daa[10*20+5]=  1.49456000;
-           daa[10*20+6]=  0.00500000;
-           daa[10*20+7]=  0.00500000;
-           daa[10*20+8]=  1.74171000;
-           daa[10*20+9]=  5.95879000;
-           daa[11*20+0]=  0.00500000;
-           daa[11*20+1]=  20.45000000;
-           daa[11*20+2]=  7.90443000;
-           daa[11*20+3]=  0.00500000;
-           daa[11*20+4]=  0.00500000;
-           daa[11*20+5]=  6.54737000;
-           daa[11*20+6]=  4.61482000;
-           daa[11*20+7]=  0.52170500;
-           daa[11*20+8]=  0.00500000;
-           daa[11*20+9]=  0.32231900;
-           daa[11*20+10]= 0.08149950;
-           daa[12*20+0]=  0.01866430;
-           daa[12*20+1]=  2.51394000;
-           daa[12*20+2]=  0.00500000;
-           daa[12*20+3]=  0.00500000;
-           daa[12*20+4]=  0.00500000;
-           daa[12*20+5]=  0.30367600;
-           daa[12*20+6]=  0.17578900;
-           daa[12*20+7]=  0.00500000;
-           daa[12*20+8]=  0.00500000;
-           daa[12*20+9]=  11.20650000;
-           daa[12*20+10]= 5.31961000;
-           daa[12*20+11]= 1.28246000;
-           daa[13*20+0]=  0.01412690;
-           daa[13*20+1]=  0.00500000;
-           daa[13*20+2]=  0.00500000;
-           daa[13*20+3]=  0.00500000;
-           daa[13*20+4]=  9.29815000;
-           daa[13*20+5]=  0.00500000;
-           daa[13*20+6]=  0.00500000;
-           daa[13*20+7]=  0.29156100;
-           daa[13*20+8]=  0.14555800;
-           daa[13*20+9]=  3.39836000;
-           daa[13*20+10]= 8.52484000;
-           daa[13*20+11]= 0.03426580;
-           daa[13*20+12]= 0.18802500;
-           daa[14*20+0]=  2.12217000;
-           daa[14*20+1]=  1.28355000;
-           daa[14*20+2]=  0.00739578;
-           daa[14*20+3]=  0.03426580;
-           daa[14*20+4]=  0.00500000;
-           daa[14*20+5]=  4.47211000;
-           daa[14*20+6]=  0.01202260;
-           daa[14*20+7]=  0.00500000;
-           daa[14*20+8]=  2.45318000;
-           daa[14*20+9]=  0.04105930;
-           daa[14*20+10]= 2.07757000;
-           daa[14*20+11]= 0.03138620;
-           daa[14*20+12]= 0.00500000;
-           daa[14*20+13]= 0.00500000;
-           daa[15*20+0]=  2.46633000;
-           daa[15*20+1]=  3.47910000;
-           daa[15*20+2]=  13.14470000;
-           daa[15*20+3]=  0.52823000;
-           daa[15*20+4]=  4.69314000;
-           daa[15*20+5]=  0.11631100;
-           daa[15*20+6]=  0.00500000;
-           daa[15*20+7]=  4.38041000;
-           daa[15*20+8]=  0.38274700;
-           daa[15*20+9]=  1.21803000;
-           daa[15*20+10]= 0.92765600;
-           daa[15*20+11]= 0.50411100;
-           daa[15*20+12]= 0.00500000;
-           daa[15*20+13]= 0.95647200;
-           daa[15*20+14]= 5.37762000;
-           daa[16*20+0]=  15.91830000;
-           daa[16*20+1]=  2.86868000;
-           daa[16*20+2]=  6.88667000;
-           daa[16*20+3]=  0.27472400;
-           daa[16*20+4]=  0.73996900;
-           daa[16*20+5]=  0.24358900;
-           daa[16*20+6]=  0.28977400;
-           daa[16*20+7]=  0.36961500;
-           daa[16*20+8]=  0.71159400;
-           daa[16*20+9]=  8.61217000;
-           daa[16*20+10]= 0.04376730;
-           daa[16*20+11]= 4.67142000;
-           daa[16*20+12]= 4.94026000;
-           daa[16*20+13]= 0.01412690;
-           daa[16*20+14]= 2.01417000;
-           daa[16*20+15]= 8.93107000;
-           daa[17*20+0]=  0.00500000;
-           daa[17*20+1]=  0.99133800;
-           daa[17*20+2]=  0.00500000;
-           daa[17*20+3]=  0.00500000;
-           daa[17*20+4]=  2.63277000;
-           daa[17*20+5]=  0.02665600;
-           daa[17*20+6]=  0.00500000;
-           daa[17*20+7]=  1.21674000;
-           daa[17*20+8]=  0.06951790;
-           daa[17*20+9]=  0.00500000;
-           daa[17*20+10]= 0.74884300;
-           daa[17*20+11]= 0.00500000;
-           daa[17*20+12]= 0.08907800;
-           daa[17*20+13]= 0.82934300;
-           daa[17*20+14]= 0.04445060;
-           daa[17*20+15]= 0.02487280;
-           daa[17*20+16]= 0.00500000;
-           daa[18*20+0]=  0.00500000;
-           daa[18*20+1]=  0.00991826;
-           daa[18*20+2]=  1.76417000;
-           daa[18*20+3]=  0.67465300;
-           daa[18*20+4]=  7.57932000;
-           daa[18*20+5]=  0.11303300;
-           daa[18*20+6]=  0.07926330;
-           daa[18*20+7]=  0.00500000;
-           daa[18*20+8]=  18.69430000;
-           daa[18*20+9]=  0.14816800;
-           daa[18*20+10]= 0.11198600;
-           daa[18*20+11]= 0.00500000;
-           daa[18*20+12]= 0.00500000;
-           daa[18*20+13]= 15.34000000;
-           daa[18*20+14]= 0.03043810;
-           daa[18*20+15]= 0.64802400;
-           daa[18*20+16]= 0.10565200;
-           daa[18*20+17]= 1.28022000;
-           daa[19*20+0]=  7.61428000;
-           daa[19*20+1]=  0.08124540;
-           daa[19*20+2]=  0.02665600;
-           daa[19*20+3]=  1.04793000;
-           daa[19*20+4]=  0.42002700;
-           daa[19*20+5]=  0.02091530;
-           daa[19*20+6]=  1.02847000;
-           daa[19*20+7]=  0.95315500;
-           daa[19*20+8]=  0.00500000;
-           daa[19*20+9]=  17.73890000;
-           daa[19*20+10]= 1.41036000;
-           daa[19*20+11]= 0.26582900;
-           daa[19*20+12]= 6.85320000;
-           daa[19*20+13]= 0.72327400;
-           daa[19*20+14]= 0.00500000;
-           daa[19*20+15]= 0.07492180;
-           daa[19*20+16]= 0.70922600;
-           daa[19*20+17]= 0.00500000;
-           daa[19*20+18]= 0.04105930;
-           
-	   f[0]= 0.060490222;           f[1]= 0.066039665;           f[2]= 0.044127815;           f[3]= 0.042109048;
-           f[4]= 0.020075899;           f[5]= 0.053606488;           f[6]= 0.071567447;           f[7]= 0.072308239;
-           f[8]= 0.022293943;           f[9]= 0.069730629;           f[10]= 0.098851122;          f[11]= 0.056968211;
-           f[12]= 0.019768318;          f[13]= 0.028809447;          f[14]= 0.046025282;          f[15]= 0.05060433;
-           f[16]= 0.053636813;          f[17]= 0.033011601;          f[18]= 0.028350243;          f[19]= 0.061625237;
-          }
-          break;
-        case PLL_HIVW:
-          {
-           daa[1*20+0]=   0.0744808;
-           daa[2*20+0]=   0.6175090;
-           daa[2*20+1]=   0.1602400;
-           daa[3*20+0]=   4.4352100;
-           daa[3*20+1]=   0.0674539;
-           daa[3*20+2]=   29.4087000;
-           daa[4*20+0]=   0.1676530;
-           daa[4*20+1]=   2.8636400;
-           daa[4*20+2]=   0.0604932;
-           daa[4*20+3]=   0.0050000;
-           daa[5*20+0]=   0.0050000;
-           daa[5*20+1]=   10.6746000;
-           daa[5*20+2]=   0.3420680;
-           daa[5*20+3]=   0.0050000;
-           daa[5*20+4]=   0.0050000;
-           daa[6*20+0]=   5.5632500;
-           daa[6*20+1]=   0.0251632;
-           daa[6*20+2]=   0.2015260;
-           daa[6*20+3]=   12.1233000;
-           daa[6*20+4]=   0.0050000;
-           daa[6*20+5]=   3.2065600;
-           daa[7*20+0]=   1.8685000;
-           daa[7*20+1]=   13.4379000;
-           daa[7*20+2]=   0.0604932;
-           daa[7*20+3]=   10.3969000;
-           daa[7*20+4]=   0.0489798;
-           daa[7*20+5]=   0.0604932;
-           daa[7*20+6]=   14.7801000;
-           daa[8*20+0]=   0.0050000;
-           daa[8*20+1]=   6.8440500;
-           daa[8*20+2]=   8.5987600;
-           daa[8*20+3]=   2.3177900;
-           daa[8*20+4]=   0.0050000;
-           daa[8*20+5]=   18.5465000;
-           daa[8*20+6]=   0.0050000;
-           daa[8*20+7]=   0.0050000;
-           daa[9*20+0]=   0.0050000;
-           daa[9*20+1]=   1.3406900;
-           daa[9*20+2]=   0.9870280;
-           daa[9*20+3]=   0.1451240;
-           daa[9*20+4]=   0.0050000;
-           daa[9*20+5]=   0.0342252;
-           daa[9*20+6]=   0.0390512;
-           daa[9*20+7]=   0.0050000;
-           daa[9*20+8]=   0.0050000;
-           daa[10*20+0]=  0.1602400;
-           daa[10*20+1]=  0.5867570;
-           daa[10*20+2]=  0.0050000;
-           daa[10*20+3]=  0.0050000;
-           daa[10*20+4]=  0.0050000;
-           daa[10*20+5]=  2.8904800;
-           daa[10*20+6]=  0.1298390;
-           daa[10*20+7]=  0.0489798;
-           daa[10*20+8]=  1.7638200;
-           daa[10*20+9]=  9.1024600;
-           daa[11*20+0]=  0.5927840;
-           daa[11*20+1]=  39.8897000;
-           daa[11*20+2]=  10.6655000;
-           daa[11*20+3]=  0.8943130;
-           daa[11*20+4]=  0.0050000;
-           daa[11*20+5]=  13.0705000;
-           daa[11*20+6]=  23.9626000;
-           daa[11*20+7]=  0.2794250;
-           daa[11*20+8]=  0.2240600;
-           daa[11*20+9]=  0.8174810;
-           daa[11*20+10]= 0.0050000;
-           daa[12*20+0]=  0.0050000;
-           daa[12*20+1]=  3.2865200;
-           daa[12*20+2]=  0.2015260;
-           daa[12*20+3]=  0.0050000;
-           daa[12*20+4]=  0.0050000;
-           daa[12*20+5]=  0.0050000;
-           daa[12*20+6]=  0.0050000;
-           daa[12*20+7]=  0.0489798;
-           daa[12*20+8]=  0.0050000;
-           daa[12*20+9]=  17.3064000;
-           daa[12*20+10]= 11.3839000;
-           daa[12*20+11]= 4.0956400;
-           daa[13*20+0]=  0.5979230;
-           daa[13*20+1]=  0.0050000;
-           daa[13*20+2]=  0.0050000;
-           daa[13*20+3]=  0.0050000;
-           daa[13*20+4]=  0.3629590;
-           daa[13*20+5]=  0.0050000;
-           daa[13*20+6]=  0.0050000;
-           daa[13*20+7]=  0.0050000;
-           daa[13*20+8]=  0.0050000;
-           daa[13*20+9]=  1.4828800;
-           daa[13*20+10]= 7.4878100;
-           daa[13*20+11]= 0.0050000;
-           daa[13*20+12]= 0.0050000;
-           daa[14*20+0]=  1.0098100;
-           daa[14*20+1]=  0.4047230;
-           daa[14*20+2]=  0.3448480;
-           daa[14*20+3]=  0.0050000;
-           daa[14*20+4]=  0.0050000;
-           daa[14*20+5]=  3.0450200;
-           daa[14*20+6]=  0.0050000;
-           daa[14*20+7]=  0.0050000;
-           daa[14*20+8]=  13.9444000;
-           daa[14*20+9]=  0.0050000;
-           daa[14*20+10]= 9.8309500;
-           daa[14*20+11]= 0.1119280;
-           daa[14*20+12]= 0.0050000;
-           daa[14*20+13]= 0.0342252;
-           daa[15*20+0]=  8.5942000;
-           daa[15*20+1]=  8.3502400;
-           daa[15*20+2]=  14.5699000;
-           daa[15*20+3]=  0.4278810;
-           daa[15*20+4]=  1.1219500;
-           daa[15*20+5]=  0.1602400;
-           daa[15*20+6]=  0.0050000;
-           daa[15*20+7]=  6.2796600;
-           daa[15*20+8]=  0.7251570;
-           daa[15*20+9]=  0.7400910;
-           daa[15*20+10]= 6.1439600;
-           daa[15*20+11]= 0.0050000;
-           daa[15*20+12]= 0.3925750;
-           daa[15*20+13]= 4.2793900;
-           daa[15*20+14]= 14.2490000;
-           daa[16*20+0]=  24.1422000;
-           daa[16*20+1]=  0.9282030;
-           daa[16*20+2]=  4.5420600;
-           daa[16*20+3]=  0.6303950;
-           daa[16*20+4]=  0.0050000;
-           daa[16*20+5]=  0.2030910;
-           daa[16*20+6]=  0.4587430;
-           daa[16*20+7]=  0.0489798;
-           daa[16*20+8]=  0.9595600;
-           daa[16*20+9]=  9.3634500;
-           daa[16*20+10]= 0.0050000;
-           daa[16*20+11]= 4.0480200;
-           daa[16*20+12]= 7.4131300;
-           daa[16*20+13]= 0.1145120;
-           daa[16*20+14]= 4.3370100;
-           daa[16*20+15]= 6.3407900;
-           daa[17*20+0]=  0.0050000;
-           daa[17*20+1]=  5.9656400;
-           daa[17*20+2]=  0.0050000;
-           daa[17*20+3]=  0.0050000;
-           daa[17*20+4]=  5.4989400;
-           daa[17*20+5]=  0.0443298;
-           daa[17*20+6]=  0.0050000;
-           daa[17*20+7]=  2.8258000;
-           daa[17*20+8]=  0.0050000;
-           daa[17*20+9]=  0.0050000;
-           daa[17*20+10]= 1.3703100;
-           daa[17*20+11]= 0.0050000;
-           daa[17*20+12]= 0.0050000;
-           daa[17*20+13]= 0.0050000;
-           daa[17*20+14]= 0.0050000;
-           daa[17*20+15]= 1.1015600;
-           daa[17*20+16]= 0.0050000;
-           daa[18*20+0]=  0.0050000;
-           daa[18*20+1]=  0.0050000;
-           daa[18*20+2]=  5.0647500;
-           daa[18*20+3]=  2.2815400;
-           daa[18*20+4]=  8.3483500;
-           daa[18*20+5]=  0.0050000;
-           daa[18*20+6]=  0.0050000;
-           daa[18*20+7]=  0.0050000;
-           daa[18*20+8]=  47.4889000;
-           daa[18*20+9]=  0.1145120;
-           daa[18*20+10]= 0.0050000;
-           daa[18*20+11]= 0.0050000;
-           daa[18*20+12]= 0.5791980;
-           daa[18*20+13]= 4.1272800;
-           daa[18*20+14]= 0.0050000;
-           daa[18*20+15]= 0.9331420;
-           daa[18*20+16]= 0.4906080;
-           daa[18*20+17]= 0.0050000;
-           daa[19*20+0]=  24.8094000;
-           daa[19*20+1]=  0.2794250;
-           daa[19*20+2]=  0.0744808;
-           daa[19*20+3]=  2.9178600;
-           daa[19*20+4]=  0.0050000;
-           daa[19*20+5]=  0.0050000;
-           daa[19*20+6]=  2.1995200;
-           daa[19*20+7]=  2.7962200;
-           daa[19*20+8]=  0.8274790;
-           daa[19*20+9]=  24.8231000;
-           daa[19*20+10]= 2.9534400;
-           daa[19*20+11]= 0.1280650;
-           daa[19*20+12]= 14.7683000;
-           daa[19*20+13]= 2.2800000;
-           daa[19*20+14]= 0.0050000;
-           daa[19*20+15]= 0.8626370;
-           daa[19*20+16]= 0.0050000;
-           daa[19*20+17]= 0.0050000;
-           daa[19*20+18]= 1.3548200;
-           
-	   f[0]= 0.0377494;             f[1]= 0.057321;              f[2]= 0.0891129;             f[3]= 0.0342034;
-           f[4]= 0.0240105;             f[5]= 0.0437824;             f[6]= 0.0618606;             f[7]= 0.0838496;
-           f[8]= 0.0156076;             f[9]= 0.0983641;             f[10]= 0.0577867;            f[11]= 0.0641682;
-           f[12]= 0.0158419;            f[13]= 0.0422741;            f[14]= 0.0458601;            f[15]= 0.0550846;
-           f[16]= 0.0813774;            f[17]= 0.019597;             f[18]= 0.0205847;            f[19]= 0.0515638;
-          }
-          break;
-        case PLL_JTTDCMUT:
-          {
-           daa[1*20+0]=   0.531678;
-           daa[2*20+0]=   0.557967;
-           daa[2*20+1]=   0.451095;
-           daa[3*20+0]=   0.827445;
-           daa[3*20+1]=   0.154899;
-           daa[3*20+2]=   5.549530;
-           daa[4*20+0]=   0.574478;
-           daa[4*20+1]=   1.019843;
-           daa[4*20+2]=   0.313311;
-           daa[4*20+3]=   0.105625;
-           daa[5*20+0]=   0.556725;
-           daa[5*20+1]=   3.021995;
-           daa[5*20+2]=   0.768834;
-           daa[5*20+3]=   0.521646;
-           daa[5*20+4]=   0.091304;
-           daa[6*20+0]=   1.066681;
-           daa[6*20+1]=   0.318483;
-           daa[6*20+2]=   0.578115;
-           daa[6*20+3]=   7.766557;
-           daa[6*20+4]=   0.053907;
-           daa[6*20+5]=   3.417706;
-           daa[7*20+0]=   1.740159;
-           daa[7*20+1]=   1.359652;
-           daa[7*20+2]=   0.773313;
-           daa[7*20+3]=   1.272434;
-           daa[7*20+4]=   0.546389;
-           daa[7*20+5]=   0.231294;
-           daa[7*20+6]=   1.115632;
-           daa[8*20+0]=   0.219970;
-           daa[8*20+1]=   3.210671;
-           daa[8*20+2]=   4.025778;
-           daa[8*20+3]=   1.032342;
-           daa[8*20+4]=   0.724998;
-           daa[8*20+5]=   5.684080;
-           daa[8*20+6]=   0.243768;
-           daa[8*20+7]=   0.201696;
-           daa[9*20+0]=   0.361684;
-           daa[9*20+1]=   0.239195;
-           daa[9*20+2]=   0.491003;
-           daa[9*20+3]=   0.115968;
-           daa[9*20+4]=   0.150559;
-           daa[9*20+5]=   0.078270;
-           daa[9*20+6]=   0.111773;
-           daa[9*20+7]=   0.053769;
-           daa[9*20+8]=   0.181788;
-           daa[10*20+0]=  0.310007;
-           daa[10*20+1]=  0.372261;
-           daa[10*20+2]=  0.137289;
-           daa[10*20+3]=  0.061486;
-           daa[10*20+4]=  0.164593;
-           daa[10*20+5]=  0.709004;
-           daa[10*20+6]=  0.097485;
-           daa[10*20+7]=  0.069492;
-           daa[10*20+8]=  0.540571;
-           daa[10*20+9]=  2.335139;
-           daa[11*20+0]=  0.369437;
-           daa[11*20+1]=  6.529255;
-           daa[11*20+2]=  2.529517;
-           daa[11*20+3]=  0.282466;
-           daa[11*20+4]=  0.049009;
-           daa[11*20+5]=  2.966732;
-           daa[11*20+6]=  1.731684;
-           daa[11*20+7]=  0.269840;
-           daa[11*20+8]=  0.525096;
-           daa[11*20+9]=  0.202562;
-           daa[11*20+10]= 0.146481;
-           daa[12*20+0]=  0.469395;
-           daa[12*20+1]=  0.431045;
-           daa[12*20+2]=  0.330720;
-           daa[12*20+3]=  0.190001;
-           daa[12*20+4]=  0.409202;
-           daa[12*20+5]=  0.456901;
-           daa[12*20+6]=  0.175084;
-           daa[12*20+7]=  0.130379;
-           daa[12*20+8]=  0.329660;
-           daa[12*20+9]=  4.831666;
-           daa[12*20+10]= 3.856906;
-           daa[12*20+11]= 0.624581;
-           daa[13*20+0]=  0.138293;
-           daa[13*20+1]=  0.065314;
-           daa[13*20+2]=  0.073481;
-           daa[13*20+3]=  0.032522;
-           daa[13*20+4]=  0.678335;
-           daa[13*20+5]=  0.045683;
-           daa[13*20+6]=  0.043829;
-           daa[13*20+7]=  0.050212;
-           daa[13*20+8]=  0.453428;
-           daa[13*20+9]=  0.777090;
-           daa[13*20+10]= 2.500294;
-           daa[13*20+11]= 0.024521;
-           daa[13*20+12]= 0.436181;
-           daa[14*20+0]=  1.959599;
-           daa[14*20+1]=  0.710489;
-           daa[14*20+2]=  0.121804;
-           daa[14*20+3]=  0.127164;
-           daa[14*20+4]=  0.123653;
-           daa[14*20+5]=  1.608126;
-           daa[14*20+6]=  0.191994;
-           daa[14*20+7]=  0.208081;
-           daa[14*20+8]=  1.141961;
-           daa[14*20+9]=  0.098580;
-           daa[14*20+10]= 1.060504;
-           daa[14*20+11]= 0.216345;
-           daa[14*20+12]= 0.164215;
-           daa[14*20+13]= 0.148483;
-           daa[15*20+0]=  3.887095;
-           daa[15*20+1]=  1.001551;
-           daa[15*20+2]=  5.057964;
-           daa[15*20+3]=  0.589268;
-           daa[15*20+4]=  2.155331;
-           daa[15*20+5]=  0.548807;
-           daa[15*20+6]=  0.312449;
-           daa[15*20+7]=  1.874296;
-           daa[15*20+8]=  0.743458;
-           daa[15*20+9]=  0.405119;
-           daa[15*20+10]= 0.592511;
-           daa[15*20+11]= 0.474478;
-           daa[15*20+12]= 0.285564;
-           daa[15*20+13]= 0.943971;
-           daa[15*20+14]= 2.788406;
-           daa[16*20+0]=  4.582565;
-           daa[16*20+1]=  0.650282;
-           daa[16*20+2]=  2.351311;
-           daa[16*20+3]=  0.425159;
-           daa[16*20+4]=  0.469823;
-           daa[16*20+5]=  0.523825;
-           daa[16*20+6]=  0.331584;
-           daa[16*20+7]=  0.316862;
-           daa[16*20+8]=  0.477355;
-           daa[16*20+9]=  2.553806;
-           daa[16*20+10]= 0.272514;
-           daa[16*20+11]= 0.965641;
-           daa[16*20+12]= 2.114728;
-           daa[16*20+13]= 0.138904;
-           daa[16*20+14]= 1.176961;
-           daa[16*20+15]= 4.777647;
-           daa[17*20+0]=  0.084329;
-           daa[17*20+1]=  1.257961;
-           daa[17*20+2]=  0.027700;
-           daa[17*20+3]=  0.057466;
-           daa[17*20+4]=  1.104181;
-           daa[17*20+5]=  0.172206;
-           daa[17*20+6]=  0.114381;
-           daa[17*20+7]=  0.544180;
-           daa[17*20+8]=  0.128193;
-           daa[17*20+9]=  0.134510;
-           daa[17*20+10]= 0.530324;
-           daa[17*20+11]= 0.089134;
-           daa[17*20+12]= 0.201334;
-           daa[17*20+13]= 0.537922;
-           daa[17*20+14]= 0.069965;
-           daa[17*20+15]= 0.310927;
-           daa[17*20+16]= 0.080556;
-           daa[18*20+0]=  0.139492;
-           daa[18*20+1]=  0.235601;
-           daa[18*20+2]=  0.700693;
-           daa[18*20+3]=  0.453952;
-           daa[18*20+4]=  2.114852;
-           daa[18*20+5]=  0.254745;
-           daa[18*20+6]=  0.063452;
-           daa[18*20+7]=  0.052500;
-           daa[18*20+8]=  5.848400;
-           daa[18*20+9]=  0.303445;
-           daa[18*20+10]= 0.241094;
-           daa[18*20+11]= 0.087904;
-           daa[18*20+12]= 0.189870;
-           daa[18*20+13]= 5.484236;
-           daa[18*20+14]= 0.113850;
-           daa[18*20+15]= 0.628608;
-           daa[18*20+16]= 0.201094;
-           daa[18*20+17]= 0.747889;
-           daa[19*20+0]=  2.924161;
-           daa[19*20+1]=  0.171995;
-           daa[19*20+2]=  0.164525;
-           daa[19*20+3]=  0.315261;
-           daa[19*20+4]=  0.621323;
-           daa[19*20+5]=  0.179771;
-           daa[19*20+6]=  0.465271;
-           daa[19*20+7]=  0.470140;
-           daa[19*20+8]=  0.121827;
-           daa[19*20+9]=  9.533943;
-           daa[19*20+10]= 1.761439;
-           daa[19*20+11]= 0.124066;
-           daa[19*20+12]= 3.038533;
-           daa[19*20+13]= 0.593478;
-           daa[19*20+14]= 0.211561;
-           daa[19*20+15]= 0.408532;
-           daa[19*20+16]= 1.143980;
-           daa[19*20+17]= 0.239697;
-           daa[19*20+18]= 0.165473;
-           
-           f[0]=  0.077;
-           f[1]=  0.051;
-           f[2]=  0.043;
-           f[3]=  0.051;
-           f[4]=  0.020;
-           f[5]=  0.041;
-           f[6]=  0.062;
-           f[7]=  0.075;
-           f[8]=  0.023;
-           f[9]=  0.053;
-           f[10]= 0.091;
-           f[11]= 0.059;
-           f[12]= 0.024;
-           f[13]= 0.040;
-           f[14]= 0.051;
-           f[15]= 0.068;
-           f[16]= 0.059;
-           f[17]= 0.014;
-           f[18]= 0.032;
-           f[19]= 0.066;
-          }
-          break;
-        case PLL_FLU:
-          {
-            daa[ 1*20+ 0]       =       0.138658765     ;
-            daa[ 2*20+ 0]       =       0.053366579     ;
-            daa[ 2*20+ 1]       =       0.161000889     ;
-            daa[ 3*20+ 0]       =       0.584852306     ;
-            daa[ 3*20+ 1]       =       0.006771843     ;
-            daa[ 3*20+ 2]       =       7.737392871     ;
-            daa[ 4*20+ 0]       =       0.026447095     ;
-            daa[ 4*20+ 1]       =       0.167207008     ;
-            daa[ 4*20+ 2]       =       1.30E-05        ;
-            daa[ 4*20+ 3]       =       1.41E-02        ;
-            daa[ 5*20+ 0]       =       0.353753982     ;
-            daa[ 5*20+ 1]       =       3.292716942     ;
-            daa[ 5*20+ 2]       =       0.530642655     ;
-            daa[ 5*20+ 3]       =       0.145469388     ;
-            daa[ 5*20+ 4]       =       0.002547334     ;
-            daa[ 6*20+ 0]       =       1.484234503     ;
-            daa[ 6*20+ 1]       =       0.124897617     ;
-            daa[ 6*20+ 2]       =       0.061652192     ;
-            daa[ 6*20+ 3]       =       5.370511279     ;
-            daa[ 6*20+ 4]       =       3.91E-11        ;
-            daa[ 6*20+ 5]       =       1.195629122     ;
-            daa[ 7*20+ 0]       =       1.132313122     ;
-            daa[ 7*20+ 1]       =       1.190624465     ;
-            daa[ 7*20+ 2]       =       0.322524648     ;
-            daa[ 7*20+ 3]       =       1.934832784     ;
-            daa[ 7*20+ 4]       =       0.116941459     ;
-            daa[ 7*20+ 5]       =       0.108051341     ;
-            daa[ 7*20+ 6]       =       1.593098825     ;
-            daa[ 8*20+ 0]       =       0.214757862     ;
-            daa[ 8*20+ 1]       =       1.879569938     ;
-            daa[ 8*20+ 2]       =       1.387096032     ;
-            daa[ 8*20+ 3]       =       0.887570549     ;
-            daa[ 8*20+ 4]       =       2.18E-02        ;
-            daa[ 8*20+ 5]       =       5.330313412     ;
-            daa[ 8*20+ 6]       =       0.256491863     ;
-            daa[ 8*20+ 7]       =       0.058774527     ;
-            daa[ 9*20+ 0]       =       0.149926734     ;
-            daa[ 9*20+ 1]       =       0.246117172     ;
-            daa[ 9*20+ 2]       =       0.218571975     ;
-            daa[ 9*20+ 3]       =       0.014085917     ;
-            daa[ 9*20+ 4]       =       0.001112158     ;
-            daa[ 9*20+ 5]       =       0.02883995      ;
-            daa[ 9*20+ 6]       =       1.42E-02        ;
-            daa[ 9*20+ 7]       =       1.63E-05        ;
-            daa[ 9*20+ 8]       =       0.243190142     ;
-            daa[10*20+ 0]       =       0.023116952     ;
-            daa[10*20+ 1]       =       0.296045557     ;
-            daa[10*20+ 2]       =       8.36E-04        ;
-            daa[10*20+ 3]       =       0.005730682     ;
-            daa[10*20+ 4]       =       0.005613627     ;
-            daa[10*20+ 5]       =       1.020366955     ;
-            daa[10*20+ 6]       =       0.016499536     ;
-            daa[10*20+ 7]       =       0.006516229     ;
-            daa[10*20+ 8]       =       0.321611694     ;
-            daa[10*20+ 9]       =       3.512072282     ;
-            daa[11*20+ 0]       =       0.47433361      ;
-            daa[11*20+ 1]       =       15.30009662     ;
-            daa[11*20+ 2]       =       2.646847965     ;
-            daa[11*20+ 3]       =       0.29004298      ;
-            daa[11*20+ 4]       =       3.83E-06        ;
-            daa[11*20+ 5]       =       2.559587177     ;
-            daa[11*20+ 6]       =       3.881488809     ;
-            daa[11*20+ 7]       =       0.264148929     ;
-            daa[11*20+ 8]       =       0.347302791     ;
-            daa[11*20+ 9]       =       0.227707997     ;
-            daa[11*20+10]       =       0.129223639     ;
-            daa[12*20+ 0]       =       0.058745423     ;
-            daa[12*20+ 1]       =       0.890162346     ;
-            daa[12*20+ 2]       =       0.005251688     ;
-            daa[12*20+ 3]       =       0.041762964     ;
-            daa[12*20+ 4]       =       0.11145731      ;
-            daa[12*20+ 5]       =       0.190259181     ;
-            daa[12*20+ 6]       =       0.313974351     ;
-            daa[12*20+ 7]       =       0.001500467     ;
-            daa[12*20+ 8]       =       0.001273509     ;
-            daa[12*20+ 9]       =       9.017954203     ;
-            daa[12*20+10]       =       6.746936485     ;
-            daa[12*20+11]       =       1.331291619     ;
-            daa[13*20+ 0]       =       0.080490909     ;
-            daa[13*20+ 1]       =       1.61E-02        ;
-            daa[13*20+ 2]       =       8.36E-04        ;
-            daa[13*20+ 3]       =       1.06E-06        ;
-            daa[13*20+ 4]       =       0.104053666     ;
-            daa[13*20+ 5]       =       0.032680657     ;
-            daa[13*20+ 6]       =       0.001003501     ;
-            daa[13*20+ 7]       =       0.001236645     ;
-            daa[13*20+ 8]       =       0.119028506     ;
-            daa[13*20+ 9]       =       1.463357278     ;
-            daa[13*20+10]       =       2.986800036     ;
-            daa[13*20+11]       =       3.20E-01        ;
-            daa[13*20+12]       =       0.279910509     ;
-            daa[14*20+ 0]       =       0.659311478     ;
-            daa[14*20+ 1]       =       0.15402718      ;
-            daa[14*20+ 2]       =       3.64E-02        ;
-            daa[14*20+ 3]       =       0.188539456     ;
-            daa[14*20+ 4]       =       1.59E-13        ;
-            daa[14*20+ 5]       =       0.712769599     ;
-            daa[14*20+ 6]       =       0.319558828     ;
-            daa[14*20+ 7]       =       0.038631761     ;
-            daa[14*20+ 8]       =       0.924466914     ;
-            daa[14*20+ 9]       =       0.080543327     ;
-            daa[14*20+10]       =       0.634308521     ;
-            daa[14*20+11]       =       0.195750632     ;
-            daa[14*20+12]       =       5.69E-02        ;
-            daa[14*20+13]       =       0.00713243      ;
-            daa[15*20+ 0]       =       3.011344519     ;
-            daa[15*20+ 1]       =       0.95013841      ;
-            daa[15*20+ 2]       =       3.881310531     ;
-            daa[15*20+ 3]       =       0.338372183     ;
-            daa[15*20+ 4]       =       0.336263345     ;
-            daa[15*20+ 5]       =       0.487822499     ;
-            daa[15*20+ 6]       =       0.307140298     ;
-            daa[15*20+ 7]       =       1.585646577     ;
-            daa[15*20+ 8]       =       0.58070425      ;
-            daa[15*20+ 9]       =       0.290381075     ;
-            daa[15*20+10]       =       0.570766693     ;
-            daa[15*20+11]       =       0.283807672     ;
-            daa[15*20+12]       =       0.007026588     ;
-            daa[15*20+13]       =       0.99668567      ;
-            daa[15*20+14]       =       2.087385344     ;
-            daa[16*20+ 0]       =       5.418298175     ;
-            daa[16*20+ 1]       =       0.183076905     ;
-            daa[16*20+ 2]       =       2.140332316     ;
-            daa[16*20+ 3]       =       0.135481233     ;
-            daa[16*20+ 4]       =       0.011975266     ;
-            daa[16*20+ 5]       =       0.602340963     ;
-            daa[16*20+ 6]       =       0.280124895     ;
-            daa[16*20+ 7]       =       0.01880803      ;
-            daa[16*20+ 8]       =       0.368713573     ;
-            daa[16*20+ 9]       =       2.904052286     ;
-            daa[16*20+10]       =       0.044926357     ;
-            daa[16*20+11]       =       1.5269642       ;
-            daa[16*20+12]       =       2.031511321     ;
-            daa[16*20+13]       =       0.000134906     ;
-            daa[16*20+14]       =       0.542251094     ;
-            daa[16*20+15]       =       2.206859934     ;
-            daa[17*20+ 0]       =       1.96E-01        ;
-            daa[17*20+ 1]       =       1.369429408     ;
-            daa[17*20+ 2]       =       5.36E-04        ;
-            daa[17*20+ 3]       =       1.49E-05        ;
-            daa[17*20+ 4]       =       0.09410668      ;
-            daa[17*20+ 5]       =       4.40E-02        ;
-            daa[17*20+ 6]       =       0.155245492     ;
-            daa[17*20+ 7]       =       0.196486447     ;
-            daa[17*20+ 8]       =       2.24E-02        ;
-            daa[17*20+ 9]       =       0.03213215      ;
-            daa[17*20+10]       =       0.431277663     ;
-            daa[17*20+11]       =       4.98E-05        ;
-            daa[17*20+12]       =       0.070460039     ;
-            daa[17*20+13]       =       0.814753094     ;
-            daa[17*20+14]       =       0.000431021     ;
-            daa[17*20+15]       =       0.099835753     ;
-            daa[17*20+16]       =       0.207066206     ;
-            daa[18*20+ 0]       =       0.018289288     ;
-            daa[18*20+ 1]       =       0.099855497     ;
-            daa[18*20+ 2]       =       0.373101927     ;
-            daa[18*20+ 3]       =       0.525398543     ;
-            daa[18*20+ 4]       =       0.601692431     ;
-            daa[18*20+ 5]       =       0.072205935     ;
-            daa[18*20+ 6]       =       0.10409287      ;
-            daa[18*20+ 7]       =       0.074814997     ;
-            daa[18*20+ 8]       =       6.448954446     ;
-            daa[18*20+ 9]       =       0.273934263     ;
-            daa[18*20+10]       =       0.340058468     ;
-            daa[18*20+11]       =       0.012416222     ;
-            daa[18*20+12]       =       0.874272175     ;
-            daa[18*20+13]       =       5.393924245     ;
-            daa[18*20+14]       =       1.82E-04        ;
-            daa[18*20+15]       =       0.39255224      ;
-            daa[18*20+16]       =       0.12489802      ;
-            daa[18*20+17]       =       0.42775543      ;
-            daa[19*20+ 0]       =       3.53200527      ;
-            daa[19*20+ 1]       =       0.103964386     ;
-            daa[19*20+ 2]       =       0.010257517     ;
-            daa[19*20+ 3]       =       0.297123975     ;
-            daa[19*20+ 4]       =       0.054904564     ;
-            daa[19*20+ 5]       =       0.406697814     ;
-            daa[19*20+ 6]       =       0.285047948     ;
-            daa[19*20+ 7]       =       0.337229619     ;
-            daa[19*20+ 8]       =       0.098631355     ;
-            daa[19*20+ 9]       =       14.39405219     ;
-            daa[19*20+10]       =       0.890598579     ;
-            daa[19*20+11]       =       0.07312793      ;
-            daa[19*20+12]       =       4.904842235     ;
-            daa[19*20+13]       =       0.592587985     ;
-            daa[19*20+14]       =       0.058971975     ;
-            daa[19*20+15]       =       0.088256423     ;
-            daa[19*20+16]       =       0.654109108     ;
-            daa[19*20+17]       =       0.256900461     ;
-            daa[19*20+18]       =       0.167581647     ;
-            
- 
-  
-            f[0]        =       0.0471  ;
-            f[1]        =       0.0509  ;
-            f[2]        =       0.0742  ;
-            f[3]        =       0.0479  ;
-            f[4]        =       0.0250  ;
-            f[5]        =       0.0333  ;
-            f[6]        =       0.0546  ;
-            f[7]        =       0.0764  ;
-            f[8]        =       0.0200  ;
-            f[9]        =       0.0671  ;
-            f[10]       =       0.0715  ;
-            f[11]       =       0.0568  ;
-            f[12]       =       0.0181  ;
-            f[13]       =       0.0305  ;
-            f[14]       =       0.0507  ;
-            f[15]       =       0.0884  ;
-            f[16]       =       0.0743  ;
-            f[17]       =       0.0185  ;
-            f[18]       =       0.0315  ;
-            f[19]       =       0.0632  ;
-          }
-          break;     
-        default: 
-          assert(0);
-        }
-    }
-
-
-  /*
-    
-  TODO review frequency sums for fixed as well as empirical base frequencies !
-  
-  NUMERICAL BUG fix, rounded AA freqs in some models, such that 
-  they actually really sum to 1.0 +/- epsilon 
-  
-  {
-    double acc = 0.0;
-  
-    for(i = 0; i < 20; i++)
-      acc += f[i];
-    
-    printf("%1.80f\n", acc);
-    assert(acc == 1.0);  
-  }
-  */
- 
-
-
-  /* fill the upper triangle (above the diagonal) with the corresponding values
-     from the lower triangle */
-  for (i=0; i<20; i++)  
-    for (j=0; j<i; j++)               
-      daa[j*20+i] = daa[i*20+j];
-
-  
-  /*
-    for (i=0; i<20; i++)  
-    {
-    for (j=0; j<20; j++)
-    {
-    if(i == j)
-    printf("0.0 ");
-    else
-    printf("%f ", daa[i * 20 + j]);
-    }
-    printf("\n");
-    }
-    
-    for (i=0; i<20; i++) 
-    printf("%f ", f[i]);
-    printf("\n");
-  */
-  
-
-  max = 0;
-  
-  /* copy the triangle above the diagonal from daa (which is a linear block) to
-     the triangle above the diagonal of a square matrix q. Store the maximal
-     value in variable max */
-  for(i = 0; i < 19; i++)
-    for(j = i + 1; j < 20; j++)
-      {
-        q[i][j] = temp = daa[i * 20 + j];
-        if(temp > max) 
-          max = temp;
-      }
- 
-  scaler = PLL_AA_SCALE / max;
-   
-  /* SCALING HAS BEEN RE-INTRODUCED TO RESOLVE NUMERICAL  PROBLEMS */   
-
-  /* copy and scale values to the initialRates array */
-  r = 0;
-  for(i = 0; i < 19; i++)
-    {      
-      for(j = i + 1; j < 20; j++)
-        {  
-        
-          q[i][j] *= scaler;
-          
-          
-          assert(q[i][j] <= PLL_AA_SCALE_PLUS_EPSILON);
-          
-          initialRates[r++] = q[i][j];
-        }
-    }             
-}
-
-/** @brief Update the partition contributions and the overall fracchange
-  *
-  * With a single partition, \a tr->fracchange is set to that partition's \a fracchange.
-  * With several partitions, \a partitionContribution of each partition is set to the
-  * share of the summed site weights (\a tr->aliaswgt over [lower, upper)) that belongs
-  * to the partition, and \a tr->fracchange becomes the contribution-weighted sum of the
-  * per-partition \a fracchange values. In both cases \a tr->rawFracchange is finally
-  * set to \a tr->fracchange.
-  *
-  * @param tr
-  *   PLL instance
-  *
-  * @param pr
-  *   List of partitions
-  * 
-  * @todo 
-      It is clear how fracchange is computed for each partition, but not what it is
-      used for. What is tr->fracchange used for?
-*/
-static void updateFracChange(pllInstance *tr, partitionList *pr)
-{   
-  int numberOfModels = pr->numberOfPartitions;
-  if(numberOfModels == 1)
-    {   
-      /* -1.0 is used below as a marker value, so it must not be the incoming value */
-      assert(pr->partitionData[0]->fracchange != -1.0);
-     
-      tr->fracchange = pr->partitionData[0]->fracchange;
-      pr->partitionData[0]->fracchange = -1.0;
-      /* NOTE(review): fracchange was overwritten with -1.0 on the previous line, so
-         rawFracchange receives -1.0 here, whereas the multi-partition branch stores
-         the real value - confirm that this is intended */
-      pr->partitionData[0]->rawFracchange = pr->partitionData[0]->fracchange;
-    }      
-  else
-    {
-      int model;
-      /* zero-initialised accumulator: summed site weights per partition */
-      double *modelWeights = (double *)rax_calloc((size_t)numberOfModels, sizeof(double));
-      double wgtsum = 0.0;  
-     
-      assert(numberOfModels > 1);
-
-      tr->fracchange = 0.0;              
-      
-       /* accumulate the site weights of every partition and their grand total */
-       for(model = 0; model < numberOfModels; model++)
-         {
-           size_t
-             lower = pr->partitionData[model]->lower,
-             upper = pr->partitionData[model]->upper,
-             i;
-           
-           for(i = lower; i < upper; i++)
-             {
-               modelWeights[model] += (double)tr->aliaswgt[i];
-               wgtsum              += (double)tr->aliaswgt[i];
-             }
-         }
-
-       /*for(i = 0; i < tr->originalCrunchedLength; i++)
-        {
-          modelWeights[tr->model[i]]  += (double)tr->aliaswgt[i];
-          wgtsum                      += (double)tr->aliaswgt[i];
-          }*/  
-
-      
-                
-      /* contribution = weight share; global fracchange = weighted sum of partition values */
-      for(model = 0; model < numberOfModels; model++)
-        {                        
-          pr->partitionData[model]->partitionContribution = modelWeights[model] / wgtsum;
-          tr->fracchange +=  pr->partitionData[model]->partitionContribution * pr->partitionData[model]->fracchange;
-          pr->partitionData[model]->rawFracchange = pr->partitionData[model]->fracchange;
-        }
-    
-      rax_free(modelWeights);
-    }
-
-    tr->rawFracchange = tr->fracchange;
-}
-
-/** @brief First stage of the eigendecomposition used by makeEigen()
-  *
-  * Transforms the n x n matrix \a a in place and fills the vectors \a d and \a e,
-  * which are afterwards handed to mytqli() together with \a a.
-  * NOTE(review): the structure looks like the classic Householder reduction of a
-  * symmetric matrix to tridiagonal form ("tred2"), written with 1-based loop
-  * counters and swapped row/column subscripts - confirm against the reference.
-  *
-  * @param a
-  *   n x n matrix (array of row pointers); overwritten
-  *
-  * @param n
-  *   Dimension of \a a and length of \a d and \a e
-  *
-  * @param d
-  *   Output vector of length n; on return d[i] = a[i][i] of the transformed matrix
-  *
-  * @param e
-  *   Output vector of length n; e[0] is set to 0
-  */
-static void mytred2(double **a, const int n, double *d, double *e)
-{
-  int     l, k, j, i;
-  double  scale, hh, h, g, f; 
- 
-  /* first pass: work from the last column (i = n) down to the second one */
-  for (i = n; i > 1; i--)
-    {
-      l = i - 1;
-      h = 0.0;
-      scale = 0.0;
-      
-      if (l > 1)
-        {
-          /* scale = sum of absolute values of the first l entries of column i-1 */
-          for (k = 1; k <= l; k++)
-            scale += fabs(a[k - 1][i - 1]);
-          if (scale == 0.0)
-            e[i - 1] = a[l - 1][i - 1];
-          else
-            {
-              /* normalise the column by scale and accumulate its squared norm in h */
-              for (k = 1; k <= l; k++)
-                {
-                  a[k - 1][i - 1] /= scale;
-                  h += a[k - 1][i - 1] * a[k - 1][i - 1];
-                }
-              f = a[l - 1][i - 1];
-              /* g gets the sign opposite to f */
-              g = ((f > 0) ? -sqrt(h) : sqrt(h)); /* diff */
-              e[i - 1] = scale * g;
-              h -= f * g;
-              a[l - 1][i - 1] = f - g;
-              f = 0.0;
-              for (j = 1; j <= l; j++)
-                {
-                  a[i - 1][j - 1] = a[j - 1][i - 1] / h;
-                  g = 0.0;
-                  for (k = 1; k <= j; k++)
-                    g += a[k - 1][j - 1] * a[k - 1][i - 1];
-                  for (k = j + 1; k <= l; k++)
-                    g += a[j - 1][k - 1] * a[k - 1][i - 1];
-                  e[j - 1] = g / h;
-                  f += e[j - 1] * a[j - 1][i - 1];
-                }
-              hh = f / (h + h);
-              for (j = 1; j <= l; j++)
-                {
-                  f = a[j - 1][i - 1];
-                  g = e[j - 1] - hh * f;
-                  e[j - 1] = g;
-                  for (k = 1; k <= j; k++)
-                    a[k - 1][j - 1] -= (f * e[k - 1] + g * a[k - 1][i - 1]);
-                }
-            }
-        } 
-      else
-        e[i - 1] = a[l - 1][i - 1];
-      /* d temporarily stores h; it is tested against 0 in the second pass */
-      d[i - 1] = h;
-    }
-  d[0] = 0.0;
-  e[0] = 0.0;
-  
-  /* second pass: update a row by row and move its diagonal into d */
-  for (i = 1; i <= n; i++)
-    {
-      l = i - 1;
-      if (d[i - 1] != 0.0)
-        {
-          for (j = 1; j <= l; j++)
-            {
-                g = 0.0;
-                for (k = 1; k <= l; k++)
-                  g += a[k - 1][i - 1] * a[j - 1][k - 1];
-                for(k = 1; k <= l; k++)
-                  a[j - 1][k - 1] -= g * a[i - 1][k - 1];
-            }
-        }
-      d[i - 1] = a[i - 1][i - 1];
-      /* reset row/column i-1 of a to the corresponding row/column of the identity */
-      a[i - 1][i - 1] = 1.0;
-      for (j = 1; j <= l; j++)
-        a[i - 1][j - 1] = a[j - 1][i - 1] = 0.0;
-    }
- 
- 
-}
-/*#define MYSIGN(a,b) ((b)<0 ? -fabs(a) : fabs(a))*/
-
-/** @brief Second stage of the eigendecomposition used by makeEigen()
-  *
-  * Iteratively updates \a d, \a e and \a z in place, starting from the output of
-  * mytred2(). initGeneric() treats the final \a d as the eigenvalues and \a z as
-  * the matching eigenvectors.
-  * NOTE(review): the structure looks like the classic QL iteration with implicit
-  * shifts ("tqli") - confirm against the reference.
-  *
-  * @param d
-  *   Vector of length n; updated in place
-  *
-  * @param e
-  *   Vector of length n; shifted down by one position on entry and then used as
-  *   work space
-  *
-  * @param n
-  *   Dimension of the problem
-  *
-  * @param z
-  *   n x n matrix (array of row pointers); rows i-1 and i are recombined in every
-  *   inner step
-  *
-  * @return Always 1
-  */
-static int mytqli(double *d, double *e, const int n, double **z)
-{
-  int     m, l, iter, i, k;
-  double  s, r, p, g, f, dd, c, b;
-   
-  /* shift e down by one element and clear the last slot */
-  for (i = 2; i <= n; i++)
-    e[i - 2] = e[i - 1];
-
-  e[n - 1] = 0.0;
-
-  for (l = 1; l <= n; l++)
-    {
-      iter = 0;
-      do
-        {
-          /* find the first m >= l whose e[m - 1] is negligible next to its two
-             neighbouring d entries (m ends at n if none is found) */
-          for (m = l; m <= n - 1; m++)
-            {
-              dd = fabs(d[m - 1]) + fabs(d[m]);
-              if (fabs(e[m - 1]) + dd == dd)
-                break;
-            }
-
-          if (m != l)
-           {
-             /* NOTE(review): iter is reset to 0 above but never incremented, so this
-                assertion can never fire and the loop has no effective iteration cap */
-             assert(iter < 30);
-             
-             g = (d[l] - d[l - 1]) / (2.0 * e[l - 1]);
-             r = sqrt((g * g) + 1.0);
-             /* r is given the sign of g (inlined form of the MYSIGN macro) */
-             g = d[m - 1] - d[l - 1] + e[l - 1] / (g + ((g < 0)?-fabs(r):fabs(r)));/*MYSIGN(r, g));*/
-             s = c = 1.0;
-             p = 0.0;
-
-             for (i = m - 1; i >= l; i--)
-               {
-                 f = s * e[i - 1];
-                 b = c * e[i - 1];
-                 /* divide by the larger of |f| and |g| when forming c and s */
-                 if (fabs(f) >= fabs(g))
-                   {
-                     c = g / f;
-                     r = sqrt((c * c) + 1.0);
-                     e[i] = f * r;
-                     c *= (s = 1.0 / r);
-                   } 
-                 else
-                   {
-                     s = f / g;
-                     r = sqrt((s * s) + 1.0);
-                     e[i] = g * r;
-                     s *= (c = 1.0 / r);
-                   }
-                 g = d[i] - p;
-                 r = (d[i - 1] - g) * s + 2.0 * c * b;
-                 p = s * r;
-                 d[i] = g + p;
-                 g = c * r - b;
-                 /* apply the same (c, s) combination to rows i-1 and i of z */
-                 for (k = 1; k <= n; k++)
-                   {
-                     f = z[i][k-1];
-                     z[i][k-1] = s * z[i - 1][k - 1] + c * f;
-                     z[i - 1][k - 1] = c * z[i - 1][k - 1] - s * f;
-                   }
-               }
-
-             d[l - 1] = d[l - 1] - p;
-             e[l - 1] = g;
-             e[m - 1] = 0.0;
-           }
-        } 
-      while (m != l);
-    }
-
-    
- 
-    return (1);
- }
-
-
-/** @brief Eigendecomposition driver: mytred2() followed by mytqli()
-  *
-  * @param _a
-  *   The square matrix to decompose (array of \a states row pointers). It is
-  *   overwritten by both stages; initGeneric() reads the eigenvectors from it
-  *
-  * @param states
-  *   Number of states, i.e. the dimension of \a _a
-  *
-  * @param d
-  *   Output vector of length \a states; initGeneric() uses it as the eigenvalues
-  * 
-  * @param e
-  *   Scratch vector of length \a states. It is written by mytred2() and consumed
-  *   by mytqli(); callers only need to provide the storage
-  *
-  * @todo
-  *   Remove e from the parameter list and allocate it locally?
-*/
-static void makeEigen(double **_a, const int states, double *d, double *e)
-{
-  /* stage 1: rewrites _a and fills d and e */
-  mytred2(_a, states, d, e);
-
-  /* stage 2: refines d, e and _a; the return value is always 1, so it is dropped */
-  (void)mytqli(d, e, states, _a);
-}
-
-/** @brief Generic initialization of parameters and decomposition of the Q matrix
-  *
-  * Builds a symmetrised rate matrix from the substitution rates and the state
-  * frequencies, decomposes it with makeEigen() and stores the eigenvalues (\a ext_EIGN),
-  * the eigenvectors (\a EV), the inverse eigenvectors (\a EI) and the tip vectors
-  * (\a tipVector). It also computes \a fracchange as the sum over all state pairs of
-  * f[j] * r[j][k] * f[k].
-  *
-  * @param states
-  *  Number of states of the current model
-  *
-  * @param valueVector
-  *  Bit masks, one per tip code; bit j set means state j is compatible with the code
-  *
-  * @param valueVectorLength
-  *  Number of entries of \a valueVector, and number of rows (of size \a states) of the tipVector
-  *
-  * @param fracchange
-  *  Variable where the computed fracchange will be stored (a single double is written)
-  *
-  * @param ext_EIGN
-  *   Array of \a states elements where the eigenvalues will be stored; element 0 is set to 0
-  *
-  * @param EV
-  *   Array of \a states * \a states elements where the eigenvectors will be stored
-  *  
-  * @param EI
-  *   Array of \a states * \a states elements where the inverse eigenvectors will be stored
-  *
-  * @param frequencies
-  *   The model frequencies (\a states elements, read only)
-  *
-  * @param ext_initialRates
-  *   The model substitution rates: the upper triangle of the rate matrix in row-major
-  *   order, i.e. states * (states - 1) / 2 elements (read only)
-  *
-  * @param tipVector
-  *   Array of \a valueVectorLength * \a states elements where the computed tipVector will be stored
-  *
-  * @todo
-  *   Perhaps we could change this also to the way pllOptRatesGeneric and other functions are implemented.
-  *   That is, instead of passing all these parameters, pass the partition index instead and load the
-  *   values within the code. Will make the code more readable. 
-*/
-static void initGeneric(const int states, 
-                        const unsigned int *valueVector, 
-                        int valueVectorLength,
-                        double *fracchange,
-                        double *ext_EIGN,
-                        double *EV,
-                        double *EI,
-                        double *frequencies,
-                        double *ext_initialRates,
-                        double *tipVector
-                      )
-{
-  double 
-    **r, 
-    **a, 
-    **EIGV,
-    *initialRates = ext_initialRates, 
-    *f, 
-    *e, 
-    *d, 
-    *invfreq, 
-    *EIGN,
-    *eptr; 
-  
-  int 
-    i, 
-    j, 
-    k, 
-    m, 
-    l;  
-
-  /* temporary states x states matrices: r (rates), a (matrix to decompose), EIGV (eigenvectors) */
-  r    = (double **)rax_malloc((size_t)states * sizeof(double *));
-  EIGV = (double **)rax_malloc((size_t)states * sizeof(double *));  
-  a    = (double **)rax_malloc((size_t)states * sizeof(double *));        
-  
-  for(i = 0; i < states; i++)
-    {
-      a[i]    = (double*)rax_malloc((size_t)states * sizeof(double));
-      EIGV[i] = (double*)rax_malloc((size_t)states * sizeof(double));
-      r[i]    = (double*)rax_malloc((size_t)states * sizeof(double));
-    }
-
-  /* temporary vectors of length states */
-  f       = (double*)rax_malloc((size_t)states * sizeof(double));
-  e       = (double*)rax_malloc((size_t)states * sizeof(double));
-  d       = (double*)rax_malloc((size_t)states * sizeof(double));
-  invfreq = (double*)rax_malloc((size_t)states * sizeof(double));
-  EIGN    = (double*)rax_malloc((size_t)states * sizeof(double));
-  
-  /* work on a private copy of the frequencies */
-  for(l = 0; l < states; l++) 
-    f[l] = frequencies[l];      
-    
-  
-  i = 0;
-  
-  for(j = 0; j < states; j++)    
-    for(k = 0; k < states; k++)
-      r[j][k] = 0.0;
-  
-  /* fill the upper triangle of r from the linear rates array */
-  for(j = 0; j < states - 1; j++)
-    for (k = j + 1; k < states; k++)              
-      r[j][k] = initialRates[i++];         
-  
-  /* mirror the upper triangle into the lower one; the diagonal stays 0 */
-  for (j = 0; j < states; j++) 
-    {
-      r[j][j] = 0.0;
-      for (k = 0; k < j; k++)
-        r[j][k] = r[k][j];
-    }                         
-  
-  
-
-  /* fracchange = sum over all (j, k) of f[j] * r[j][k] * f[k] */
-  *fracchange = 0.0;
-  
-  for (j = 0; j < states; j++)
-    for (k = 0; k < states; k++)
-      *fracchange += f[j] * r[j][k] * f[k];
-  
-  m = 0;
-  
-  for(i=0; i< states; i++) 
-    a[i][i] = 0;
-  
-  /*  assert(r[states - 2][states - 1] == 1.0);*/
-  
-  /* build the symmetric matrix a: off-diagonal entries are rate * sqrt(f[i] * f[j]),
-     and each diagonal entry a[i][i] is minus the sum of rate * f[j] over all j != i */
-  for(i = 0; i < states; i++) 
-    {
-      for(j = i + 1;  j < states; j++) 
-        {
-          double factor =  initialRates[m++];
-          a[i][j] = a[j][i] = factor * sqrt( f[i] * f[j]);
-          a[i][i] -= factor * f[j];
-          a[j][j] -= factor * f[i];
-        }
-    }                           
-
-  /* on return d holds the eigenvalues and a the eigenvectors; e is scratch space */
-  makeEigen(a, states, d, e);
-  
- 
-  
-  /* scale column j of a by sqrt(f[j]) */
-  for (i = 0; i < states; i++)     
-    for (j = 0; j < states; j++)       
-      a[i][j] *= sqrt(f[j]);
-   
-  
-  
-  /* move the first eigenvalue that is (numerically) zero, i.e. > -1e-8, to index 0
-     together with its row of a; the moved row is divided by the sum of its entries */
-  for (i = 0; i < states; i++)
-    {     
-      if (d[i] > -1e-8) 
-        {             
-          if (i != 0) 
-            {               
-              double tmp = d[i], sum=0;
-              d[i] = d[0];
-              d[0] = tmp;
-              for (j=0; j < states; j++) 
-                {
-                  tmp = a[i][j];
-                  a[i][j] = a[0][j];
-                  sum += (a[0][j] = tmp);
-                }
-              for (j=0; j < states; j++) 
-                a[0][j] /= sum;
-            }
-          break;
-        }
-    }
-  
-  /* EIGN = negated eigenvalues, EIGV = transpose of a, invfreq[i] = 1 / EIGV[i][0] */
-  for (i = 0; i < states; i++) 
-    {
-      EIGN[i] = -d[i];
-      
-      for (j=0; j < states; j++)
-        EIGV[i][j] = a[j][i];
-      invfreq[i] = 1 / EIGV[i][0]; 
-    }                                    
-  
-  /* the first exported eigenvalue is forced to exactly 0, all others must be positive */
-  ext_EIGN[0] = 0.0;
-
-  for (l = 1; l < states; l++)
-    {
-      ext_EIGN[l] = EIGN[l]; 
-      assert(ext_EIGN[l] > 0.0);
-    }
-  
-  eptr = EV;
-  
-  /* flatten EIGV row by row into EV */
-  for (i = 0; i < states; i++)            
-    for (j = 0; j < states; j++)
-      {
-        *eptr++ = EIGV[i][j];    /* EIGV: eigenvectors */ 
-        
-      }
-  /* inverse eigenvectors: first column is 1, the rest is EV scaled by invfreq of the row */
-  for (i = 0; i < states; i++)
-    for (j = 0; j < states; j++)
-      {
-        if(j == 0)
-          EI[i * states + j] = 1.0;
-        else
-          EI[i * states + j] = EV[i * states + j] * invfreq[i];   /* EV = Eigenvector, EI = Inverse Eigenvector,   $ u_{i,x}^{-1} = \pi_x u_{x,i} */
-      }
-  
-  /* tip vector of code i = sum of the EIGV rows of all states whose bit is set in the code */
-  for (i = 0; i < valueVectorLength; i++)
-    {
-      unsigned int value = valueVector[i];
-      
-      for(j = 0; j < states; j++)
-        tipVector[i * states + j]     = 0;                  
-
-      if(value > 0)
-        {                     
-          for (j = 0; j < states; j++) 
-            {       
-              if ((value >> j) & 1) 
-                {
-                  /* note: this l shadows the function-level l */
-                  int l;
-                  for (l = 0; l < states; l++)
-                    tipVector[i * states + l] += EIGV[j][l];
-                }                         
-            }       
-        }     
-    }
-
-  /* cap every tip vector entry at PLL_MAX_TIP_EV */
-  for (i = 0; i < valueVectorLength; i++)
-    {
-       for(j = 0; j < states; j++)
-         if(tipVector[i * states + j] > PLL_MAX_TIP_EV)
-           tipVector[i * states + j] = PLL_MAX_TIP_EV;
-    }
-
-
-  
-
-  /* release all temporaries */
-  for (i = 0; i < states; i++)
-    {
-      rax_free(EIGV[i]);
-      rax_free(a[i]);
-      rax_free(r[i]);
-    }
-
-  rax_free(r);
-  rax_free(a);
-  rax_free(EIGV);
-
-  rax_free(f);
-  rax_free(e);
-  rax_free(d);
-  rax_free(invfreq);
-  rax_free(EIGN);
-}
-
-/** @brief Initialize GTR
-  *
-  * Wrapper function for the decomposition of the substitution rates matrix
-  * into eigenvectors and eigenvalues. For protein partitions that do not use
-  * PLL_GTR, the substitution rates (and, unless base frequencies are optimized,
-  * the frequencies) are first loaded via initProtMat() or taken from the
-  * empirical frequencies. For the LG4M/LG4X models the decomposition is carried
-  * out for each of the four matrices and the partition's \a fracchange is set to
-  * the mean of the four per-matrix values. Finally updateFracChange() is called
-  * to refresh the partition contributions and \a tr->fracchange.
-  *
-  * @param tr
-  *   PLL instance
-  *
-  * @param pr
-  *   List of partitions
-  *
-  * @param model
-  *   Partition index
-  */
-void pllInitReversibleGTR(pllInstance * tr, partitionList * pr, int model)
-{ 
- double   
-   *ext_EIGN         = pr->partitionData[model]->EIGN,
-   *EV               = pr->partitionData[model]->EV,
-   *EI               = pr->partitionData[model]->EI,
-   *frequencies      = pr->partitionData[model]->frequencies,
-   *empiricalFrequencies = pr->partitionData[model]->empiricalFrequencies,
-   *ext_initialRates = pr->partitionData[model]->substRates,
-   *tipVector        = pr->partitionData[model]->tipVector,
-   /* points at the single fracchange value of THIS partition */
-   *fracchange       = &(pr->partitionData[model]->fracchange);
- 
-  
- int states = pr->partitionData[model]->states;
-
- switch(pr->partitionData[model]->dataType)
-   { 
-   case PLL_GENERIC_32:
-   case PLL_GENERIC_64:
-   case PLL_SECONDARY_DATA_6:
-   case PLL_SECONDARY_DATA_7: 
-   case PLL_SECONDARY_DATA:
-   case PLL_DNA_DATA:
-   case PLL_BINARY_DATA:    
-     initGeneric(states, 
-                 getBitVector(pr->partitionData[model]->dataType),
-                 getUndetermined(pr->partitionData[model]->dataType) + 1,
-                 fracchange,
-                 ext_EIGN, 
-                 EV, 
-                 EI, 
-                 frequencies, 
-                 ext_initialRates,
-                 tipVector
-                );
-     break;   
-   case PLL_AA_DATA:
-     if(pr->partitionData[model]->protModels != PLL_GTR)
-       {
-         double f[20];
-         int l;
-
-         if(pr->partitionData[model]->protModels == PLL_LG4M || pr->partitionData[model]->protModels == PLL_LG4X)
-           {
-             int 
-               i;
-             
-             /* load rates and frequencies of each of the four LG4 matrices */
-             for(i = 0; i < 4; i++)
-               {                 
-                 initProtMat(f, pr->partitionData[model]->protModels, &(pr->partitionData[model]->substRates_LG4[i][0]), i);
-                 
-                 if(!pr->partitionData[model]->optimizeBaseFrequencies)
-                 {
-                   if(!pr->partitionData[model]->protUseEmpiricalFreqs)
-                   {
-                     for(l = 0; l < 20; l++)            
-                       pr->partitionData[model]->frequencies_LG4[i][l] = f[l];
-                   }
-                   else
-                   {
-                     for(l = 0; l < 20; l++)            
-                       pr->partitionData[model]->frequencies_LG4[i][l] = empiricalFrequencies[l];
-                   }
-                 }
-                 else
-                 {
-                   /* optimized frequencies: all four matrices share the current estimate */
-                   memcpy(pr->partitionData[model]->frequencies_LG4[i], frequencies, 20 * sizeof(double));
-                 }
-               }
-           }
-         else
-           {
-             if(pr->partitionData[model]->protModels == PLL_AUTO)
-               initProtMat(f, pr->partitionData[model]->autoProtModels, ext_initialRates, 0);
-             else         
-               {
-                 initProtMat(f, pr->partitionData[model]->protModels, ext_initialRates, 0);
-               }
-
-             /*if(adef->protEmpiricalFreqs && tr->NumberOfModels == 1)
-               assert(tr->partitionData[model].protUseEmpiricalFreqs);*/
-         
-              if (!pr->partitionData[model]->optimizeBaseFrequencies) {
-                  if(!pr->partitionData[model]->protUseEmpiricalFreqs)
-                  {                 
-                      for(l = 0; l < 20; l++)           
-                         frequencies[l] = f[l];
-                  } else {
-                      for(l = 0; l < 20; l++)           
-                         frequencies[l] = empiricalFrequencies[l];
-                  }
-              }
-           }  
-       }
-               
-     if(pr->partitionData[model]->protModels == PLL_LG4M || pr->partitionData[model]->protModels == PLL_LG4X)
-       {
-         int 
-           i;
-
-         /* initGeneric() stores exactly one double through its fracchange argument,
-            so one local slot per LG4 matrix is sufficient. The previous code
-            allocated numberOfPartitions doubles per matrix, read element [model]
-            (uninitialised for model != 0) and wrote fracchange[model], which is
-            out of bounds because fracchange already points at this partition's
-            own value. */
-         double 
-           fracchanges_LG4[4],
-           acc = 0.0;
-
-         for(i = 0; i < 4; i++)
-           {
-             initGeneric(states, 
-                         bitVectorAA, 
-                         23, 
-                         &fracchanges_LG4[i],
-                         pr->partitionData[model]->EIGN_LG4[i], 
-                         pr->partitionData[model]->EV_LG4[i],
-                         pr->partitionData[model]->EI_LG4[i],
-                         pr->partitionData[model]->frequencies_LG4[i],
-                         pr->partitionData[model]->substRates_LG4[i],
-                         pr->partitionData[model]->tipVector_LG4[i]
-                        );   
-             acc += fracchanges_LG4[i];
-           }
-
-         /* the partition's fracchange is the mean over the four matrices */
-         *fracchange = acc / 4;
-       }
-     else
-       initGeneric(states, 
-                   bitVectorAA, 
-                   23, 
-                   fracchange,
-                   ext_EIGN, 
-                   EV, 
-                   EI, 
-                   frequencies, 
-                   ext_initialRates,
-                   tipVector
-                  );
-    break;  
-   default:
-     assert(0);
-   } 
-
- updateFracChange(tr, pr);
-}
-
-
-double LnGamma (double alpha)
-{
-/* Natural logarithm of the gamma function, ln(Gamma(alpha)), for alpha > 0;
-   stated accuracy: 10 decimal places.
-   Arguments below 7 are shifted upwards with Gamma(x + 1) = x * Gamma(x) until
-   they reach at least 7; the accumulated product is subtracted (as a logarithm)
-   from the Stirling-series value computed at the shifted argument.
-   Pike MC & Hill ID (1966) Algorithm 291: Logarithm of the gamma function.
-   Communications of the Association for Computing Machinery, 9:684
-*/
-  double shifted = alpha;   /* argument at which the series is evaluated */
-  double correction = 0.0;  /* -ln(product of the factors used for shifting) */
-  double step;
-  double series;
-
-  if (shifted < 7.0)
-    {
-      double product = 1.0;
-
-      /* step runs over alpha, alpha + 1, ... and stops at the first value >= 7 */
-      step = alpha - 1.0;
-      for (;;)
-        {
-          step = step + 1.0;
-          if (!(step < 7.0))
-            break;
-          product *= step;
-        }
-      shifted = step;
-
-      assert(product != 0.0);
-
-      correction = -log(product);
-    }
-
-  step = 1 / (shifted * shifted);
-
-  /* truncated Stirling series in powers of 1 / x^2, Horner form */
-  series = (((-.000595238095238 * step + .000793650793651) * step - .002777777777778) * step
-              + .083333333333333) / shifted;
-
-  return correction + (shifted - 0.5) * log(shifted) - shifted + .918938533204673
-         + series;
-}
-
-
-
-double IncompleteGamma (double x, double alpha, double ln_gamma_alpha)
-{
-/* returns the incomplete gamma ratio I(x,alpha) where x is the upper 
-           limit of the integration and alpha is the shape parameter.
-   returns 0 if x == 0 and (-1) if in error (x < 0 or alpha <= 0)
-   ln_gamma_alpha = ln(Gamma(alpha)) must be supplied by the caller.
-   (1) series expansion     if (alpha>x || x<=1)
-   (2) continued fraction   otherwise
-   RATNEST FORTRAN by
-   Bhattacharjee GP (1970) The incomplete gamma integral.  Applied Statistics,
-   19: 285-287 (AS32)
-*/
-   int i;
-   double p=alpha, g=ln_gamma_alpha;
-   /* accurate: convergence threshold; overflow: rescaling threshold for pn[] */
-   double accurate=1e-8, overflow=1e30;
-   double factor, gin=0, rn=0, a=0,b=0,an=0,dif=0, term=0, pn[6];
-
-
-   if (x==0) return (0);
-   if (x<0 || p<=0) return (-1);
-
-   
-   /* factor = x^p * exp(-x) / Gamma(p), computed in log space */
-   factor=exp(p*log(x)-x-g);   
-   if (x>1 && x>=p) goto l30;
-   /* (1) series expansion */
-   gin=1;  term=1;  rn=p;
- l20:
-   /* add terms until the latest one drops to the accuracy threshold */
-   rn++;
-   term*=x/rn;   gin+=term;
-
-   if (term > accurate) goto l20;
-   gin*=factor/p;
-   goto l50;
- l30:  
-   /* (2) continued fraction */
-   a=1-p;   b=a+x+1;  term=0;
-   pn[0]=1;  pn[1]=x;  pn[2]=x+1;  pn[3]=x*b;
-   gin=pn[2]/pn[3];   
- l32:  
-   /* next pair pn[4], pn[5] from the two previous pairs */
-   a++;  
-   b+=2;  
-   term++;   
-   an=a*term;
-   for (i=0; i<2; i++) 
-     pn[i+4]=b*pn[i+2]-an*pn[i];
-   if (pn[5] == 0) goto l35;
-   rn=pn[4]/pn[5];   
-   dif=fabs(gin-rn);  
-   /* converged only if both the absolute and the relative change are small */
-   if (dif>accurate) goto l34;
-   if (dif<=accurate*rn) goto l42;
- l34:   
-   gin=rn;
- l35: 
-   /* shift the window: the two newest pairs become the two previous ones */
-   for (i=0; i<4; i++) 
-     pn[i]=pn[i+2];
-   if (fabs(pn[4]) < overflow)            
-     goto l32;        
-   
-   /* rescale to keep the terms within floating point range */
-   for (i=0; i<4; i++) 
-     pn[i]/=overflow;
-
-   
-   goto l32;
- l42:  
-   /* the continued fraction yields the complement of the ratio */
-   gin=1-factor*gin;
-
- l50: 
-   return (gin);
-}
-
-
-
-
-double PointNormal (double prob)
-{
-/* Quantile of the standard normal distribution: returns z such that
-   Prob{x<z}=prob for x ~ N(0,1).
-   Returns (-9999) if the smaller tail probability, min(prob, 1-prob), is
-   below 1e-20 (the reference quotes (1e-12)<prob<1-(1e-12) as the valid range).
-   Odeh RE & Evans JO (1974) The percentage points of the normal distribution.
-   Applied Statistics 22: 96-97 (AS70)
-
-   Newer methods:
-     Wichura MJ (1988) Algorithm AS 241: the percentage points of the
-       normal distribution.  37: 477-484.
-     Beasley JD & Springer SG  (1977).  Algorithm AS 111: the percentage 
-       points of the normal distribution.  26: 118-121.
-
-*/
-   /* coefficients of the rational approximation, highest power first */
-   const double numer[5] = { -.453642210148e-4, -.0204231210245, -.342242088547, -1.0, -.322232431088 };
-   const double denom[5] = { .0038560700634, .103537752850, .531103462366, .588581570495, .0993484626060 };
-   double tail, y, top, bottom, z;
-   int k;
-
-   /* work with the smaller tail; the sign is restored at the end */
-   if (prob < 0.5)
-     tail = prob;
-   else
-     tail = 1 - prob;
-
-   if (tail < 1e-20)
-     return (-9999);
-
-   y = sqrt (log(1/(tail*tail)));
-
-   /* Horner evaluation of both polynomials in y */
-   top    = numer[0];
-   bottom = denom[0];
-   for (k = 1; k < 5; k++)
-     {
-       top    = top * y + numer[k];
-       bottom = bottom * y + denom[k];
-     }
-
-   z = y + top / bottom;
-
-   if (prob < 0.5)
-     return (-z);
-   return (z);
-}
-
-
-double PointChi2 (double prob, double v)
-{
-/* returns z so that Prob{x<z}=prob where x is Chi2 distributed with df=v
-   returns -1 if in error (prob outside [0.000002, 0.999998], v <= 0, or a
-   failure reported by IncompleteGamma).
-   A starting value is chosen by one of three branches and then refined by an
-   iterative series correction until the relative change is at most e.
-   RATNEST FORTRAN by
-       Best DJ & Roberts DE (1975) The percentage points of the 
-       Chi2 distribution.  Applied Statistics 24: 385-388.  (AS91)
-   Converted into C by Ziheng Yang, Oct. 1993.
-*/
-   /* e: relative convergence tolerance; aa: ln(2) */
-   double e=.5e-6, aa=.6931471805, p=prob, g;
-   double xx, c, ch, a=0,q=0,p1=0,p2=0,t=0,x=0,b=0,s1,s2,s3,s4,s5,s6;
-  
-   if (p<.000002 || p>.999998 || v<=0) return (-1);
-  
-   g = LnGamma(v/2);
-   
-   xx=v/2;   c=xx-1;
-   if (v >= -1.24*log(p)) goto l1;
-
-   /* starting value for small prob relative to v */
-   ch=pow((p*xx*exp(g+xx*aa)), 1/xx);
-   /* a starting value below the tolerance is returned as is */
-   if (ch-e<0) return (ch);
-   goto l4;
-l1:
-   if (v>.32) goto l3;
-   /* starting value for very small v: a Newton-like loop until ch changes by at most 1% */
-   ch=0.4;   a=log(1-p);
-l2:
-   q=ch;  p1=1+ch*(4.67+ch);  p2=ch*(6.73+ch*(6.66+ch));
-   t=-0.5+(4.67+2*ch)/p1 - (6.73+ch*(13.32+3*ch))/p2;
-   ch-=(1-exp(a+g+.5*ch+c*aa)*p2/p1)/t;
-   if (fabs(q/ch-1)-.01 <= 0) goto l4;
-   else                       goto l2;
-  
-l3:    
-   /* starting value derived from the normal quantile of p */
-   x=PointNormal (p);
-   p1=0.222222/v;   ch=v*pow((x*sqrt(p1)+1-p1), 3.0);
-   if (ch>2.2*v+6)  ch=-2*(log(1-p)-c*log(.5*ch)+g);
-l4:
-   /* refinement step: q remembers the previous estimate */
-   q=ch;   p1=.5*ch;   
-   if ((t=IncompleteGamma (p1, xx, g))< 0.0) 
-     {
-       printf ("IncompleteGamma \n");      
-       return (-1);
-     }
-  
-   /* p2 = difference between the target and the currently achieved probability */
-   p2=p-t;
-   t=p2*exp(xx*aa+g+p1-c*log(ch));   
-   b=t/ch;  a=0.5*t-b*c;
-
-   /* coefficients of the series correction */
-   s1=(210+a*(140+a*(105+a*(84+a*(70+60*a))))) / 420;
-   s2=(420+a*(735+a*(966+a*(1141+1278*a))))/2520;
-   s3=(210+a*(462+a*(707+932*a)))/2520;
-   s4=(252+a*(672+1182*a)+c*(294+a*(889+1740*a)))/5040;
-   s5=(84+264*a+c*(175+606*a))/2520;
-   s6=(120+c*(346+127*c))/5040;
-   ch+=t*(1+0.5*t*s1-b*c*(s1-b*(s2-b*(s3-b*(s4-b*(s5-b*s6))))));
-   /* repeat until the relative change is at most e */
-   if (fabs(q/ch-1) > e) goto l4;
-
-   return (ch);
-}
-
-/** @brief Compute the gamma rates
-    
-    Fill \a gammaRates with \a K rate values for a gamma distribution whose two
-    parameters are both set to \a alpha. With \a useMedian the rates are the values
-    of PLL_POINT_GAMMA at the probabilities (2i + 1) / (2K), rescaled so that they
-    sum to K. Otherwise the values of PLL_POINT_GAMMA at the probabilities
-    (i + 1) / K are passed through IncompleteGamma() with shape alpha + 1 and the
-    differences of consecutive results, multiplied by K, become the rates.
-
-    @param alpha
-      Alpha parameter; must be at least PLL_ALPHA_MIN
-
-    @param gammaRates
-      Array of \a K elements where to store the computed gamma rates
-
-    @param K
-      Number of categories
-
-    @param useMedian
-      Boolean flag whether to use a median or not
-
-    @note
-      NOTE(review): when \a useMedian is false the code indexes gammaProbs[K - 2],
-      so it appears to require K >= 2 - confirm that callers never pass K < 2.
-*/
-void pllMakeGammaCats(double alpha, double *gammaRates, int K, pllBoolean useMedian)
-{
-  int 
-    i;
-
-  double 
-    /* alpha / alpha * K evaluates to K for any finite non-zero alpha */
-    factor = alpha / alpha * K, 
-    lnga1, 
-    alfa = alpha, 
-    beta = alpha,
-    /* scratch buffer, only used when useMedian is false */
-    *gammaProbs = (double *)rax_malloc(K * sizeof(double));
-
-  /* Note that PLL_ALPHA_MIN setting is somewhat critical due to   */
-  /* numerical instability caused by very small rate[0] values */
-  /* induced by low alpha values around 0.01 */
-
-  assert(alfa >= PLL_ALPHA_MIN); 
-
-  if(useMedian)
-    {
-      double  
-        middle = 1.0 / (2.0*K),
-        t = 0.0; 
-      
-      /* evaluate at the mid-point probability of each of the K classes */
-      for(i = 0; i < K; i++)     
-        gammaRates[i] = PLL_POINT_GAMMA((double)(i * 2 + 1) * middle, alfa, beta);
-      
-      /* rescale so that the rates sum to factor */
-      for (i = 0; i < K; i++) 
-        t += gammaRates[i];
-       for( i = 0; i < K; i++)     
-         gammaRates[i] *= factor / t;
-    }
-  else
-    {
-      lnga1 = LnGamma(alfa + 1);
-
-      /* evaluate at the K - 1 class boundaries */
-      for (i = 0; i < K - 1; i++)
-        gammaProbs[i] = PLL_POINT_GAMMA((i + 1.0) / K, alfa, beta);
-
-      /* replace each boundary by IncompleteGamma() with shape alfa + 1 at that boundary */
-      for (i = 0; i < K - 1; i++)
-        gammaProbs[i] = IncompleteGamma(gammaProbs[i] * beta, alfa + 1, lnga1);   
-
-      /* rate of a class = difference between its two boundary values, times factor */
-      gammaRates[0] = gammaProbs[0] * factor;
-      
-      gammaRates[K - 1] = (1 - gammaProbs[K - 2]) * factor;
-
-      for (i= 1; i < K - 1; i++)  
-        gammaRates[i] = (gammaProbs[i] - gammaProbs[i - 1]) * factor;      
-    }
-  /* assert(gammaRates[0] >= 0.00000000000000000000000000000044136090435925743185910935350715027016962154188875); */
-
-  rax_free(gammaProbs);
-
-  return;  
-}
-
-
-/** @brief Reset all substitution rates to 1.0
-  *
-  * Writes 1.0 into the first \a rates - 1 entries of \a r and then into the
-  * last entry, r[rates - 1], as well.
-  *
-  * @param r
-  *  Array of substitution rates (at least \a rates elements)
-  *
-  * @param rates
-  *   Number of rates to set
-  */
-static void setRates(double *r, int rates)
-{
-  const int last = rates - 1;
-  int idx = 0;
-
-  /* The free rates used to start at 0.5; they are 1.0 now so that an interface */
-  /* function for setting models other than GTR is easier to implement.        */
-  while (idx < last)
-    {
-      r[idx] = 1.0;
-      idx++;
-    }
-
-  /* the last rate is always 1.0 */
-  r[last] = 1.0;
-}
-
-/** @brief Initialize the substitution rates matrix
-  *
-  * Initialize the substitution rates (\a substRates) of all partitions. The number
-  * of rates per partition is states * (states - 1) / 2. The initial values depend
-  * on the data type of the partition and, for generic multi-state data, on
-  * \a tr->multiStateModel. Partitions flagged as \a nonGTR are afterwards adjusted
-  * according to their \a symmetryVector.
-  *
-  * @param tr
-  *   The PLL instance
-  *
-  * @param pr
-  *   List of partitions
-  *
-  * @todo
-  *   Do we need the secondary structure and binary? Will we only use GTR? If yes,
-  *   we could rename this function to initRateMatrixGTR
-  */
-void initRateMatrix(pllInstance *tr, partitionList *pr)
-{
-  int model;
-
-  for(model = 0; model < pr->numberOfPartitions; model++)
-    {   
-      int       
-        i,
-        states = pr->partitionData[model]->states,
-        /* number of entries above the diagonal of a states x states matrix */
-        rates  = (states * states - states) / 2;
-      
-      switch(pr->partitionData[model]->dataType)
-        {
-        case PLL_BINARY_DATA:
-        case PLL_DNA_DATA:
-        case PLL_SECONDARY_DATA:
-        case PLL_SECONDARY_DATA_6:
-        case PLL_SECONDARY_DATA_7:
-          /* all rates start at 1.0 */
-          setRates(pr->partitionData[model]->substRates, rates);
-          break;          
-        case PLL_GENERIC_32:
-        case PLL_GENERIC_64:      
-          switch(tr->multiStateModel)
-            {
-            case PLL_ORDERED_MULTI_STATE:
-              {
-                int 
-                  j, 
-                  k, 
-                  /* note: this i shadows the loop-level i declared above */
-                  i = 0;
-                
-                /* rate between states j and k is their distance k - j */
-                for(j = 0; j < states; j++)
-                  for(k = j + 1; k < states; k++)
-                    pr->partitionData[model]->substRates[i++] = (double)(k - j);
-                assert(i == rates);             
-              }
-              break;
-            case PLL_MK_MULTI_STATE:
-              /* all rates equal */
-              for(i = 0; i < rates; i++)
-                pr->partitionData[model]->substRates[i] = 1.0;
-              
-              break;
-            case PLL_GTR_MULTI_STATE:
-              setRates(pr->partitionData[model]->substRates, rates);
-              break;
-            default:
-              assert(0);
-            }
-          break;
-        case PLL_AA_DATA:
-          /* only protein GTR is initialized here; other protein models are left untouched */
-          if(pr->partitionData[model]->protModels == PLL_GTR)
-            {
-              //set optimizeSubstRates to true !
-              pr->partitionData[model]->optimizeSubstitutionRates = PLL_TRUE;
-              putWAG(pr->partitionData[model]->substRates);
-            }
-          break;
-        default:
-          assert(0);
-        }           
-      
-      if(pr->partitionData[model]->nonGTR)
-        {
-          /* nonGTR is only valid for secondary structure data */
-          assert(pr->partitionData[model]->dataType == PLL_SECONDARY_DATA ||
-                 pr->partitionData[model]->dataType == PLL_SECONDARY_DATA_6 ||
-                 pr->partitionData[model]->dataType == PLL_SECONDARY_DATA_7);
-                  
-          /* symmetry class -1 sets the rate to 0.0; rates that share the symmetry
-             class of the last rate are set to 1.0; all other rates keep their value */
-          for(i = 0; i < rates; i++)
-            {
-              if(pr->partitionData[model]->symmetryVector[i] == -1)
-                pr->partitionData[model]->substRates[i] = 0.0;
-              else
-                {
-                  if(pr->partitionData[model]->symmetryVector[i] == pr->partitionData[model]->symmetryVector[rates - 1])
-                    pr->partitionData[model]->substRates[i] = 1.0;
-                }
-            }
-        }
-    }  
-}
-
-/** @brief Copy a rate symmetry vector and a frequency grouping vector
-  *
-  * Copies the first \a sCount elements of \a s to \a sDest and the first
-  * \a fCount elements of \a f to \a fDest.
-  *
-  * @todo
-  *   Do we need this function?
-*/
-static void setSymmetry(int *s, int *sDest, const int sCount, int *f, int *fDest, const int fCount)
-{
-  int pos;
-
-  /* symmetry vector */
-  pos = 0;
-  while (pos < sCount)
-    {
-      sDest[pos] = s[pos];
-      pos++;
-    }
-
-  /* frequency grouping */
-  pos = 0;
-  while (pos < fCount)
-    {
-      fDest[pos] = f[pos];
-      pos++;
-    }
-}
-
-/** @brief Wrapper function for setting secondary structure symmetries
-  *
-  * @todo
-  *   Do we need this function?
-*/
-static void setupSecondaryStructureSymmetries(pllInstance *tr, partitionList *partitions)
-{
-  int model;
-  int numberOfModels = partitions->numberOfPartitions;
-
-  for(model = 0; model < numberOfModels; model++)
-    {
-      if(partitions->partitionData[model]->dataType == PLL_SECONDARY_DATA ||
-                  partitions->partitionData[model]->dataType == PLL_SECONDARY_DATA_6 ||
-                  partitions->partitionData[model]->dataType == PLL_SECONDARY_DATA_7)
-        {       
-          switch(tr->secondaryStructureModel)
-            {
-            case PLL_SEC_6_A:
-                partitions->partitionData[model]->nonGTR = PLL_FALSE;
-              break;
-            case PLL_SEC_6_B:
-              {
-                int f[6]  = {0, 1, 2, 3, 4, 5};
-                int s[15] = {2, 0, 1, 2, 2, 2, 2, 0, 1, 1, 2, 2, 2, 2, 1};
-
-                setSymmetry(s, partitions->partitionData[model]->symmetryVector, 15, f, partitions->partitionData[model]->frequencyGrouping, 6);
-                  
-                partitions->partitionData[model]->nonGTR = PLL_TRUE;
-              }
-              break;
-            case PLL_SEC_6_C:
-              {
-                int f[6]  = {0, 2, 2, 1, 0, 1};
-                int s[15] = {2, 0, 1, 2, 2, 2, 2, 0, 1, 1, 2, 2, 2, 2, 1};
-
-                setSymmetry(s, partitions->partitionData[model]->symmetryVector, 15, f, partitions->partitionData[model]->frequencyGrouping, 6);
-                
-                partitions->partitionData[model]->nonGTR = PLL_TRUE;
-              }
-              break;
-            case PLL_SEC_6_D:
-              {
-                int f[6]  = {0, 2, 2, 1, 0, 1};
-                int s[15] = {2, -1, 1, 2, 2, 2, 2, -1, 1, 1, 2, 2, 2, 2, 1};
-
-                setSymmetry(s, partitions->partitionData[model]->symmetryVector, 15, f, partitions->partitionData[model]->frequencyGrouping, 6);
-
-                partitions->partitionData[model]->nonGTR = PLL_TRUE;
-              }
-              break;
-            case PLL_SEC_6_E:
-              {
-                int f[6]  = {0, 1, 2, 3, 4, 5};
-                int s[15] = {2, -1, 1, 2, 2, 2, 2, -1, 1, 1, 2, 2, 2, 2, 1};
-
-                setSymmetry(s, partitions->partitionData[model]->symmetryVector, 15, f, partitions->partitionData[model]->frequencyGrouping, 6);
-
-                partitions->partitionData[model]->nonGTR = PLL_TRUE;
-              }
-              break;
-            case PLL_SEC_7_A:
-                partitions->partitionData[model]->nonGTR = PLL_FALSE;
-              break;
-            case PLL_SEC_7_B:
-              {
-                int f[7]  = {0, 2, 2, 1, 0, 1, 3};
-                int s[21] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20};
-                
-                setSymmetry(s, partitions->partitionData[model]->symmetryVector, 21, f, partitions->partitionData[model]->frequencyGrouping, 7);
-
-                partitions->partitionData[model]->nonGTR = PLL_TRUE;
-
-              }
-              break;
-            case PLL_SEC_7_C:
-              {
-                int f[7]  = {0, 1, 2, 3, 4, 5, 6};
-                int s[21] = {-1, -1, 0, -1, -1, 4, -1, -1, -1, 3, 5, 1, -1, -1, 6, -1, -1, 7, 2, 8, 9};
-                
-                setSymmetry(s, partitions->partitionData[model]->symmetryVector, 21, f, partitions->partitionData[model]->frequencyGrouping, 7);
-
-                partitions->partitionData[model]->nonGTR = PLL_TRUE;
-
-              }
-              break;
-            case PLL_SEC_7_D:
-              {
-                int f[7]  = {0, 1, 2, 3, 4, 5, 6};
-                int s[21] = {2, 0, 1, 2, 2, 3, 2, 2, 0, 1, 3, 1, 2, 2, 3, 2, 2, 3, 1, 3, 3};
-                
-                setSymmetry(s, partitions->partitionData[model]->symmetryVector, 21, f, partitions->partitionData[model]->frequencyGrouping, 7);
-
-                partitions->partitionData[model]->nonGTR = PLL_TRUE;
-
-              }
-              break;
-            case PLL_SEC_7_E:
-              {
-                int f[7]  = {0, 1, 2, 3, 4, 5, 6};
-                int s[21] = {-1, -1, 0, -1, -1, 1, -1, -1, -1, 0, 1, 0, -1, -1, 1, -1, -1, 1, 0, 1, 1};
-                
-                setSymmetry(s, partitions->partitionData[model]->symmetryVector, 21, f, partitions->partitionData[model]->frequencyGrouping, 7);
-
-                partitions->partitionData[model]->nonGTR = PLL_TRUE;
-
-              }
-              break;
-            case PLL_SEC_7_F:
-              {
-                int f[7]  = {0, 2, 2, 1, 0, 1, 3};
-                int s[21] = {2, 0, 1, 2, 2, 3, 2, 2, 0, 1, 3, 1, 2, 2, 3, 2, 2, 3, 1, 3, 3};            
-                
-                setSymmetry(s, partitions->partitionData[model]->symmetryVector, 21, f, partitions->partitionData[model]->frequencyGrouping, 7);
-
-                partitions->partitionData[model]->nonGTR = PLL_TRUE;
-
-              }
-              break;
-              
-            case PLL_SEC_16:
-                partitions->partitionData[1]->nonGTR = PLL_FALSE;
-              break;
-            case PLL_SEC_16_A:
-              {
-                int f[16]  = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
-                int s[120] = {/* AA */  4,  4,  3,  4, -1, -1, -1,  4, -1, -1, -1,  3, -1, -1, -1,
-                              /* AC */  4,  3, -1,  4, -1, -1, -1,  3, -1, -1, -1,  4, -1, -1,
-                              /* AG */  3, -1, -1,  3, -1, -1, -1,  4, -1, -1, -1,  3, -1,
-                              /* AU */ -1, -1,  2,  3, -1,  0, -1,  1,  2, -1,  2,  3,
-                              /* CA */  4,  3,  4,  4, -1, -1, -1,  3, -1, -1, -1,
-                              /* CC */  3,  4, -1,  3, -1, -1, -1,  4, -1, -1,
-                              /* CG */  3, -1,  2,  3,  2,  0, -1,  1, -1,
-                              /* CU */ -1, -1, -1,  3, -1, -1, -1,  4,
-                              /* GA */  3,  4,  3,  3, -1, -1, -1,
-                              /* GC */  3,  1,  2,  3,  2, -1,
-                              /* GG */  3, -1, -1,  3, -1,
-                              /* GU */  2, -1,  2,  3,
-                              /* UA */  3,  1,  3,
-                              /* UC */  3,  4,
-                              /* UG */  3};
-                              
-                
-                setSymmetry(s, partitions->partitionData[model]->symmetryVector, 120, f, partitions->partitionData[model]->frequencyGrouping, 16);
-                              
-                partitions->partitionData[model]->nonGTR = PLL_TRUE;
-
-                }
-              break;
-            case PLL_SEC_16_B:
-              {
-                int f[16]  = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
-                int s[120] = {/* AA */  0,  0,  0,  0, -1, -1, -1,  0, -1, -1, -1,  0, -1, -1, -1,
-                              /* AC */  0,  0, -1,  0, -1, -1, -1,  0, -1, -1, -1,  0, -1, -1,
-                              /* AG */  0, -1, -1,  0, -1, -1, -1,  0, -1, -1, -1,  0, -1,
-                              /* AU */ -1, -1,  0,  0, -1,  0, -1,  0,  0, -1,  0,  0,
-                              /* CA */  0,  0,  0,  0, -1, -1, -1,  0, -1, -1, -1,
-                              /* CC */  0,  0, -1,  0, -1, -1, -1,  0, -1, -1,
-                              /* CG */  0, -1,  0,  0,  0,  0, -1,  0, -1,
-                              /* CU */ -1, -1, -1,  0, -1, -1, -1,  0,
-                              /* GA */  0,  0,  0,  0, -1, -1, -1,
-                              /* GC */  0,  0,  0,  0,  0, -1,
-                              /* GG */  0, -1, -1,  0, -1,
-                              /* GU */  0, -1,  0,  0,
-                              /* UA */  0,  0,  0,
-                              /* UC */  0,  0,
-                              /* UG */  0};
-                              
-                
-                setSymmetry(s, partitions->partitionData[model]->symmetryVector, 120, f, partitions->partitionData[model]->frequencyGrouping, 16);
-                              
-                partitions->partitionData[model]->nonGTR = PLL_TRUE;
-              }
-              break;
-            case PLL_SEC_16_C:        
-            case PLL_SEC_16_D:
-            case PLL_SEC_16_E:
-            case PLL_SEC_16_F:
-            case PLL_SEC_16_I:
-            case PLL_SEC_16_J:
-            case PLL_SEC_16_K:
-              assert(0);
-            default:
-              assert(0);
-            }
-        }
-
-    }
-
-}
-
-/** @brief Initialize base frequencies in partition data
-  *
-  * Copy the computed empirical frequencies for each partition from the \a empiricalFrequencies
-  * structure to each partition structure.
-  *
-  * @param pr
-  *   List of partitions
-  *
-  * @param empiricalFrequencies
-  *   Array containing the empirical frequencies
-*/
-static void initializeBaseFreqs(partitionList *pr, double **empiricalFrequencies)
-{
-  size_t 
-    model;
-  int
-    l,
-    numFreqs;
-  double f;
-
-  for(model = 0; model < (size_t)pr->numberOfPartitions; model++)
-    {
-      if(pr->partitionData[model]->optimizeBaseFrequencies)
-       {
-         //set all base frequencies to identical starting values 1.0 / numberOfDataStates
-         numFreqs = pr->partitionData[model]->states;
-         f = 1.0 / ((double)numFreqs);
-
-         for(l = 0; l < numFreqs; l++)
-          {
-            pr->partitionData[model]->frequencies[l]          = f;
-            pr->partitionData[model]->empiricalFrequencies[l] = f;
-          }
-       }
-      else
-       {
-         memcpy(pr->partitionData[model]->frequencies,          empiricalFrequencies[model], sizeof(double) * pr->partitionData[model]->states);
-         memcpy(pr->partitionData[model]->empiricalFrequencies, empiricalFrequencies[model], sizeof(double) * pr->partitionData[model]->states);
-       }
-    }
-}
-
-
-/** @brief Initialize the model parameters
-  * 
-  * Initialize the model parameters. Specifically
-  *   - Base frequencies
-  *   - Rate matrix
-  *
-  * @param tr
-  *   The PLL instance
-  *
-  * @param empiricalFrequencies
-  *   Pointer to the empirical frequencies array
-  *
-  * @param partitions
-  *   Pointer to the partitions structure
-  *
-  * @todo
-  *   What is tr->optimizeRateCategoryInvocations = 1 ?
-  */
-void initModel(pllInstance *tr, double **empiricalFrequencies, partitionList * partitions)
-{  
-  int model, j;
-  double  temp;  
-     
-  tr->optimizeRateCategoryInvocations = 1;      
-  tr->numberOfInvariableColumns = 0;
-  tr->weightOfInvariableColumns = 0;           
-  
-  for (j = 0; j < tr->originalCrunchedLength; j++) 
-    {
-      tr->patrat[j] = temp = 1.0;
-      tr->patratStored[j] = 1.0;
-      tr->rateCategory[j] = 0;           
-    } 
-
-  /* PSR (CAT) model init */
-  for(model = 0; model < partitions->numberOfPartitions; model++)
-    {            
-          partitions->partitionData[model]->numberOfCategories = 1;
-          partitions->partitionData[model]->perSiteRates[0] = 1.0;
-    }
-    
-  updatePerSiteRates(tr, partitions, PLL_FALSE);
- 
-  setupSecondaryStructureSymmetries(tr, partitions);
-  
-  initRateMatrix(tr, partitions);
-
-  initializeBaseFreqs(partitions, empiricalFrequencies);
-  
-  for(model = 0; model < partitions->numberOfPartitions; model++)
-   {
-     int
-       k;
-
-     partitions->partitionData[model]->alpha = 1.0;
-     if(partitions->partitionData[model]->dataType == PLL_AA_DATA && partitions->partitionData[model]->protModels == PLL_AUTO)
-       partitions->partitionData[model]->autoProtModels = PLL_WAG; /* initialize by WAG per default */
-      
-     pllInitReversibleGTR(tr, partitions, model); /* Decomposition of Q matrix */
-      /* GAMMA model init */
-     pllMakeGammaCats(partitions->partitionData[model]->alpha, partitions->partitionData[model]->gammaRates, 4, tr->useMedian);
-
-     for(k = 0; k < partitions->partitionData[model]->states; k++)
-       partitions->partitionData[model]->freqExponents[k] = 0.0;
-
-     for(k = 0; k < 4; k++)
-     {
-	   partitions->partitionData[model]->lg4x_weights[k] = 0.25;
-	   partitions->partitionData[model]->lg4x_weightExponents[k] = 0.0;
-     }
-
-   }                                   
-  
-  if(partitions->numberOfPartitions > 1)
-    {
-      tr->fracchange = 0;
-      for(model = 0; model < partitions->numberOfPartitions; model++) 
-        tr->fracchange += partitions->partitionData[model]->fracchange;
-      
-      tr->fracchange /= ((double)partitions->numberOfPartitions);
-    }  
-
-#if (defined(_FINE_GRAIN_MPI) || defined(_USE_PTHREADS))
-  pllMasterBarrier(tr, partitions, PLL_THREAD_COPY_INIT_MODEL);
-#endif
-}
-
-
-
-
diff --git a/pllrepo/src/newick.c b/pllrepo/src/newick.c
deleted file mode 100644
index ceb9653..0000000
--- a/pllrepo/src/newick.c
+++ /dev/null
@@ -1,583 +0,0 @@
-/** 
- * PLL (version 1.0.0) a software library for phylogenetic inference
- * Copyright (C) 2013 Tomas Flouri and Alexandros Stamatakis
- *
- * Derived from 
- * RAxML-HPC, a program for sequential and parallel estimation of phylogenetic
- * trees by Alexandros Stamatakis
- *
- * This program is free software: you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation, either version 3 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- * 
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * For any other enquiries send an Email to Tomas Flouri
- * Tomas.Flouri at h-its.org
- *
- * When publishing work that uses PLL please cite PLL
- * 
- * @file newick.c
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <assert.h>
-#include <math.h>
-
-#include "pll.h"
-#include "pllInternal.h"
-
-
-/** @file  newick.c
-
-    @brief Collection of routines for reading and parsing newick trees
-
-    Auxiliary functions for reading and parsing newick tree formats
-*/
-
-
-/** @defgroup newickParseGroup Reading and parsing newick trees
-    
-    This set of functions handles the reading and parsing of newick tree formats
-*/
-
-static int
-parse_newick (pllStack ** stack, int * inp)
-{
-  pllNewickNodeInfo * item = NULL;
-  int item_active = 0;
-  pllLexToken token;
-  int input;
-  pllLexToken prev_token;
-  int nop = 0;          /* number of open parentheses */
-  int depth = 0;
-
-  prev_token.tokenType = PLL_TOKEN_UNKNOWN;
-
-  input = *inp;
-
-  NEXT_TOKEN
-  
-  while (token.tokenType != PLL_TOKEN_EOF && token.tokenType != PLL_TOKEN_UNKNOWN)
-  {
-    switch (token.tokenType)
-     {
-       case PLL_TOKEN_OPAREN:
-#ifdef PLLDEBUG
-       printf ("PLL_TOKEN_OPAREN\n");
-#endif
-        ++nop;
-        memcpy (&prev_token, &token, sizeof (pllLexToken));
-        ++depth;
-        break;
-
-       case PLL_TOKEN_CPAREN:
-#ifdef PLLDEBUG
-       printf ("PLL_TOKEN_CPAREN\n");
-#endif
-        if (prev_token.tokenType != PLL_TOKEN_CPAREN  &&
-            prev_token.tokenType != PLL_TOKEN_UNKNOWN &&
-            prev_token.tokenType != PLL_TOKEN_STRING  &&
-            prev_token.tokenType != PLL_TOKEN_NUMBER  &&
-            prev_token.tokenType != PLL_TOKEN_FLOAT) return (0);
-
-        if (!nop) return (0);
-        --nop;
-        memcpy (&prev_token, &token, sizeof (pllLexToken));
-
-        /* push to the stack */
-        if (!item) item = (pllNewickNodeInfo *) rax_calloc (1, sizeof (pllNewickNodeInfo)); // possibly not nec
-        //if (item->name   == NULL) item->name   = strdup ("INTERNAL_NODE");
-        if (item->name == NULL) 
-         {
-           item->name = (char *) rax_malloc ((strlen("INTERNAL_NODE") + 1) * sizeof (char));
-           strcpy (item->name, "INTERNAL_NODE");
-         }
-
-        //if (item->branch == NULL) item->branch = strdup ("0.000000"); 
-        if (item->branch == NULL) 
-         {
-           item->branch = (char *) rax_malloc ((strlen("0.000000") + 1) * sizeof (char));
-           strcpy (item->branch, "0.000000");
-         }
-        item->depth = depth;
-        pllStackPush (stack, item);
-        item_active  = 1;       /* active = 1 */
-        item = NULL;
-        --depth;
-        break;
-
-       case PLL_TOKEN_STRING:
-#ifdef PLLDEBUG
-       printf ("PLL_TOKEN_STRING      %.*s\n", token.len, token.lexeme);
-#endif
-        if (prev_token.tokenType != PLL_TOKEN_OPAREN &&
-            prev_token.tokenType != PLL_TOKEN_CPAREN &&
-            prev_token.tokenType != PLL_TOKEN_UNKNOWN &&
-            prev_token.tokenType != PLL_TOKEN_COMMA) return (0);
-        if (!item) item = (pllNewickNodeInfo *) rax_calloc (1, sizeof (pllNewickNodeInfo));
-        item->name = my_strndup (token.lexeme, token.len);
-
-        item_active = 1;
-        item->depth = depth;
-        if (prev_token.tokenType == PLL_TOKEN_COMMA  ||
-            prev_token.tokenType == PLL_TOKEN_OPAREN ||
-            prev_token.tokenType == PLL_TOKEN_UNKNOWN) item->leaf = 1;
-        memcpy (&prev_token, &token, sizeof (pllLexToken));
-        break;
-
-       case PLL_TOKEN_FLOAT:
-       case PLL_TOKEN_NUMBER:
-#ifdef PLLDEBUG
-       if (token.tokenType == PLL_TOKEN_FLOAT) printf ("PLL_TOKEN_FLOAT\n"); else printf ("PLL_TOKEN_NUMBER\n");
-#endif
-         if  (prev_token.tokenType != PLL_TOKEN_OPAREN &&
-              prev_token.tokenType != PLL_TOKEN_CPAREN &&
-              prev_token.tokenType != PLL_TOKEN_COLON  &&
-              prev_token.tokenType != PLL_TOKEN_UNKNOWN &&
-              prev_token.tokenType != PLL_TOKEN_COMMA) return (0);
-        if (!item) item = (pllNewickNodeInfo *) rax_calloc (1, sizeof (pllNewickNodeInfo));
-        if (prev_token.tokenType == PLL_TOKEN_COLON)
-         {
-           item->branch = my_strndup (token.lexeme, token.len);
-         }
-        else
-         {
-           if (prev_token.tokenType == PLL_TOKEN_COMMA  ||
-               prev_token.tokenType == PLL_TOKEN_OPAREN ||
-               prev_token.tokenType == PLL_TOKEN_UNKNOWN) item->leaf = 1;
-           //if (prev_token.tokenType != PLL_TOKEN_UNKNOWN) ++ indent;
-           item->name = my_strndup (token.lexeme, token.len);
-         }
-        item_active = 1;
-        item->depth = depth;
-        memcpy (&prev_token, &token, sizeof (pllLexToken));
-        break;
-
-       case PLL_TOKEN_COLON:
-#ifdef PLLDEBUG
-       printf ("PLL_TOKEN_COLON\n");
-#endif
-        if (prev_token.tokenType != PLL_TOKEN_CPAREN &&
-            prev_token.tokenType != PLL_TOKEN_STRING &&
-            prev_token.tokenType != PLL_TOKEN_FLOAT  &&
-            prev_token.tokenType != PLL_TOKEN_NUMBER) return (0);
-        memcpy (&prev_token, &token, sizeof (pllLexToken));
-        break;
-
-       case PLL_TOKEN_COMMA:
-#ifdef PLLDEBUG
-       printf ("PLL_TOKEN_COMMA\n");
-#endif
-        if (prev_token.tokenType != PLL_TOKEN_CPAREN &&
-             prev_token.tokenType != PLL_TOKEN_STRING &&
-             prev_token.tokenType != PLL_TOKEN_FLOAT && 
-             prev_token.tokenType != PLL_TOKEN_NUMBER) return (0);
-        memcpy (&prev_token, &token, sizeof (pllLexToken));
-        
-        /* push to the stack */
-        if (!item) item = (pllNewickNodeInfo *) rax_calloc (1, sizeof (pllNewickNodeInfo)); // possibly not nece
-        //if (item->name   == NULL) item->name   = strdup ("INTERNAL_NODE");
-        if (item->name == NULL) 
-         {
-           item->name = (char *) rax_malloc ((strlen("INTERNAL_NODE") + 1) * sizeof (char));
-           strcpy (item->name, "INTERNAL_NODE");
-         }
-        //if (item->branch == NULL) item->branch = strdup ("0.000000"); 
-        if (item->branch == NULL) 
-         {
-           item->branch = (char *) rax_malloc ((strlen("0.000000") + 1) * sizeof (char));
-           strcpy (item->branch, "0.000000");
-         }
-        item->depth = depth;
-        pllStackPush (stack, item);
-        item_active  = 0;
-        item = NULL;
-        break;
-
-       case PLL_TOKEN_SEMICOLON:
-#ifdef PLLDEBUG
-        printf ("PLL_TOKEN_SEMICOLON\n");
-#endif
-        /* push to the stack */
-        if (!item) item = (pllNewickNodeInfo *) rax_calloc (1, sizeof (pllNewickNodeInfo));
-        //if (item->name   == NULL) item->name   = strdup ("ROOT_NODE");
-        if (item->name == NULL) 
-         {
-           item->name = (char *) rax_malloc ((strlen("ROOT_NODE") + 1) * sizeof (char));
-           strcpy (item->name, "ROOT_NODE");
-         }
-        //if (item->branch == NULL) item->branch = strdup ("0.000000"); 
-        if (item->branch == NULL) 
-         {
-           item->branch = (char *) rax_malloc ((strlen("0.000000") + 1) * sizeof (char));
-           strcpy (item->branch, "0.000000");
-         }
-        pllStackPush (stack, item);
-        item_active  = 0;
-        item = NULL;
-        break;
-       default:
-#ifdef __DEBUGGING_MODE
-         printf ("Unknown token: %d\n", token.tokenType);
-#endif
-       // TODO: Finish this part and add error codes
-        break;
-     }
-    NEXT_TOKEN
-    CONSUME(PLL_TOKEN_WHITESPACE | PLL_TOKEN_NEWLINE);
-  }
-  if (item_active)
-   {
-     if (!item) item = (pllNewickNodeInfo *) rax_calloc (1, sizeof (pllNewickNodeInfo));
-     //if (item->name   == NULL) item->name   = strdup ("ROOT_NODE");
-     if (item->name == NULL) 
-      {
-        item->name = (char *) rax_malloc ((strlen("ROOT_NODE") + 1) * sizeof (char));
-        strcpy (item->name, "ROOT_NODE");
-      }
-     //if (item->branch == NULL) item->branch = strdup ("0.000000"); 
-     if (item->branch == NULL) 
-      {
-        item->branch = (char *) rax_malloc ((strlen("0.000000") + 1) * sizeof (char));
-        strcpy (item->branch, "0.000000");
-      }
-     pllStackPush (stack, item);
-     item_active  = 0;
-   }
-
-  if (nop || token.tokenType == PLL_TOKEN_UNKNOWN) 
-   {
-     return (0);
-   }
-
-  return (1);
-}
-
-#ifdef __DEBUGGING_MODE
-void stack_dump(pllStack ** stack)
-{
-  pllNewickNodeInfo * item;
-  pllStack * head;
-  int i;
-
-  head = *stack;
-  while (head)
-   {
-     item = (pllNewickNodeInfo *) head->item;
-
-     for (i = 0; i < item->depth; ++ i) printf ("\t");
-
-     printf ("%s:%s\n", item->name, item->branch);
-
-     head = head->next;
-   }
-}
-#endif
-
-static void
-assign_ranks (pllStack * stack, int * nodes, int * leaves)
-{
-  pllStack * head;
-  pllNewickNodeInfo * item, * tmp;
-  pllStack * preorder = NULL;
-  int children;
-  int depth;
-
-  *nodes = *leaves = 0;
-
-
-  head = stack;
-  while (head)
-  {
-    assert (head->item);
-    item = (pllNewickNodeInfo *) head->item;
-    
-    if (item->leaf)  ++ (*leaves);
-
-    if (preorder)
-     {
-       tmp = (pllNewickNodeInfo *) preorder->item;
-       children = 0;
-       while (item->depth < tmp->depth)
-        {
-          children = 1;
-          depth = tmp->depth;
-          pllStackPop (&preorder);
-          tmp = preorder->item;
-          while (tmp->depth == depth)
-           {
-             ++ children;
-             pllStackPop (&preorder);
-             tmp = (pllNewickNodeInfo *)preorder->item;
-           }
-          tmp->rank += children;
-        }
-     }
-    
-    ++ (*nodes);
-    head = head->next;
-
-    if (item->leaf)
-     {
-       if (!preorder) return;
-
-       children = 1;
-       tmp = preorder->item;
-       while (tmp->depth == item->depth)
-        {
-          ++ children;
-          pllStackPop (&preorder);
-          assert (preorder);
-          tmp = (pllNewickNodeInfo *)preorder->item;
-        }
-       tmp->rank += children;
-     }
-    else
-     {
-       pllStackPush (&preorder, item);
-     }
-  }
-  
-  while (preorder->item != stack->item)
-  {
-    item = (pllNewickNodeInfo *)pllStackPop (&preorder);
-    tmp  = (pllNewickNodeInfo *) preorder->item;
-    children = 1;
-
-    while (tmp->depth == item->depth)
-     {
-       ++ children;
-       item = (pllNewickNodeInfo *) pllStackPop (&preorder);
-       tmp  = (pllNewickNodeInfo *) preorder->item;
-     }
-    tmp->rank += children;
-    children = 0;
-  }
- assert (preorder->item == stack->item);
- 
- pllStackClear (&preorder);
-}
-
-/** @ingroup newickParseGroup
-    @brief Validate if a newick tree is a valid phylogenetic tree
-
-    A valid tree is one where the root node is binary or ternary
-    and all other internal nodes are binary. In case the root
-    is ternary then the tree must contain at least another internal
-    node and the total number of nodes must be equal to 
-    \f$ 2l - 2\f$, where \f$l\f$ is the number of leaves. If the
-    root is binary, then the total number of nodes must be equal
-    to \f$2l - 1\f$.
-
-    @param tree
-      Newick tree wrapper structure which contains the stack representation of the parsed newick tree
-
-    @return
-      Returns \b 1 in case of success, otherwise \b 0
-*/
-int
-pllValidateNewick (pllNewickTree * t)
-{
-  pllStack * head;
-  pllNewickNodeInfo * item;
-  int correct = 0;
- 
-  item = t->tree->item;
-  if (item->rank != 2 && item->rank != 3) return (0);
-  head = t->tree->next;
-  while (head)
-  {
-    item = head->item;
-    if (item->rank != 2 && item->rank != 0) 
-     {
-       return (0);
-     }
-    head = head->next;
-  }
-  
-  item = t->tree->item;
-
-  if (item->rank == 2) 
-   {
-     correct = (t->nodes == 2 * t->tips -1);
-     if (correct)
-      {
-        errno = PLL_NEWICK_ROOTED_TREE;
-      }
-     else
-      {
-        errno = PLL_NEWICK_BAD_STRUCTURE;
-      }
-     return (PLL_FALSE);
-   }
-   
-  
-  correct = ((t->nodes == 2 * t->tips - 2) && t->nodes != 4);
-  if (correct) return (PLL_TRUE);
-
-  errno = PLL_NEWICK_BAD_STRUCTURE;
-
-  return (1);
-}
-
-
-/** @ingroup newickParseGroup
-    @brief Convert a binary rooted trree to a binary unrooted tree
-
-    Changes the root of the node to have 3 descendants instead of two, deletes its last immediate descendant internal node
-    and takes the two children (of the deleted internal node) as its children.
-
-    @param
-      Newick tree
-    
-    @return
-      \b PLL_TRUE in case of success, otherwise \b PLL_FALSE and \a errno is set
-*/
-int
-pllNewickUnroot (pllNewickTree * t)
-{
-  pllStack * tmp;
-  pllNewickNodeInfo * item;
-
-  item = t->tree->item;
-  if (item->rank == 2)
-   {
-     item->rank = 3;
-     t->nodes--;
-     item = t->tree->next->item;
-     if (item->rank == 0)
-      {
-        tmp = t->tree->next->next;
-        t->tree->next->next = t->tree->next->next->next;
-      }
-     else
-      {
-        tmp = t->tree->next;
-        t->tree->next = t->tree->next->next;
-      }
-     item = tmp->item;
-     rax_free (item->name);
-     rax_free (tmp->item);
-     rax_free (tmp);
-   }
-
-  return (pllValidateNewick (t));
-}
-
-
-/** @ingroup newickParseGroup
-    @brief Parse a newick tree string
-  
-    Parse a newick string and create a stack structure which represents the tree
-    in a preorder traversal form. Each element of the stack represents one node
-    and consists of its name, branch length, number of children and depth. The
-    stack structure is finally wrapped in a \a pllNewickTree structure which
-    also contains the number of nodes and leaves.
-
-    @param newick
-      String containing the newick tree
-
-    @return
-      Returns a pointer to the created \a pllNewickTree structure in case of success, otherwise \b NULL
-*/
-pllNewickTree *
-pllNewickParseString (const char * newick)
-{
-  int n, input, rc;
-  pllNewickTree * t;
-  int nodes, leaves;
-  
-  t = (pllNewickTree *) rax_calloc (1, sizeof (pllNewickTree));
-
-  n = strlen (newick);
-
-  init_lexan (newick, n);
-  input = get_next_symbol();
-
-  rc = parse_newick (&(t->tree), &input);
-  if (!rc)
-   {
-     /* TODO: properly clean t->tree */
-     rax_free (t);
-     t = NULL;
-   }
-  else
-   {
-     assign_ranks (t->tree, &nodes, &leaves);
-     t->nodes = nodes;
-     t->tips  = leaves;
-   }
-
-  return (t);
-}
-
-/** @ingroup newickParseGroup
-    @brief Deallocate newick parser stack structure
-
-    Deallocates the newick parser stack structure that represents the parsed tree. It
-    also frees all memory allocated by elements of the stack structure.
-
-    @param tree
-      The tree stack structure
-*/
-void pllNewickParseDestroy (pllNewickTree ** t)
-{
-  pllNewickNodeInfo *  item;
-
-  while ((item = (pllNewickNodeInfo *)pllStackPop (&((*t)->tree))))
-   {
-     rax_free (item->name);
-     rax_free (item->branch);
-     rax_free (item);
-   }
-  rax_free (*t);
-  (*t) = NULL;
-}
-
-/** @ingroup newickParseGroup
-    @brief Parse a newick tree file
-  
-    Parse a newick file and create a stack structure which represents the tree
-    in a preorder traversal form. Each element of the stack represents one node
-    and consists of its name, branch length, number of children (rank) and depth. The
-    stack structure is finally wrapped in a \a pllNewickTree structure which
-    also contains the number of nodes and leaves.
-
-    @param filename
-      Filename containing the newick tree
-
-    @return
-      Returns a pointer to the created \a pllNewickTree structure in case of success, otherwise \b NULL
-*/
-pllNewickTree *
-pllNewickParseFile (const char * filename)
-{
-  long n;
-  char * rawdata;
-  pllNewickTree * t;
-
-  rawdata = pllReadFile (filename, &n);
-  if (!rawdata)
-   {
-     fprintf (stderr, "Error while opening/reading file %s\n", filename);
-     return (0);
-   }
-
-  //printf ("%s\n\n", rawdata);
-
-  t = pllNewickParseString (rawdata);
-
-  rax_free (rawdata);
-
-  return (t);
-}
-
diff --git a/pllrepo/src/newick.h b/pllrepo/src/newick.h
deleted file mode 100644
index 8810598..0000000
--- a/pllrepo/src/newick.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/** 
- * PLL (version 1.0.0) a software library for phylogenetic inference
- * Copyright (C) 2013 Tomas Flouri and Alexandros Stamatakis
- *
- * Derived from 
- * RAxML-HPC, a program for sequential and parallel estimation of phylogenetic
- * trees by Alexandros Stamatakis
- *
- * This program is free software: you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation, either version 3 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- * 
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * For any other enquiries send an Email to Tomas Flouri
- * Tomas.Flouri at h-its.org
- *
- * When publishing work that uses PLL please cite PLL
- * 
- * @file newick.h
- */
-#ifndef __pll_NEWICK__
-#define __pll_NEWICK__
-#include "stack.h"
-/** @brief Intermediate structure for storing a newick tree 
-    
-    Holds the structure of a parsed newick tree. The number of inner nodes is stored in \a nodes
-*/
-typedef struct
-{
-  int nodes;                    /**< @brief Total number of nodes in the tree == 2*tips - 1 for rooted and 2*tips -2 for unrooted */
-  int tips;                     /**< @brief Number of leaves (tips) in the tree */
-  pllStack * tree;              /**< @brief Parsed tree represented as elements of a stack. Corresponds to placing the postorder traversal of a rooted tree in a pushdown store */
-} pllNewickTree;
-
-
-/** @brief Information describing the parsed newick tree nodes 
-    
-    This structure is placed in the ::pllNewickTree LIFO element pllNewickTree::tree
-    and described each node of the parsed tree.
-
-    @todo Rename this to something more proper
-*/
-typedef struct
-{
-  int depth;                    /**< @brief Distance of node from root */
-  char * name;                  /**< @brief Name of the taxon represented by the node (in case it is a leaf) */
-  char * branch;                /**< @brief Length of branch that leads to its parent */
-  int leaf;                     /**< @brief \b PLL_TRUE if the node is a leaf, otherwise \b PLL_FALSE */
-  int rank;                     /**< @brief Rank of the node, i.e. how many children it has */
-} pllNewickNodeInfo;
-
-
-#endif
diff --git a/pllrepo/src/newviewGenericSpecial.c b/pllrepo/src/newviewGenericSpecial.c
deleted file mode 100644
index e69d7f2..0000000
--- a/pllrepo/src/newviewGenericSpecial.c
+++ /dev/null
@@ -1,8736 +0,0 @@
-/** 
- * PLL (version 1.0.0) a software library for phylogenetic inference
- * Copyright (C) 2013 Tomas Flouri and Alexandros Stamatakis
- *
- * Derived from 
- * RAxML-HPC, a program for sequential and parallel estimation of phylogenetic
- * trees by Alexandros Stamatakis
- *
- * This program is free software: you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation, either version 3 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- * 
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * For any other enquiries send an Email to Tomas Flouri
- * Tomas.Flouri at h-its.org
- *
- * When publishing work that uses PLL please cite PLL
- * 
- * @file newviewGenericSpecial.c
- *  
- * @brief Functions that deal (mostly) with conditional likelihood (re)computation
- */
-
-#include "mem_alloc.h"
-
-#ifndef WIN32
-#include <unistd.h>
-#endif
-
-#include <math.h>
-#include <time.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <ctype.h>
-#include <string.h>
-#include <stdint.h>
-#include <limits.h>
-#include <assert.h>
-
-#include "pll.h"
-#include "pllInternal.h"
-
-#ifdef __MIC_NATIVE
-#include "mic_native.h"
-#endif
-
-
-#ifdef __SSE3
-#include <stdint.h>
-#include <xmmintrin.h>
-#include <pmmintrin.h>
-#include "cycle.h"
-
-static void computeTraversalInfo(nodeptr, traversalInfo *, int *, int, int, pllBoolean, recompVectors *, pllBoolean);
-static void makeP(double z1, double z2, double *rptr, double *EI,  double *EIGN, int numberOfCategories, double *left, double *right, pllBoolean saveMem, int maxCat, const int states);
-#if (defined(__SSE3) && !defined(__AVX))
-static void newviewGTRGAMMAPROT_LG4(int tipCase,
-                                    double *x1, double *x2, double *x3, double *extEV[4], double *tipVector[4],
-                                    int *ex3, unsigned char *tipX1, unsigned char *tipX2,
-                                    int n, double *left, double *right, int *wgt, int *scalerIncrement, const pllBoolean useFastScaling);
-
-static void newviewGTRGAMMA_GAPPED_SAVE(int tipCase,
-                                        double *x1_start, double *x2_start, double *x3_start,
-                                        double *EV, double *tipVector,
-                                        int *ex3, unsigned char *tipX1, unsigned char *tipX2,
-                                        const int n, double *left, double *right, int *wgt, int *scalerIncrement, const pllBoolean fastScaling,
-                                        unsigned int *x1_gap, unsigned int *x2_gap, unsigned int *x3_gap, 
-                                        double *x1_gapColumn, double *x2_gapColumn, double *x3_gapColumn);
-
-static void newviewGTRGAMMA(int tipCase,
-                            double *x1_start, double *x2_start, double *x3_start,
-                            double *EV, double *tipVector,
-                            int *ex3, unsigned char *tipX1, unsigned char *tipX2,
-                            const int n, double *left, double *right, int *wgt, int *scalerIncrement, const pllBoolean fastScaling
-                            );
-
-static void newviewGTRCAT( int tipCase,  double *EV,  int *cptr,
-                           double *x1_start, double *x2_start,  double *x3_start, double *tipVector,
-                           int *ex3, unsigned char *tipX1, unsigned char *tipX2,
-                           int n,  double *left, double *right, int *wgt, int *scalerIncrement, const pllBoolean fastScaling);
-
-
-static void newviewGTRCAT_SAVE( int tipCase,  double *EV,  int *cptr,
-                                double *x1_start, double *x2_start,  double *x3_start, double *tipVector,
-                                int *ex3, unsigned char *tipX1, unsigned char *tipX2,
-                                int n,  double *left, double *right, int *wgt, int *scalerIncrement, const pllBoolean fastScaling,
-                                unsigned int *x1_gap, unsigned int *x2_gap, unsigned int *x3_gap,
-                                double *x1_gapColumn, double *x2_gapColumn, double *x3_gapColumn, const int maxCats);
-
-static void newviewGTRGAMMAPROT_GAPPED_SAVE(int tipCase,
-                                            double *x1, double *x2, double *x3, double *extEV, double *tipVector,
-                                            int *ex3, unsigned char *tipX1, unsigned char *tipX2,
-                                            int n, double *left, double *right, int *wgt, int *scalerIncrement, const pllBoolean fastScaling,
-                                            unsigned int *x1_gap, unsigned int *x2_gap, unsigned int *x3_gap,  
-                                            double *x1_gapColumn, double *x2_gapColumn, double *x3_gapColumn
-                                            );
-
-static void newviewGTRGAMMAPROT(int tipCase,
-                                double *x1, double *x2, double *x3, double *extEV, double *tipVector,
-                                int *ex3, unsigned char *tipX1, unsigned char *tipX2,
-                                int n, double *left, double *right, int *wgt, int *scalerIncrement, const pllBoolean fastScaling);
-
-static void newviewGTRCATPROT(int tipCase, double *extEV,
-                              int *cptr,
-                              double *x1, double *x2, double *x3, double *tipVector,
-                              int *ex3, unsigned char *tipX1, unsigned char *tipX2,
-                              int n, double *left, double *right, int *wgt, int *scalerIncrement, const pllBoolean fastScaling);
-
-static void newviewGTRCATPROT_SAVE(int tipCase, double *extEV,
-                                   int *cptr,
-                                   double *x1, double *x2, double *x3, double *tipVector,
-                                   int *ex3, unsigned char *tipX1, unsigned char *tipX2,
-                                   int n, double *left, double *right, int *wgt, int *scalerIncrement, const pllBoolean fastScaling,
-                                   unsigned int *x1_gap, unsigned int *x2_gap, unsigned int *x3_gap,
-                                   double *x1_gapColumn, double *x2_gapColumn, double *x3_gapColumn, const int maxCats);
-
-#endif
-#if (defined(__AVX) || defined(__SSE3))
-static void newviewGTRCAT_BINARY( int tipCase,  double *EV,  int *cptr,
-                                  double *x1_start,  double *x2_start,  double *x3_start,  double *tipVector,
-                                  int *ex3, unsigned char *tipX1, unsigned char *tipX2,
-                                  int n,  double *left, double *right, int *wgt, int *scalerIncrement, const pllBoolean useFastScaling);
-static void newviewGTRGAMMA_BINARY(int tipCase,
-                                   double *x1_start, double *x2_start, double *x3_start,
-                                   double *EV, double *tipVector,
-                                   int *ex3, unsigned char *tipX1, unsigned char *tipX2,
-                                   const int n, double *left, double *right, int *wgt, int *scalerIncrement, const pllBoolean useFastScaling);
-#endif
-
-/* required to compute the absolute values of double precision numbers with SSE3 */
-
-PLL_ALIGN_BEGIN const union PLL_ALIGN_END
-{
-  uint64_t i[2];
-  __m128d m;
-} absMask = {{0x7fffffffffffffffULL , 0x7fffffffffffffffULL }};
-
-
-
-#endif
-
-static int pllGetTransitionMatrixNormal (pllInstance * tr, partitionList * pr, nodeptr p, int model, int rate, double * outBuffer);
-static int pllGetTransitionMatrixLG4 (partitionList * pr, nodeptr p, int model, double * outBuffer);
-
-extern const char binaryStateNames[2];  /**< @brief Alphabet of binary states */
-extern const char dnaStateNames[4];     /**< @brief DNA alphabet  */
-extern const char protStateNames[20];   /**< @brief Amino-acid alphabet */
-extern const unsigned int mask32[32];   /**< @brief Contains the first 32 powers of 2, i.e. 2^0 upto 2^31 */
-
-static void ascertainmentBiasSequence(unsigned char tip[32], int numStates)
-{ 
-  assert(numStates <= 32 && numStates > 1);
-
-  switch(numStates)
-    {
-    case 2:     
-      tip[0] = 1;
-      tip[1] = 2;
-      break;
-    case 4:
-      tip[0] = 1;
-      tip[1] = 2;
-      tip[2] = 4;
-      tip[3] = 8;
-      break;
-    default:
-      {
-	int 
-	  i;
-	for(i = 0; i < numStates; i++)
-	  {
-	    tip[i] = i;
-	    //printf("%c ", inverseMeaningPROT[i]);
-	  }
-	//printf("\n");
-      }
-      break;
-    }
-}
-
-static void newviewAscCat(int tipCase,
-			  double *x1, double *x2, double *x3, double *extEV, double *tipVector,
-			  int *ex3, 
-			  const int n, double *left, double *right, 			    
-			  const int numStates)
-{
-  double
-    *le, *ri, *v, *vl, *vr,
-    ump_x1, ump_x2, x1px2;
-  
-  int 
-    i, l, j, scale;
-
- 
-  unsigned char 
-    tip[32];
-
-  ascertainmentBiasSequence(tip, numStates);
-  
-  switch(tipCase)
-    {
-    case PLL_TIP_TIP:
-      {
-	for (i = 0; i < n; i++)
-	  {
-	    le = &left[0];
-	    ri = &right[0];
-
-	    vl = &(tipVector[numStates * tip[i]]);
-	    vr = &(tipVector[numStates * tip[i]]);
-	    v  = &x3[numStates * i];
-
-	    for(l = 0; l < numStates; l++)
-	      v[l] = 0.0;
-
-	    for(l = 0; l < numStates; l++)
-	      {
-		ump_x1 = 0.0;
-		ump_x2 = 0.0;
-
-		for(j = 0; j < numStates; j++)
-		  {
-		    ump_x1 += vl[j] * le[l * numStates + j];
-		    ump_x2 += vr[j] * ri[l * numStates + j];
-		  }
-
-		x1px2 = ump_x1 * ump_x2;
-
-		for(j = 0; j < numStates; j++)
-		  v[j] += x1px2 * extEV[l * numStates + j];
-	      }	    
-	  }
-      }
-      break;
-    case PLL_TIP_INNER:
-      {
-	for (i = 0; i < n; i++)
-	  {
-	    le = &left[0];
-	    ri = &right[0];
-
-	    vl = &(tipVector[numStates * tip[i]]);
-	    vr = &x2[numStates * i];
-	    v  = &x3[numStates * i];
-
-	    for(l = 0; l < numStates; l++)
-	      v[l] = 0.0;
-
-	    for(l = 0; l < numStates; l++)
-	      {
-		ump_x1 = 0.0;
-		ump_x2 = 0.0;
-
-		for(j = 0; j < numStates; j++)
-		  {
-		    ump_x1 += vl[j] * le[l * numStates + j];
-		    ump_x2 += vr[j] * ri[l * numStates + j];
-		  }
-
-		x1px2 = ump_x1 * ump_x2;
-
-		for(j = 0; j < numStates; j++)
-		  v[j] += x1px2 * extEV[l * numStates + j];
-	      }
-
-	    scale = 1;
-	    for(l = 0; scale && (l < numStates); l++)
-	      scale = ((v[l] < PLL_MINLIKELIHOOD) && (v[l] > PLL_MINUSMINLIKELIHOOD));	    
-
-	    if(scale)
-	      {
-		for(l = 0; l < numStates; l++)
-		  v[l] *= PLL_TWOTOTHE256;
-			
-		ex3[i]  += 1;	      
-	      }
-	  }
-      }
-      break;
-    case PLL_INNER_INNER:
-      for(i = 0; i < n; i++)
-	{
-	  le = &left[0];
-	  ri = &right[0];
-
-	  vl = &x1[numStates * i];
-	  vr = &x2[numStates * i];
-	  v = &x3[numStates * i];
-
-	  for(l = 0; l < numStates; l++)
-	    v[l] = 0.0;
-
-	  for(l = 0; l < numStates; l++)
-	    {
-	      ump_x1 = 0.0;
-	      ump_x2 = 0.0;
-
-	      for(j = 0; j < numStates; j++)
-		{
-		  ump_x1 += vl[j] * le[l * numStates + j];
-		  ump_x2 += vr[j] * ri[l * numStates + j];
-		}
-
-	      x1px2 =  ump_x1 * ump_x2;
-
-	      for(j = 0; j < numStates; j++)
-		v[j] += x1px2 * extEV[l * numStates + j];
-	    }
-
-	   scale = 1;
-	   for(l = 0; scale && (l < numStates); l++)
-	     scale = ((v[l] < PLL_MINLIKELIHOOD) && (v[l] > PLL_MINUSMINLIKELIHOOD));
-	  
-	   if(scale)
-	     {
-	       for(l = 0; l < numStates; l++)
-		 v[l] *= PLL_TWOTOTHE256;
-	      
-	       ex3[i]  += 1;	     
-	     }
-	}
-      break;
-    default:
-      assert(0);
-    }
-  
- 
-
-}
-
-
-static void newviewAscGamma(int tipCase,
-			    double *x1, double *x2, double *x3, double *extEV, double *tipVector,
-			    int *ex3, 
-			    const int n, double *left, double *right, 			    
-			    const int numStates)
-{
-  
-  int  
-    i, j, l, k, scale;
-  
-  const int 
-    statesSquare = numStates * numStates,
-    gammaStates = 4 * numStates;
-
-  double 
-    *vl, *vr, al, ar, *v, x1px2;
-
-  unsigned char 
-    tip[32];
-
-  ascertainmentBiasSequence(tip, numStates);
-  
-  switch(tipCase)
-    {
-    case PLL_TIP_TIP:
-      {
-	for(i = 0; i < n; i++)
-	  {
-	    for(k = 0; k < 4; k++)
-	      {
-		vl = &(tipVector[numStates * tip[i]]);
-		vr = &(tipVector[numStates * tip[i]]);
-		v =  &(x3[gammaStates * i + numStates * k]);
-
-		for(l = 0; l < numStates; l++)
-		  v[l] = 0;
-
-		for(l = 0; l < numStates; l++)
-		  {
-		    al = 0.0;
-		    ar = 0.0;
-		    for(j = 0; j < numStates; j++)
-		      {
-			al += vl[j] * left[k * statesSquare + l * numStates + j];
-			ar += vr[j] * right[k * statesSquare + l * numStates + j];
-		      }
-
-		    x1px2 = al * ar;
-		    for(j = 0; j < numStates; j++)
-		      v[j] += x1px2 * extEV[numStates * l + j];
-		  }
-	      }	    
-	  }
-      }
-      break;
-    case PLL_TIP_INNER:
-      {
-	for (i = 0; i < n; i++)
-	  {
-	    for(k = 0; k < 4; k++)
-	      {
-		vl = &(tipVector[numStates * tip[i]]);
-		vr = &(x2[gammaStates * i + numStates * k]);
-		v =  &(x3[gammaStates * i + numStates * k]);
-
-		for(l = 0; l < numStates; l++)
-		  v[l] = 0;
-
-		for(l = 0; l < numStates; l++)
-		  {
-		    al = 0.0;
-		    ar = 0.0;
-		    for(j = 0; j < numStates; j++)
-		      {
-			al += vl[j] * left[k * statesSquare + l * numStates + j];
-			ar += vr[j] * right[k * statesSquare + l * numStates + j];
-		      }
-
-		    x1px2 = al * ar;
-		    for(j = 0; j < numStates; j++)
-		      v[j] += x1px2 * extEV[numStates * l + j];
-		  }
-	      }
-	   
-	    v = &x3[gammaStates * i];
-	    scale = 1;
-	    for(l = 0; scale && (l < gammaStates); l++)
-	      scale = (PLL_ABS(v[l]) < PLL_MINLIKELIHOOD);
-
-	    if(scale)
-	      {		
-		for(l = 0; l < gammaStates; l++)
-		  v[l] *= PLL_TWOTOTHE256;
-		
-		ex3[i]  += 1;	      
-	      }
-	  }
-      }
-      break;
-    case PLL_INNER_INNER:
-      for (i = 0; i < n; i++)
-       {
-	 for(k = 0; k < 4; k++)
-	   {
-	     vl = &(x1[gammaStates * i + numStates * k]);
-	     vr = &(x2[gammaStates * i + numStates * k]);
-	     v =  &(x3[gammaStates * i + numStates * k]);
-
-	     for(l = 0; l < numStates; l++)
-	       v[l] = 0;
-
-	     for(l = 0; l < numStates; l++)
-	       {
-		 al = 0.0;
-		 ar = 0.0;
-		 for(j = 0; j < numStates; j++)
-		   {
-		     al += vl[j] * left[k * statesSquare + l * numStates + j];
-		     ar += vr[j] * right[k * statesSquare + l * numStates + j];
-		   }
-
-		 x1px2 = al * ar;
-		 for(j = 0; j < numStates; j++)
-		   v[j] += x1px2 * extEV[numStates * l + j];
-	       }
-	   }
-	 
-	 v = &(x3[gammaStates * i]);
-	 scale = 1;
-	 for(l = 0; scale && (l < gammaStates); l++)
-	   scale = ((PLL_ABS(v[l]) <  PLL_MINLIKELIHOOD));
-
-	 if(scale)
-	   {	    
-	     for(l = 0; l < gammaStates; l++)
-	       v[l] *= PLL_TWOTOTHE256;
-	     
-	     ex3[i]  += 1;	    
-	   }
-       }
-      break;
-    default:
-      assert(0);
-    }  
-}
-
-
-/* generic function for computing the P matrices, for computing the conditional likelihood at a node p, given child nodes q and r 
-   we compute P(z1) and P(z2) here */
-
-/** @brief Computes two P matrices for two edges.
-
-    Generic function for computing the P matrices of two nodes based on their edges. This is used to 
-    (later) compute the the conditional likelihood at a node p which has two descendants \a q and \r, 
-    which in turn have the edges \a z1 and \a z2 that connect them with \a p. Given those edges, we
-    compute two P matrices \a P(z1) and \a P(z2) which are stored in the arrays \a left and \a right.
- 
-    The following value is computed here: 
-    \f[
-     EI\cdot exp( EIGN \cdot z)
-     \f]
-     to fill up the P matrix.
-     
-    @param z1    Branch length leading to left descendant node (let's call it \a q)
-    @param z2    Branch length leading to right descendant node (let's call it \a r)
-    @param rptr  Array of values for rate categories
-    @param EI    Inverse eigenvectors of Q-matrix
-    @param EIGN  Eigenvalues of Q-matrix
-    @param numberOfCategories How many rate heterogeneity categories we have, depending on GAMMA and CAT
-    @param left  Where to store the left P matrix (for node \a q)
-    @param right Where to store the right P matrix (for node \a r)
-    @param saveMem If set to \b PLL_TRUE, memory saving technique is enabled
-    @param maxCat Maximum number of rate categories
-    @param states Number of states for the particular data (4 for DNA or 20 for AA)
-*/
-static void 
-makeP(double z1, double z2, double *rptr, double *EI,  double *EIGN, int numberOfCategories, double *left, double *right, pllBoolean saveMem, int maxCat, const int states)
-{
-  int  i, j, k, statesSquare = states * states;
-
-  /* assign some space for pre-computing and later re-using functions */
-
-  double 
-    *lz1 = (double*)rax_malloc(sizeof(double) * states),
-    *lz2 = (double*)rax_malloc(sizeof(double) * states),
-    *d1 = (double*)rax_malloc(sizeof(double) * states),
-    *d2 = (double*)rax_malloc(sizeof(double) * states);
-
-  /* multiply branch lengths with eigenvalues */
-
-  for(i = 1; i < states; i++)
-  {
-    lz1[i] = EIGN[i] * z1;
-    lz2[i] = EIGN[i] * z2;
-  }
-
-
-  /* loop over the number of rate categories, this will be 4 for the GAMMA model and 
-     variable for the CAT model */
-
-  for(i = 0; i < numberOfCategories; i++)
-  {
-    /* exponentiate the rate multiplied by the branch */
-
-    for(j = 1; j < states; j++)
-    {
-      d1[j] = exp(rptr[i] * lz1[j]);
-      d2[j] = exp(rptr[i] * lz2[j]);
-
-    }
-
-    /* now fill the P matrices for the two branch length values */
-
-    for(j = 0; j < states; j++)
-    {
-      /* left and right are pre-allocated arrays */
-
-      left[statesSquare * i  + states * j] = 1.0;
-      right[statesSquare * i + states * j] = 1.0;         
-
-      for(k = 1; k < states; k++)
-      {
-        left[statesSquare * i + states * j + k]  = d1[k] * EI[states * j + k];
-        right[statesSquare * i + states * j + k] = d2[k] * EI[states * j + k];
-      }
-    }
-  }
-
-
-  /* if memory saving is enabled and we are using CAT we need to do one additional P matrix 
-     calculation for a rate of 1.0 to compute the entries of a column/tree site comprising only gaps */
-
-
-  if(saveMem)
-  {
-    i = maxCat;
-
-    for(j = 1; j < states; j++)
-    {
-      d1[j] = exp (lz1[j]);
-      d2[j] = exp (lz2[j]);
-    }
-
-    for(j = 0; j < states; j++)
-    {
-      left[statesSquare * i  + states * j] = 1.0;
-      right[statesSquare * i + states * j] = 1.0;
-
-      for(k = 1; k < states; k++)
-      {
-        left[statesSquare * i + states * j + k]  = d1[k] * EI[states * j + k];
-        right[statesSquare * i + states * j + k] = d2[k] * EI[states * j + k];
-      }
-    }
-  }
-
-  /* free the temporary buffers */
-
-  rax_free(lz1);
-  rax_free(lz2);
-  rax_free(d1);
-  rax_free(d2);
-}
-
-
-/** Compute the transition probability matrix for a given branch
-
-    Computes the transition probability matrix for the branch \a p->z and partition \a model given the
-    PLL instance \a tr and list of partitions \a pr. The result is stored in \a outBuffer which must
-    be of sufficient size, i.e states * states * (numberOfRateCategories + 1) * sizeof(double);
-
-    @param tr  PLL instance
-    @param pr  List of partitions
-    @param model  Partition index for which to take the branch length
-    @param p  Adjacent node to the edge we want to compute the trans. prob. matrix
-    @param outBuffer Output buffer where to store the transition probability matrix
-
-*/
-int pllGetTransitionMatrix (pllInstance * tr, partitionList * pr, nodeptr p, int model, int rate, double * outBuffer)
-{
-  if (tr->rateHetModel == PLL_CAT)
-   {
-     if (rate >= pr->partitionData[model]->numberOfCategories) return (PLL_FALSE);
-   }
-  else
-   {
-     if (rate >= 4) return (PLL_FALSE);
-   }
-
-  if (pr->partitionData[model]->dataType == PLL_AA_DATA &&
-		  (pr->partitionData[model]->protModels == PLL_LG4M || pr->partitionData[model]->protModels == PLL_LG4X))
-    return (pllGetTransitionMatrixLG4 (pr, p, model, outBuffer));
-    
-    
-  return (pllGetTransitionMatrixNormal (tr, pr, p, model, rate, outBuffer));
-}
-
-
-/* TODO: Fix this function according to pllGetTransitionMatrixNormal */
-static int pllGetTransitionMatrixLG4 (partitionList * pr, nodeptr p, int model, double * outBuffer)
-{
-  int
-    i, j, k,
-    states = pr->partitionData[model]->states,
-    numberOfCategories = 4;
-  double
-    d[64],
-    *  rptr = pr->partitionData[model]->gammaRates,
-    ** EI   = pr->partitionData[model]->EI_LG4,
-    ** EIGN = pr->partitionData[model]->EIGN_LG4;
-
-  assert (states == 20);
-
-  for (i = 0; i < numberOfCategories; ++i)
-   {
-     for (j = 1; j < states; ++j)
-      {
-        d[j] = exp(rptr[i] * EIGN[i][j] * p->z[model]);
-      }
-     for (j = 0; j < states; ++ j)
-      {
-        outBuffer[states * states * i + states * j] = 1.0;
-        for (k = 1; k < states; ++k) 
-         {
-           outBuffer[states * states * i + states * j + k] = d[k] * EI[i][states * j + k];
-         }
-      }
-   }
-  return (PLL_TRUE);
-}
-
-static int pllGetTransitionMatrixNormal (pllInstance * tr, partitionList * pr, nodeptr p, int model, int rate, double * outBuffer)
-{
-  int 
-    i, j, k,
-    /* numberOfCategories, */
-    states = pr->partitionData[model]->states;
-  double
-    * d = (double *)rax_malloc(sizeof(double) * states),
-    * rptr,
-    * EI   = pr->partitionData[model]->EI,
-    * EIGN = pr->partitionData[model]->EIGN,
-    * EV = pr->partitionData[model]->EV;
-  
-  double lz = (p->z[model] > PLL_ZMIN) ? log(p->z[model]) : log(PLL_ZMIN);                        
-
-  if (tr->rateHetModel == PLL_CAT)
-   {
-     rptr               = pr->partitionData[model]->perSiteRates;
-     /* numberOfCategories = pr->partitionData[model]->numberOfCategories; */
-   }
-  else
-   {
-     rptr               = pr->partitionData[model]->gammaRates;
-     /* numberOfCategories = 4; */
-   }
-
-  for (i = 0; i < states * states; ++ i) outBuffer[i] = 0;
-
-  d[0] = 1.0;
-  for (j = 1; j < states; ++ j)
-   {
-     d[j] = exp(rptr[rate] * EIGN[j] * lz);
-   }
-
-  for (i = 0; i < states; ++ i)
-   {
-     for (j = 0; j < states; ++ j)
-      {
-        for (k = 0; k < states; ++ k)
-         {
-           outBuffer[states * i + j] += (d[k] * EI[states * i + k] * EV[states * j + k]);
-         }
-      }
-   }
-
-  assert (!tr->saveMemory);
-  // TODO: Fix the following snippet
-  //if (tr->saveMemory)
-  // {
-  //   i = tr->maxCategories;
-  //   
-  //   for (j = 1; j < states; ++j)
-  //    {
-  //      d[j] = EXP(EIGN[j] * p->z[model]);
-  //    }
-
-  //   for (j = 0; j < states; ++j)
-  //    {
-  //      outBuffer[states * states * i + states * j] = 1.0;
-  //      for (k = 1; k < states; ++k)
-  //       {
-  //         outBuffer[states * states * i + states * j + k] = d[k] * EI[states * j + k];
-  //       }
-  //    }
-  // }
-
-  rax_free(d);
-
-  return (PLL_TRUE);
-}
-
-
-/** @brief Compute two P matrices for two edges for the LG4 model
-    
-    Computing the P matrices of two nodes based on their edges for the LG4 model. This is used to 
-    (later) compute the the conditional likelihood at a node p which has two descendants \a q and \r, 
-    which in turn have the edges \a z1 and \a z2 that connect them with \a p. Given those edges, we
-    compute two P matrices \a P(z1) and \a P(z2) which are stored in the arrays \a left and \a right.
-
-    @param z1
-      Branch length leading to left descendant node (let's call it \a q)
-     
-    @param z2
-      Branch length leading to right descendant node (let's call it \a r)
-
-    @param rptr
-      Array of values for rate categories
-
-    @param EI
-      Inverse eigenvectors of 4 Q-matrices
-     
-    @param EIGN
-      Eigenvalues of 4 Q-matrix
-
-    @param numberOfCategories
-      How many rate heterogeneity categories we have, depending on GAMMA and CAT
-     
-    @param left
-      Where to store the left P matrix (for node \a q)
-     
-    @param right
-      Where to store the right P matrix (for node \a r)
-
-    @param numStates
-      Number of states for the particular data (4 for DNA or 20 for AA)
-
-    @todo
-      Present the maths here as in ::makeP
-
-*/
-static void makeP_FlexLG4(double z1, double z2, double *rptr, double *EI[4],  double *EIGN[4], int numberOfCategories, double *left, double *right, const int numStates)
-{
-  int 
-    i,
-    j,
-    k;
-  
-  const int
-    statesSquare = numStates * numStates;
-
-  double    
-    d1[64],  
-    d2[64];
-
-  assert(numStates <= 64);
-       
-  for(i = 0; i < numberOfCategories; i++)
-    {
-      for(j = 1; j < numStates; j++)
-        {
-          d1[j] = exp (rptr[i] * EIGN[i][j] * z1);
-          d2[j] = exp (rptr[i] * EIGN[i][j] * z2);
-        }
-
-      for(j = 0; j < numStates; j++)
-        {
-          left[statesSquare * i  + numStates * j] = 1.0;
-          right[statesSquare * i + numStates * j] = 1.0;
-
-          for(k = 1; k < numStates; k++)
-            {
-              left[statesSquare * i + numStates * j + k]  = d1[k] * EI[i][numStates * j + k];
-              right[statesSquare * i + numStates * j + k] = d2[k] * EI[i][numStates * j + k];
-            }
-        }
-    }  
-}
-
-#if (!defined(__AVX) && !defined(__SSE3))
-
-/** @brief Computation of conditional likelihood arrays for CAT
- 
-    This is a generic, slow but readable function implementation for computing the 
-     conditional likelihood arrays at p, given child nodes q and r using the CAT
-     mode of rate heterogeneity. Depending whether \a q, resp. \r, are tips or internal
-     nodes (indicated by \a tipCase) the conditional likelihoods are computed based on
-     \a x1 if \a q is an inner node or \a tipX1 if it is a tip, resp. \a x2 if \a r
-     is an inner node or \a tipX2 if it is a tip. Output array \a ex3 stores the
-     number of times the likelihood of each site for each internal node has been scaled.
-     The conditional likelihood vectors for any possible base-pair (which is useful when
-     \a q or \a r are tips) has been already precomputed from the eigenvalues of the Q
-     matrix in the array \a tipVector. In case the conditional likelihood for a particular
-     site is very small in terms of a floating point number, then it is multiplied by a
-     very large number (scaling), and then number of times it has been scaled (per node) is
-     stored in the array \a ex3, if \a fastScaling is set to \b PLL_FALSE. Otherwise, the
-     total number of scalings for all sites and all nodes is stored in a single variable
-     \a scalerIncrement.
-
-    @param tipCase
-      Can be either \b PLL_TIP_TIP, or \b PLL_TIP_INNER or \b PLL_INNER_INNER, and describes the
-      descendants of the node for which we currently compute the condition likelihood
-      vector, i.e. whether they are both tips (leaves), or one is tip and the other
-      an inner node, or both are inner nodes.
-
-    @param extEV
-      Eigenvectors of Q matrix
-      
-    @param cptr
-      Array where the rate for each site in the compressed partition alignment is stored
-
-    @param x1
-      Conditional likelihood vectors of the first child node, in case it is an internal node
-
-    @param x2
-      Conditional likelihood vectors of the second child node, in case it is an internal node
-
-    @param x3
-      Pointer to where the computed conditional likelihood vector of node \a p will be stored
-
-    @param tipVector
-      Vector contining sums of left eigenvectors for likelihood computation at tips.
-
-    @param ex3
-      Pointer to an array of whose elements correspond to the number of times the likelihood of
-      a particular site of a particular internal nodeis scaled. Those elements are incremented
-      at every scaling operation and only if \a fastScaling flag is set to \b PLL_FALSE. This 
-      array will be used later when evaluating the likelihood of the whole tree.
-
-    @param tipX1
-      Pointer to the alignment data (sequence) of first child node, in case it is a tip
-
-    @param tipX2
-      Pointer to the alignment data (sequence) of second child node, in case it is a tip
-
-    @param n
-      Number of sites for which we are doing the evaluation. For the single-thread version this is the number of sites in the
-      current partition, for multi-threads this is the number of sites assigned to the running thread from the current partition.
-
-    @param left
-      Pointer to the P matrix of the left child
-
-    @param right
-      Pointer to the P matrix of the right child
-
-    @param wgt
-      Array of weights for each site
-
-    @param scalerIncrement
-      Where to store the number of scalings carried out in case \a fastScaling is set to \b PLL_TRUE.
-
-    @param fastScaling
-      If set to \b PLL_TRUE, only the total number of scalings for all sites of the partition will be
-      stored in \a scalerIncrement, otherwise per-site scalings are stored in the array \a ex3. 
-
-    @param states
-      Number of states for the particular data (4 for DNA or 20 for AA)
- */
-static void newviewCAT_FLEX(int tipCase, double *extEV,
-                            int *cptr,
-                            double *x1, double *x2, double *x3, double *tipVector,
-                            int *ex3, unsigned char *tipX1, unsigned char *tipX2,
-                            int n, double *left, double *right, int *wgt, int *scalerIncrement, const pllBoolean fastScaling, const int states)
-{
-  double
-    *le, 
-    *ri, 
-    *v, 
-    *vl, 
-    *vr,
-    ump_x1, 
-    ump_x2, 
-    x1px2;
-
-  int 
-    i, 
-    l, 
-    j, 
-    scale, 
-    addScale = 0;
-
-  const int 
-    statesSquare = states * states;
-
-
-  /* here we switch over the different cases for efficiency, but also because 
-     each case accesses different data types.
-
-     We consider three cases: either q and r are both tips, q or r are tips, and q and r are inner 
-     nodes.
-     */
-
-
-  switch(tipCase)
-  {
-
-    /* both child nodes of p weher we want to update the conditional likelihood are tips */
-    case PLL_TIP_TIP:     
-      /* loop over sites */
-      for (i = 0; i < n; i++)
-      {
-        /* set a pointer to the P-Matrices for the rate category of this site */
-        le = &left[cptr[i] * statesSquare];
-        ri = &right[cptr[i] * statesSquare];
-
-        /* pointers to the likelihood entries of the tips q (vl) and r (vr) 
-           We will do reading accesses to these values only.
-           */
-        vl = &(tipVector[states * tipX1[i]]);
-        vr = &(tipVector[states * tipX2[i]]);
-
-        /* address of the conditional likelihood array entres at site i. This is 
-           a writing access to v */
-        v  = &x3[states * i];
-
-        /* initialize v */
-        for(l = 0; l < states; l++)
-          v[l] = 0.0;
-
-        /* loop over states to compute the cond likelihoods at p (v) */
-
-        for(l = 0; l < states; l++)
-        {             
-          ump_x1 = 0.0;
-          ump_x2 = 0.0;
-
-          /* le and ri are the P-matrices */
-
-          for(j = 0; j < states; j++)
-          {
-            ump_x1 += vl[j] * le[l * states + j];
-            ump_x2 += vr[j] * ri[l * states + j];
-          }
-
-          x1px2 = ump_x1 * ump_x2;
-
-          /* multiply with matrix of eigenvectors extEV */
-
-          for(j = 0; j < states; j++)
-            v[j] += x1px2 * extEV[l * states + j];
-        }          
-      }    
-      break;
-    case PLL_TIP_INNER:      
-
-      /* same as above, only that now vl is a tip and vr is the conditional probability vector 
-         at an inner node. Note that, if we have the case that either q or r is a tip, the 
-         nodes will be flipped to ensure that tipX1 always points to the sequence at the tip.
-         */
-
-      for (i = 0; i < n; i++)
-      {
-        le = &left[cptr[i] * statesSquare];
-        ri = &right[cptr[i] * statesSquare];
-
-        /* access tip vector lookup table */
-        vl = &(tipVector[states * tipX1[i]]);
-
-        /* access conditional likelihoo arrays */
-        /* again, vl and vr are reading accesses, while v is a writing access */
-        vr = &x2[states * i];
-        v  = &x3[states * i];
-
-        /* same as in the loop above */
-
-        for(l = 0; l < states; l++)
-          v[l] = 0.0;
-
-        for(l = 0; l < states; l++)
-        {
-          ump_x1 = 0.0;
-          ump_x2 = 0.0;
-
-          for(j = 0; j < states; j++)
-          {
-            ump_x1 += vl[j] * le[l * states + j];
-            ump_x2 += vr[j] * ri[l * states + j];
-          }
-
-          x1px2 = ump_x1 * ump_x2;
-
-          for(j = 0; j < states; j++)
-            v[j] += x1px2 * extEV[l * states + j];
-        }
-
-        /* now let's check for numerical scaling. 
-           The maths in RAxML are a bit non-standard to avoid/economize on arithmetic operations 
-           at the virtual root and for branch length optimization and hence values stored 
-           in the conditional likelihood vectors can become negative.
-           Below we check if all absolute values stored at position i of v are smaller 
-           than a pre-defined value in pll.h. If they are all smaller we can then safely 
-           multiply them by a large, constant number PLL_TWOTOTHE256 (without numerical overflow) 
-           that is also speced in pll.h */
-
-        scale = 1;
-        for(l = 0; scale && (l < states); l++)
-          scale = ((v[l] < PLL_MINLIKELIHOOD) && (v[l] > PLL_MINUSMINLIKELIHOOD));         
-
-        if(scale)
-        {
-          for(l = 0; l < states; l++)
-            v[l] *= PLL_TWOTOTHE256;
-
-          /* if we have scaled the entries to prevent underflow, we need to keep track of how many scaling 
-             multiplications we did per node such as to undo them at the virtual root, e.g., in 
-             evaluateGeneric() 
-             Note here, that, if we scaled the site we need to increment the scaling counter by the wieght, i.e., 
-             the number of sites this potentially compressed pattern represents ! */ 
-
-          if(!fastScaling)
-            ex3[i] += 1;
-          else
-            addScale += wgt[i];   
-          
-        }
-      }   
-      break;
-    case PLL_INNER_INNER:
-
-      /* same as above, only that the two child nodes q and r are now inner nodes */
-
-      for(i = 0; i < n; i++)
-      {
-        le = &left[cptr[i] * statesSquare];
-        ri = &right[cptr[i] * statesSquare];
-
-        /* index conditional likelihood vectors of inner nodes */
-
-        vl = &x1[states * i];
-        vr = &x2[states * i];
-        v = &x3[states * i];
-
-        for(l = 0; l < states; l++)
-          v[l] = 0.0;
-
-        for(l = 0; l < states; l++)
-        {
-          ump_x1 = 0.0;
-          ump_x2 = 0.0;
-
-          for(j = 0; j < states; j++)
-          {
-            ump_x1 += vl[j] * le[l * states + j];
-            ump_x2 += vr[j] * ri[l * states + j];
-          }
-
-          x1px2 =  ump_x1 * ump_x2;
-
-          for(j = 0; j < states; j++)
-            v[j] += x1px2 * extEV[l * states + j];            
-        }
-
-        scale = 1;
-        for(l = 0; scale && (l < states); l++)
-          scale = ((v[l] < PLL_MINLIKELIHOOD) && (v[l] > PLL_MINUSMINLIKELIHOOD));
-
-        if(scale)
-        {
-          for(l = 0; l < states; l++)
-            v[l] *= PLL_TWOTOTHE256;
-          
-          if(!fastScaling)
-            ex3[i] += 1;
-          else
-            addScale += wgt[i];    
-        }
-      }
-      break;
-    default:
-      assert(0);
-  }
-
-  /* increment the scaling counter by the additional scalings done at node p */
-
-  if(fastScaling)
-    *scalerIncrement = addScale;
-}
-
-/** @brief Computation of conditional likelihood arrays for \b GAMMA
- 
-    This is a generic, slow but readable function implementation for computing the 
-     conditional likelihood arrays at \a p, given child nodes \a q and \a r using the \b GAMMA
-     model of rate heterogeneity. Depending whether \a q, resp. \r, are tips or internal
-     nodes (indicated by \a tipCase) the conditional likelihoods are computed based on
-     \a x1 if \a q is an inner node or \a tipX1 if it is a tip, resp. \a x2 if \a r
-     is an inner node or \a tipX2 if it is a tip. Output array \a ex3 stores the
-     number of times the likelihood of each site for each internal node has been scaled.
-     The conditional likelihood vectors for any possible base-pair (which is useful when
-     \a q or \a r are tips) has been already precomputed from the eigenvalues of the Q
-     matrix in the array \a tipVector. In case the conditional likelihood for a particular
-     site is very small in terms of a floating point number, then it is multiplied by a
-     very large number (scaling), and then number of times it has been scaled (per node) is
-     stored in the array \a ex3, if \a fastScaling is set to \b PLL_FALSE. Otherwise, the
-     total number of scalings for all sites and all nodes is stored in a single variable
-     \a scalerIncrement.
-
-    @param tipCase
-      Can be either \b PLL_TIP_TIP, or \b PLL_TIP_INNER or \b PLL_INNER_INNER, and describes the
-      descendants of the node for which we currently compute the condition likelihood
-      vector, i.e. whether they are both tips (leaves), or one is tip and the other
-      an inner node, or both are inner nodes.
-
-    @param x1
-      Conditional likelihood vectors of the first child node, in case it is an internal node
-
-    @param x2
-      Conditional likelihood vectors of the second child node, in case it is an internal node
-
-    @param x3
-      Pointer to where the computed conditional likelihood vector of node \a p will be stored
-
-    @param extEV
-      Eigenvectors of Q matrix
-
-    @param tipVector
-      Vector contining sums of left eigenvectors for likelihood computation at tips.
-
-    @param ex3
-      Pointer to an array of whose elements correspond to the number of times the likelihood of
-      a particular site of a particular internal nodeis scaled. Those elements are incremented
-      at every scaling operation and only if \a fastScaling flag is set to \b PLL_FALSE. This 
-      array will be used later when evaluating the likelihood of the whole tree.
-
-    @param tipX1
-      Pointer to the alignment data (sequence) of first child node, in case it is a tip
-
-    @param tipX2
-      Pointer to the alignment data (sequence) of second child node, in case it is a tip
-
-    @param n
-      Number of sites to be processed
-
-    @param left
-      Pointer to the P matrix of the left child
-
-    @param right
-      Pointer to the P matrix of the right child
-
-    @param wgt
-      Array of weights for each site
-
-    @param scalerIncrement
-      Where to store the number of scalings carried out in case \a fastScaling is set to \b PLL_TRUE.
-
-    @param fastScaling
-      If set to \b PLL_TRUE, only the total number of scalings for all sites of the partition will be
-      stored in \a scalerIncrement, otherwise per-site scalings are stored in the array \a ex3. 
-
-    @param states
-      Number of states for the particular data (4 for DNA or 20 for AA)
-
-    @param maxStateValue
-      Number of all possible base-pairs including degenerate characters, i.e. 16 for  DNA and 23 for AA
- */
-static void newviewGAMMA_FLEX(int tipCase,
-                              double *x1, double *x2, double *x3, double *extEV, double *tipVector,
-                              int *ex3, unsigned char *tipX1, unsigned char *tipX2,
-                              int n, double *left, double *right, int *wgt, int *scalerIncrement, const pllBoolean fastScaling, const int states, const int maxStateValue)
-{
-  double  
-    *uX1, 
-    *uX2, 
-    *v, 
-    x1px2, 
-    *vl, 
-    *vr, 
-    al, 
-    ar;
-
-  int  
-    i, 
-    j, 
-    l, 
-    k, 
-    scale, 
-    addScale = 0;
-
-  const int     
-    statesSquare = states * states,
-                 span = states * 4,
-                 /* this is required for doing some pre-computations that help to save 
-                    numerical operations. What we are actually computing here are additional lookup tables 
-                    for each possible state a certain data-type can assume.
-                    for DNA with ambuguity coding this is 15, for proteins this is 22 or 23, since there 
-                    also exist one or two amibguity codes for protein data.
-                    Essentially this is very similar to the tip vectors which we also use as lookup tables */
-                 precomputeLength = maxStateValue * span;
-
-  switch(tipCase)
-  {
-    case PLL_TIP_TIP:
-      {
-        /* allocate pre-compute memory space */
-
-        double 
-          *umpX1 = (double*)rax_malloc(sizeof(double) * precomputeLength),
-          *umpX2 = (double*)rax_malloc(sizeof(double) * precomputeLength);
-
-        /* multiply all possible tip state vectors with the respective P-matrices 
-        */
-
-        for(i = 0; i < maxStateValue; i++)
-        {
-          v = &(tipVector[states * i]);
-
-          for(k = 0; k < span; k++)
-          {
-
-            umpX1[span * i + k] = 0.0;
-            umpX2[span * i + k] = 0.0;
-
-            for(l = 0; l < states; l++)
-            {
-              umpX1[span * i + k] +=  v[l] *  left[k * states + l];
-              umpX2[span * i + k] +=  v[l] * right[k * states + l];
-            }
-
-          }
-        }
-
-        for(i = 0; i < n; i++)
-        {
-          /* access the precomputed arrays (pre-computed multiplication of conditional with the tip state) 
-          */
-
-          uX1 = &umpX1[span * tipX1[i]];
-          uX2 = &umpX2[span * tipX2[i]];
-
-          /* loop over discrete GAMMA rates */
-
-          for(j = 0; j < 4; j++)
-          {
-            /* the rest is the same as for CAT */
-            v = &x3[i * span + j * states];
-
-            for(k = 0; k < states; k++)
-              v[k] = 0.0;
-
-            for(k = 0; k < states; k++)
-            {              
-              x1px2 = uX1[j * states + k] * uX2[j * states + k];
-
-              for(l = 0; l < states; l++)                                                       
-                v[l] += x1px2 * extEV[states * k + l];               
-            }
-
-          }        
-        }
-
-        /* free precomputed vectors */
-
-        rax_free(umpX1);
-        rax_free(umpX2);
-      }
-      break;
-    case PLL_TIP_INNER:
-      {
-        /* we do analogous pre-computations as above, with the only difference that we now do them 
-           only for one tip vector */
-
-        double 
-          *umpX1 = (double*)rax_malloc(sizeof(double) * precomputeLength),
-          *ump_x2 = (double*)rax_malloc(sizeof(double) * states);
-
-        /* precompute P and left tip vector product */
-
-        for(i = 0; i < maxStateValue; i++)
-        {
-          v = &(tipVector[states * i]);
-
-          for(k = 0; k < span; k++)
-          {
-
-            umpX1[span * i + k] = 0.0;
-
-            for(l = 0; l < states; l++)
-              umpX1[span * i + k] +=  v[l] * left[k * states + l];
-
-
-          }
-        }
-
-        for (i = 0; i < n; i++)
-        {
-          /* access pre-computed value based on the raw sequence data tipX1 that is used as an index */
-
-          uX1 = &umpX1[span * tipX1[i]];
-
-          /* loop over discrete GAMMA rates */
-
-          for(k = 0; k < 4; k++)
-          {
-            v = &(x2[span * i + k * states]);
-
-            for(l = 0; l < states; l++)
-            {
-              ump_x2[l] = 0.0;
-
-              for(j = 0; j < states; j++)
-                ump_x2[l] += v[j] * right[k * statesSquare + l * states + j];
-            }
-
-            v = &(x3[span * i + states * k]);
-
-            for(l = 0; l < states; l++)
-              v[l] = 0;
-
-            for(l = 0; l < states; l++)
-            {
-              x1px2 = uX1[k * states + l]  * ump_x2[l];
-              for(j = 0; j < states; j++)
-                v[j] += x1px2 * extEV[l * states  + j];
-            }
-          }
-
-          /* also do numerical scaling as above. Note that here we need to scale 
-             4 * 4 values for DNA or 4 * 20 values for protein data.
-             If they are ALL smaller than our threshold, we scale. Note that,
-             this can cause numerical problems with GAMMA, if the values generated 
-             by the four discrete GAMMA rates are too different.
-
-             For details, see: 
-
-             F. Izquierdo-Carrasco, S.A. Smith, A. Stamatakis: "Algorithms, Data Structures, and Numerics for Likelihood-based Phylogenetic Inference of Huge Trees"
-
-*/
-
-
-          v = &x3[span * i];
-          scale = 1;
-          for(l = 0; scale && (l < span); l++)
-            scale = (PLL_ABS(v[l]) <  PLL_MINLIKELIHOOD);
-
-
-          if (scale)
-          {
-            for(l = 0; l < span; l++)
-              v[l] *= PLL_TWOTOTHE256;
-            
-            if(!fastScaling)
-              ex3[i] += 1;
-            else
-              addScale += wgt[i];                   
-          }
-        }
-
-        rax_free(umpX1);
-        rax_free(ump_x2);
-      }
-      break;
-    case PLL_INNER_INNER:
-
-      /* same as above, without pre-computations */
-
-      for (i = 0; i < n; i++)
-      {
-        for(k = 0; k < 4; k++)
-        {
-          vl = &(x1[span * i + states * k]);
-          vr = &(x2[span * i + states * k]);
-          v =  &(x3[span * i + states * k]);
-
-
-          for(l = 0; l < states; l++)
-            v[l] = 0;
-
-
-          for(l = 0; l < states; l++)
-          {              
-
-            al = 0.0;
-            ar = 0.0;
-
-            for(j = 0; j < states; j++)
-            {
-              al += vl[j] * left[k * statesSquare + l * states + j];
-              ar += vr[j] * right[k * statesSquare + l * states + j];
-            }
-
-            x1px2 = al * ar;
-
-            for(j = 0; j < states; j++)
-              v[j] += x1px2 * extEV[states * l + j];
-
-          }
-        }
-
-        v = &(x3[span * i]);
-        scale = 1;
-        for(l = 0; scale && (l < span); l++)
-          scale = ((PLL_ABS(v[l]) <  PLL_MINLIKELIHOOD));
-
-        if(scale)
-        {  
-          for(l = 0; l < span; l++)
-            v[l] *= PLL_TWOTOTHE256;
-          
-          if(!fastScaling)
-            ex3[i] += 1;
-          else
-            addScale += wgt[i];           
-        }
-      }
-      break;
-    default:
-      assert(0);
-  }
-
-  /* as above, increment the global counter that counts scaling multiplications by the scaling multiplications 
-     carried out for computing the likelihood array at node p */
-
-  if(fastScaling)
-    *scalerIncrement = addScale;
-}
-
-
-/* Candidate for deletion */
-/*
-static void newviewGTRCAT( int tipCase,  double *EV,  int *cptr,
-                           double *x1_start,  double *x2_start,  double *x3_start,  double *tipVector,
-                           int *ex3, unsigned char *tipX1, unsigned char *tipX2,
-                           int n,  double *left, double *right, int *wgt, int *scalerIncrement, const pllBoolean useFastScaling)
-{
-  double
-    *le,
-    *ri,
-    *x1, *x2, *x3;
-  double
-    ump_x1, ump_x2, x1px2[4];
-  int i, j, k, scale, addScale = 0;
-
-  switch(tipCase)
-    {
-    case PLL_TIP_TIP:
-      {
-        for (i = 0; i < n; i++)
-          {
-            x1 = &(tipVector[4 * tipX1[i]]);
-            x2 = &(tipVector[4 * tipX2[i]]);
-            x3 = &x3_start[4 * i];
-
-            le =  &left[cptr[i] * 16];
-            ri =  &right[cptr[i] * 16];
-
-            for(j = 0; j < 4; j++)
-              {
-                ump_x1 = 0.0;
-                ump_x2 = 0.0;
-                for(k = 0; k < 4; k++)
-                  {
-                    ump_x1 += x1[k] * le[j * 4 + k];
-                    ump_x2 += x2[k] * ri[j * 4 + k];
-                  }
-                x1px2[j] = ump_x1 * ump_x2;
-              }
-
-            for(j = 0; j < 4; j++)
-              x3[j] = 0.0;
-
-            for(j = 0; j < 4; j++)
-              for(k = 0; k < 4; k++)
-                x3[k] += x1px2[j] * EV[j * 4 + k];          
-          }
-      }
-      break;
-    case PLL_TIP_INNER:
-      {
-        for (i = 0; i < n; i++)
-          {
-            x1 = &(tipVector[4 * tipX1[i]]);
-            x2 = &x2_start[4 * i];
-            x3 = &x3_start[4 * i];
-
-            le =  &left[cptr[i] * 16];
-            ri =  &right[cptr[i] * 16];
-
-            for(j = 0; j < 4; j++)
-              {
-                ump_x1 = 0.0;
-                ump_x2 = 0.0;
-                for(k = 0; k < 4; k++)
-                  {
-                    ump_x1 += x1[k] * le[j * 4 + k];
-                    ump_x2 += x2[k] * ri[j * 4 + k];
-                  }
-                x1px2[j] = ump_x1 * ump_x2;
-              }
-
-            for(j = 0; j < 4; j++)
-              x3[j] = 0.0;
-
-            for(j = 0; j < 4; j++)
-              for(k = 0; k < 4; k++)
-                x3[k] +=  x1px2[j] *  EV[4 * j + k];       
-
-            scale = 1;
-            for(j = 0; j < 4 && scale; j++)
-              scale = (x3[j] < PLL_MINLIKELIHOOD && x3[j] > PLL_MINUSMINLIKELIHOOD);               
-                    
-            if(scale)
-              {             
-                for(j = 0; j < 4; j++)
-                  x3[j] *= PLL_TWOTOTHE256;
-                
-                if(useFastScaling)
-                  addScale += wgt[i];
-                else
-                  ex3[i]  += 1;         
-              }      
-          }
-      }
-      break;
-    case PLL_INNER_INNER:
-      for (i = 0; i < n; i++)
-        {
-          x1 = &x1_start[4 * i];
-          x2 = &x2_start[4 * i];
-          x3 = &x3_start[4 * i];
-
-          le = &left[cptr[i] * 16];
-          ri = &right[cptr[i] * 16];
-
-          for(j = 0; j < 4; j++)
-            {
-              ump_x1 = 0.0;
-              ump_x2 = 0.0;
-              for(k = 0; k < 4; k++)
-                {
-                  ump_x1 += x1[k] * le[j * 4 + k];
-                  ump_x2 += x2[k] * ri[j * 4 + k];
-                }
-              x1px2[j] = ump_x1 * ump_x2;
-            }
-
-          for(j = 0; j < 4; j++)
-            x3[j] = 0.0;
-
-          for(j = 0; j < 4; j++)
-            for(k = 0; k < 4; k++)
-              x3[k] +=  x1px2[j] *  EV[4 * j + k];
-        
-          scale = 1;
-          for(j = 0; j < 4 && scale; j++)
-            scale = (x3[j] < PLL_MINLIKELIHOOD && x3[j] > PLL_MINUSMINLIKELIHOOD);
-
-          if(scale)
-            {               
-              for(j = 0; j < 4; j++)
-                x3[j] *= PLL_TWOTOTHE256;
-              
-              if(useFastScaling)
-                addScale += wgt[i];
-              else
-                ex3[i]  += 1;           
-            }     
-        }
-      break;
-    default:
-      assert(0);
-    }
-
-  if(useFastScaling)
-    *scalerIncrement = addScale;
-
-}
-*/
-#if 0
-static void newviewGTRGAMMA_BINARY(int tipCase,
-                                   double *x1_start, double *x2_start, double *x3_start,
-                                   double *EV, double *tipVector,
-                                   int *ex3, unsigned char *tipX1, unsigned char *tipX2,
-                                   const int n, double *left, double *right, int *wgt, int *scalerIncrement, const pllBoolean useFastScaling
-                                   )
-{
-  double
-    *x1, *x2, *x3;
-  double
-    ump_x1,
-    ump_x2,
-    x1px2[4];
-  int i, j, k, l, scale, addScale = 0;
-
-
-  /* C-OPT figure out if we are at an inner node who has two tips/leaves
-     as descendants TIP_TIP, a tip and another inner node as descendant
-     TIP_INNER, or two inner nodes as descendants INNER_INNER */
-
-  switch(tipCase)
-    {
-    case PLL_TIP_TIP:
-      {
-        for (i = 0; i < n; i++)
-          {
-            x1 = &(tipVector[2 * tipX1[i]]);
-            x2 = &(tipVector[2 * tipX2[i]]);
-            x3 = &x3_start[i * 8];
-
-            for(j = 0; j < 8; j++)
-              x3[j] = 0.0;
-
-            for (j = 0; j < 4; j++)
-              {
-                for (k = 0; k < 2; k++)
-                  {
-                    ump_x1 = 0.0;
-                    ump_x2 = 0.0;
-
-                    for (l=0; l < 2; l++)
-                      {
-                        ump_x1 += x1[l] * left[ j*4 + k*2 + l];
-                        ump_x2 += x2[l] * right[j*4 + k*2 + l];
-                      }
-
-                    x1px2[k] = ump_x1 * ump_x2;
-                  }
-
-                for(k = 0; k < 2; k++)
-                  for (l = 0; l < 2; l++)
-                    x3[j * 2 + l] +=  x1px2[k] * EV[2 * k + l];
-
-              }    
-          }
-      }
-      break;
-    case PLL_TIP_INNER:
-      {
-         for (i = 0; i < n; i++)
-           {
-             x1 = &(tipVector[2 * tipX1[i]]);
-             x2 = &x2_start[i * 8];
-             x3 = &x3_start[i * 8];
-
-             for(j = 0; j < 8; j++)
-               x3[j] = 0.0;
-
-             for (j = 0; j < 4; j++)
-               {
-                 for (k = 0; k < 2; k++)
-                   {
-                     ump_x1 = 0.0;
-                     ump_x2 = 0.0;
-
-                     for (l=0; l < 2; l++)
-                       {
-                         ump_x1 += x1[l] * left[ j*4 + k*2 + l];
-                         ump_x2 += x2[j*2 + l] * right[j*4 + k*2 + l];
-                       }
-
-                     x1px2[k] = ump_x1 * ump_x2;
-                   }
-
-                 for(k = 0; k < 2; k++)
-                   for (l = 0; l < 2; l++)
-                     x3[j * 2 + l] +=  x1px2[k] * EV[2 * k + l];
-
-               }            
-
-             scale = 1;
-             for(l = 0; scale && (l < 8); l++)
-               scale = (PLL_ABS(x3[l]) <  PLL_MINLIKELIHOOD);
-
-             if(scale)
-               {
-                 for (l=0; l < 8; l++)
-                   x3[l] *= PLL_TWOTOTHE256;
-                 
-                 if(useFastScaling)
-                   addScale += wgt[i];
-                 else
-                   ex3[i]  += 1;               
-               }
-
-           }
-      }
-      break;
-    case PLL_INNER_INNER:
-
-      /* C-OPT here we don't do any pre-computations
-         This should be the most compute intensive loop of the three
-         cases here. If we have one or two tips as descendants
-         we can take a couple of shortcuts */
-
-
-     for (i = 0; i < n; i++)
-       {
-         x1 = &x1_start[i * 8];
-         x2 = &x2_start[i * 8];
-         x3 = &x3_start[i * 8];
-
-         for(j = 0; j < 8; j++)
-           x3[j] = 0.0;
-
-         for (j = 0; j < 4; j++)
-           {
-             for (k = 0; k < 2; k++)
-               {
-                 ump_x1 = 0.0;
-                 ump_x2 = 0.0;
-
-                 for (l=0; l < 2; l++)
-                   {
-                     ump_x1 += x1[j*2 + l] * left[ j*4 + k*2 + l];
-                     ump_x2 += x2[j*2 + l] * right[j*4 + k*2 + l];
-                   }
-
-                 x1px2[k] = ump_x1 * ump_x2;
-               }
-
-             for(k = 0; k < 2; k++)
-               for (l = 0; l < 2; l++)
-                 x3[j * 2 + l] +=  x1px2[k] * EV[2 * k + l];
-
-           }
-         
-         scale = 1;
-         for(l = 0; scale && (l < 8); l++)
-           scale = (PLL_ABS(x3[l]) <  PLL_MINLIKELIHOOD);
-
-
-         if(scale)
-           {
-             for (l=0; l<8; l++)
-               x3[l] *= PLL_TWOTOTHE256;
-
-             if(useFastScaling)
-               addScale += wgt[i];
-             else
-               ex3[i]  += 1;      
-           }
-       }
-     break;
-
-    default:
-      assert(0);
-    }
-
-  if(useFastScaling)
-    *scalerIncrement = addScale;
-
-}
-
-static void newviewGTRCAT_BINARY( int tipCase,  double *EV,  int *cptr,
-				  double *x1_start,  double *x2_start,  double *x3_start,  double *tipVector,
-				  int *ex3, unsigned char *tipX1, unsigned char *tipX2,
-				  int n,  double *left, double *right, int *wgt, int *scalerIncrement, const pllBoolean useFastScaling)
-{
-  double
-    *le,
-    *ri,
-    *x1, *x2, *x3;
-  double
-    ump_x1, ump_x2, x1px2[2];
-  int i, j, k, scale, addScale = 0;
-
-  switch(tipCase)
-    {
-    case PLL_TIP_TIP:
-      {
-	for (i = 0; i < n; i++)
-	  {
-	    x1 = &(tipVector[2 * tipX1[i]]);
-	    x2 = &(tipVector[2 * tipX2[i]]);
-	    x3 = &x3_start[2 * i];	    
-
-	    le =  &left[cptr[i] * 4];
-	    ri =  &right[cptr[i] * 4];
-
-	    for(j = 0; j < 2; j++)
-	      {
-		ump_x1 = 0.0;
-		ump_x2 = 0.0;
-		for(k = 0; k < 2; k++)
-		  {
-		    ump_x1 += x1[k] * le[j * 2 + k];
-		    ump_x2 += x2[k] * ri[j * 2 + k];
-		  }
-		x1px2[j] = ump_x1 * ump_x2;
-	      }
-
-	    for(j = 0; j < 2; j++)
-	      x3[j] = 0.0;
-
-	    for(j = 0; j < 2; j++)
-	      for(k = 0; k < 2; k++)
-		x3[k] += x1px2[j] * EV[j * 2 + k];	   
-	  }
-      }
-      break;
-    case PLL_TIP_INNER:
-      {
-	for (i = 0; i < n; i++)
-	  {
-	    x1 = &(tipVector[2 * tipX1[i]]);
-	    x2 = &x2_start[2 * i];
-	    x3 = &x3_start[2 * i];
-	    
-	    le =  &left[cptr[i] * 4];
-	    ri =  &right[cptr[i] * 4];
-
-	    for(j = 0; j < 2; j++)
-	      {
-		ump_x1 = 0.0;
-		ump_x2 = 0.0;
-		for(k = 0; k < 2; k++)
-		  {
-		    ump_x1 += x1[k] * le[j * 2 + k];
-		    ump_x2 += x2[k] * ri[j * 2 + k];
-		  }
-		x1px2[j] = ump_x1 * ump_x2;
-	      }
-
-	    for(j = 0; j < 2; j++)
-	      x3[j] = 0.0;
-
-	    for(j = 0; j < 2; j++)
-	      for(k = 0; k < 2; k++)
-		x3[k] +=  x1px2[j] *  EV[2 * j + k];	   
-
-	    scale = 1;
-	    for(j = 0; j < 2 && scale; j++)
-	      scale = (x3[j] < PLL_MINLIKELIHOOD && x3[j] > PLL_MINUSMINLIKELIHOOD);
-
-	    if(scale)
-	      {
-		for(j = 0; j < 2; j++)
-		  x3[j] *= PLL_TWOTOTHE256;
-
-		if(useFastScaling)
-		  addScale += wgt[i];
-		else
-		  ex3[i]  += 1;	       
-	      }
-	  }
-      }
-      break;
-    case PLL_INNER_INNER:
-      for (i = 0; i < n; i++)
-	{
-	  x1 = &x1_start[2 * i];
-	  x2 = &x2_start[2 * i];
-	  x3 = &x3_start[2 * i];
-
-	  le = &left[cptr[i] * 4];
-	  ri = &right[cptr[i] * 4];
-
-	  for(j = 0; j < 2; j++)
-	    {
-	      ump_x1 = 0.0;
-	      ump_x2 = 0.0;
-	      for(k = 0; k < 2; k++)
-		{
-		  ump_x1 += x1[k] * le[j * 2 + k];
-		  ump_x2 += x2[k] * ri[j * 2 + k];
-		}
-	      x1px2[j] = ump_x1 * ump_x2;
-	    }
-
-	  for(j = 0; j < 2; j++)
-	    x3[j] = 0.0;
-
-	  for(j = 0; j < 2; j++)
-	    for(k = 0; k < 2; k++)
-	      x3[k] +=  x1px2[j] *  EV[2 * j + k];	  
-
-	  scale = 1;
-	  for(j = 0; j < 2 && scale; j++)
-	    scale = (x3[j] < PLL_MINLIKELIHOOD && x3[j] > PLL_MINUSMINLIKELIHOOD);
-
-	  if(scale)
-	    {
-	      for(j = 0; j < 2; j++)
-		x3[j] *= PLL_TWOTOTHE256;
-
-	      if(useFastScaling)
-		addScale += wgt[i];
-	      else
-		ex3[i]  += 1;	   
-	    }
-	}
-      break;
-    default:
-      assert(0);
-    }
-
-  if(useFastScaling)
-    *scalerIncrement = addScale;
-
-}
-#endif    /* end if 0 */
-#endif
-
-#if (defined(__AVX) || defined(__SSE3))
-static void newviewGTRCAT_BINARY( int tipCase,  double *EV,  int *cptr,
-                                  double *x1_start,  double *x2_start,  double *x3_start,  double *tipVector,
-                                  int *ex3, unsigned char *tipX1, unsigned char *tipX2,
-                                  int n,  double *left, double *right, int *wgt, int *scalerIncrement, const pllBoolean useFastScaling)
-{
-  double
-    *le,
-    *ri,
-    *x1, *x2, *x3;
-  int i, l, scale, addScale = 0;
-
-  switch(tipCase)
-    {
-    case PLL_TIP_TIP:
-      {
-        for(i = 0; i < n; i++)
-          {
-            x1 = &(tipVector[2 * tipX1[i]]);
-            x2 = &(tipVector[2 * tipX2[i]]);
-            x3 = &x3_start[2 * i];         
-
-            le =  &left[cptr[i] * 4];
-            ri =  &right[cptr[i] * 4];
-
-            _mm_store_pd(x3, _mm_setzero_pd());     
-                     
-            for(l = 0; l < 2; l++)
-              {                                                                                                                          
-                __m128d al = _mm_mul_pd(_mm_load_pd(x1), _mm_load_pd(&le[l * 2]));
-                __m128d ar = _mm_mul_pd(_mm_load_pd(x2), _mm_load_pd(&ri[l * 2]));
-                
-                al = _mm_hadd_pd(al, al);
-                ar = _mm_hadd_pd(ar, ar);
-                
-                al = _mm_mul_pd(al, ar);
-                
-                __m128d vv  = _mm_load_pd(x3);
-                __m128d EVV = _mm_load_pd(&EV[2 * l]);
-                
-                vv = _mm_add_pd(vv, _mm_mul_pd(al, EVV));
-                
-                _mm_store_pd(x3, vv);                                                     
-              }            
-          }
-      }
-      break;
-    case PLL_TIP_INNER:
-      {
-        for (i = 0; i < n; i++)
-          {
-            x1 = &(tipVector[2 * tipX1[i]]);
-            x2 = &x2_start[2 * i];
-            x3 = &x3_start[2 * i];
-            
-            le =  &left[cptr[i] * 4];
-            ri =  &right[cptr[i] * 4];
-
-            _mm_store_pd(x3, _mm_setzero_pd());     
-                     
-            for(l = 0; l < 2; l++)
-              {                                                                                                                          
-                __m128d al = _mm_mul_pd(_mm_load_pd(x1), _mm_load_pd(&le[l * 2]));
-                __m128d ar = _mm_mul_pd(_mm_load_pd(x2), _mm_load_pd(&ri[l * 2]));
-                
-                al = _mm_hadd_pd(al, al);
-                ar = _mm_hadd_pd(ar, ar);
-                
-                al = _mm_mul_pd(al, ar);
-                
-                __m128d vv  = _mm_load_pd(x3);
-                __m128d EVV = _mm_load_pd(&EV[2 * l]);
-                
-                vv = _mm_add_pd(vv, _mm_mul_pd(al, EVV));
-                
-                _mm_store_pd(x3, vv);                                                     
-              }  
-            
-            __m128d minlikelihood_sse = _mm_set1_pd(PLL_MINLIKELIHOOD);
-         
-            scale = 1;
-            
-            __m128d v1 = _mm_and_pd(_mm_load_pd(x3), absMask.m);
-            v1 = _mm_cmplt_pd(v1,  minlikelihood_sse);
-            if(_mm_movemask_pd( v1 ) != 3)
-              scale = 0;                         
-            
-            if(scale)
-              {
-                __m128d twoto = _mm_set_pd(PLL_TWOTOTHE256, PLL_TWOTOTHE256);
-                
-                __m128d ex3v = _mm_load_pd(x3);           
-                _mm_store_pd(x3, _mm_mul_pd(ex3v,twoto));                                                 
-                
-                if(useFastScaling)
-                  addScale += wgt[i];
-                else
-                  ex3[i]  += 1;   
-              }                    
-          }
-      }
-      break;
-    case PLL_INNER_INNER:
-      for (i = 0; i < n; i++)
-        {
-          x1 = &x1_start[2 * i];
-          x2 = &x2_start[2 * i];
-          x3 = &x3_start[2 * i];
-
-          le = &left[cptr[i] * 4];
-          ri = &right[cptr[i] * 4];
-
-          _mm_store_pd(x3, _mm_setzero_pd());       
-          
-          for(l = 0; l < 2; l++)
-            {                                                                                                                            
-              __m128d al = _mm_mul_pd(_mm_load_pd(x1), _mm_load_pd(&le[l * 2]));
-              __m128d ar = _mm_mul_pd(_mm_load_pd(x2), _mm_load_pd(&ri[l * 2]));
-              
-              al = _mm_hadd_pd(al, al);
-              ar = _mm_hadd_pd(ar, ar);
-              
-              al = _mm_mul_pd(al, ar);
-              
-              __m128d vv  = _mm_load_pd(x3);
-              __m128d EVV = _mm_load_pd(&EV[2 * l]);
-              
-              vv = _mm_add_pd(vv, _mm_mul_pd(al, EVV));
-              
-              _mm_store_pd(x3, vv);                                                       
-            }                             
-
-          __m128d minlikelihood_sse = _mm_set1_pd(PLL_MINLIKELIHOOD);
-         
-          scale = 1;
-                  
-          __m128d v1 = _mm_and_pd(_mm_load_pd(x3), absMask.m);
-          v1 = _mm_cmplt_pd(v1,  minlikelihood_sse);
-          if(_mm_movemask_pd( v1 ) != 3)
-            scale = 0;                   
-         
-          if(scale)
-            {
-              __m128d twoto = _mm_set_pd(PLL_TWOTOTHE256, PLL_TWOTOTHE256);
-                    
-              __m128d ex3v = _mm_load_pd(x3);             
-              _mm_store_pd(x3, _mm_mul_pd(ex3v,twoto));                                           
-             
-              if(useFastScaling)
-                addScale += wgt[i];
-              else
-                ex3[i]  += 1;     
-           }             
-        }
-      break;
-    default:
-      assert(0);
-    }
-
-  if(useFastScaling)
-    *scalerIncrement = addScale;
-
-}
-
-/* Computes one rate category of one site for binary (2-state) data.
-   x3 is first set to zero; then, for each of the two rows of the category's left and right
-   P matrices, the dot product of x1 with the left row is multiplied by the dot product of x2
-   with the right row, scaled by the matching pair of EV entries, and added to x3. */
-static void gammaBinaryCategory(const double *x1, const double *x2, double *x3,
-                                const double *leftP, const double *rightP, const double *EV)
-{
-  int row;
-
-  _mm_store_pd(x3, _mm_setzero_pd());
-
-  for(row = 0; row < 2; row++)
-    {
-      __m128d dotL = _mm_mul_pd(_mm_load_pd(x1), _mm_load_pd(&leftP[2 * row]));
-      __m128d dotR = _mm_mul_pd(_mm_load_pd(x2), _mm_load_pd(&rightP[2 * row]));
-      __m128d both;
-
-      /* the horizontal add leaves the two-element dot product in both lanes */
-      dotL = _mm_hadd_pd(dotL, dotL);
-      dotR = _mm_hadd_pd(dotR, dotR);
-
-      both = _mm_mul_pd(dotL, dotR);
-
-      _mm_store_pd(x3, _mm_add_pd(_mm_load_pd(x3), _mm_mul_pd(both, _mm_load_pd(&EV[2 * row]))));
-    }
-}
-
-/* Inspects the 8 entries (4 categories x 2 states) of one site. If every entry is smaller in
-   magnitude than PLL_MINLIKELIHOOD, all of them are multiplied by PLL_TWOTOTHE256.
-   Returns 1 when the site was rescaled and 0 when it was left untouched. */
-static int gammaBinaryScaleSite(double *site)
-{
-  const __m128d tiny   = _mm_set1_pd(PLL_MINLIKELIHOOD);
-  const __m128d factor = _mm_set1_pd(PLL_TWOTOTHE256);
-  int off;
-
-  /* stop at the first pair that contains an entry that is not below the threshold */
-  for(off = 0; off < 8; off += 2)
-    {
-      __m128d mag = _mm_and_pd(_mm_load_pd(&site[off]), absMask.m);
-
-      if(_mm_movemask_pd(_mm_cmplt_pd(mag, tiny)) != 3)
-        return 0;
-    }
-
-  for(off = 0; off < 8; off += 2)
-    _mm_store_pd(&site[off], _mm_mul_pd(_mm_load_pd(&site[off]), factor));
-
-  return 1;
-}
-
-/* SSE3 kernel that fills the conditional likelihood array x3_start for binary data with
-   4 rate categories (8 doubles per site) from two children.
-
-   tipCase selects where the child data comes from:
-     PLL_TIP_TIP     - both children are read from tipVector via tipX1[] and tipX2[]
-     PLL_TIP_INNER   - first child from tipVector via tipX1[], second child from x2_start
-     PLL_INNER_INNER - children from x1_start and x2_start
-   Any other value triggers assert(0).
-
-   left and right are indexed with 4 doubles per category, EV with 4 doubles in total.
-   Except in the PLL_TIP_TIP case, a site whose 8 entries are all below PLL_MINLIKELIHOOD
-   in magnitude is multiplied by PLL_TWOTOTHE256; each such event either adds wgt[i] to the
-   running total (useFastScaling) or increments ex3[i].
-   With useFastScaling the total is written to *scalerIncrement on return; otherwise
-   *scalerIncrement is not touched. */
-static void newviewGTRGAMMA_BINARY(int tipCase,
-				   double *x1_start, double *x2_start, double *x3_start,
-				   double *EV, double *tipVector,
-				   int *ex3, unsigned char *tipX1, unsigned char *tipX2,
-				   const int n, double *left, double *right, int *wgt, int *scalerIncrement, const pllBoolean useFastScaling
-				   )
-{
-  int site, cat, scaleEvents = 0;
-
-  switch(tipCase)
-    {
-    case PLL_TIP_TIP:
-      /* both children are tips: no rescaling is attempted in this case */
-      for(site = 0; site < n; site++)
-        {
-          double *tipL = &tipVector[2 * tipX1[site]];
-          double *tipR = &tipVector[2 * tipX2[site]];
-
-          for(cat = 0; cat < 4; cat++)
-            gammaBinaryCategory(tipL, tipR, &x3_start[8 * site + 2 * cat],
-                                &left[4 * cat], &right[4 * cat], EV);
-        }
-      break;
-
-    case PLL_TIP_INNER:
-      for(site = 0; site < n; site++)
-        {
-          double *tipL   = &tipVector[2 * tipX1[site]];
-          double *parent = &x3_start[8 * site];
-
-          for(cat = 0; cat < 4; cat++)
-            gammaBinaryCategory(tipL, &x2_start[8 * site + 2 * cat], &parent[2 * cat],
-                                &left[4 * cat], &right[4 * cat], EV);
-
-          if(gammaBinaryScaleSite(parent))
-            {
-              if(useFastScaling)
-                scaleEvents += wgt[site];
-              else
-                ex3[site] += 1;
-            }
-        }
-      break;
-
-    case PLL_INNER_INNER:
-      for(site = 0; site < n; site++)
-        {
-          double *parent = &x3_start[8 * site];
-
-          for(cat = 0; cat < 4; cat++)
-            gammaBinaryCategory(&x1_start[8 * site + 2 * cat], &x2_start[8 * site + 2 * cat],
-                                &parent[2 * cat], &left[4 * cat], &right[4 * cat], EV);
-
-          if(gammaBinaryScaleSite(parent))
-            {
-              if(useFastScaling)
-                scaleEvents += wgt[site];
-              else
-                ex3[site] += 1;
-            }
-        }
-      break;
-
-    default:
-      assert(0);
-    }
-
-  if(useFastScaling)
-    *scalerIncrement = scaleEvents;
-
-}
-
-
-#endif
-
-
-
-
-/* The function below computes partial traversals only down to the point/node in the tree where the 
-   conditional likelihood vector summarizing a subtree is already oriented in the correct direction */
-
-
-/** @brief Compute a partial or full traversal descriptor for a subtree of the topology
-
-   If \a partialTraversal is set to \b PLL_TRUE, compute a partial traversal descriptor that only
-   descends down to the point/node in the tree where the conditional likelihood vector representing
-   a subtree is already oriented in the correct direction. Otherwise every inner node of the
-   subtree is visited. The elements of the traversal descriptor are stored in \a ti and a
-   \a counter keeps track of the number of elements. The entries for the children of a node are
-   always appended before the entry of the node itself.
-
-   @param p
-     Root of the subtree for which we want to compute the traversal descriptor. The two descendants
-     are \a p->next->back and \a p->next->next->back. Nothing is done if \a p is a tip.
-
-   @param ti
-     Traversal descriptor element array. No bounds check is performed here, so the caller has to
-     provide enough entries.
-
-   @param counter
-     Number of elements in the traversal descriptor. Updated when an element is added
-
-   @param maxTips
-     Number of tips in the tree structure
-
-   @param numBranches
-     Number of branches
-   
-   @param partialTraversal
-     If \b PLL_TRUE, a partial traversal descriptor is computed, otherwise a full
-
-   @param rvec
-     Bookkeeping structure for ancestral state recomputation. It is handed to needsRecomp(),
-     getxVector(), protectNode() and unpinNode(), and its \a stlen array is read to decide which
-     child is descended first when both inner children are descended.
-
-   @param useRecom
-     If \b PLL_TRUE, then ancestral state recomputation is enabled.
-   
-   @todo Fill in the ancestral recomputation parameter information 
- */
-static void computeTraversalInfo(nodeptr p, traversalInfo *ti, int *counter, int maxTips, int numBranches, pllBoolean partialTraversal, recompVectors *rvec, pllBoolean useRecom)
-{
-  /* if it's a tip we don't do anything */
-
-  if(isTip(p->number, maxTips))
-    return;
-
-  {
-    int 
-      i;
-
-    /* recom default values; -1 means "no slot assigned" / "nothing to unpin" */
-    int slot = -1,
-        unpin1 = -1, 
-        unpin2 = -1;
-
-    /* get the left and right descendants */
-
-    nodeptr 
-      q = p->next->back,
-        r = p->next->next->back;   
-
-    /* if the left and right children are tips there is not that much to do */
-    if(isTip(r->number, maxTips) && isTip(q->number, maxTips))
-    {
-      /* fix the orientation of p->x */
-
-      if (! p->x)
-        getxnode(p);    
-      
-      assert(p->x);
-
-      /* add the current node triplet p,q,r to the traversal descriptor */
-      ti[*counter].tipCase = PLL_TIP_TIP;
-      ti[*counter].pNumber = p->number;
-      ti[*counter].qNumber = q->number;
-      ti[*counter].rNumber = r->number;
-
-
-      /* copy branches to traversal descriptor */
-      for(i = 0; i < numBranches; i++)
-      {     
-        ti[*counter].qz[i] = q->z[i];
-        ti[*counter].rz[i] = r->z[i];
-      }
-
-      /* recom - add the slot to the traversal descriptor; tips do not get a slot */
-      if(useRecom)
-      {
-        getxVector(rvec, p->number, &slot, maxTips);
-        ti[*counter].slot_p = slot;
-        ti[*counter].slot_q = -1;
-        ti[*counter].slot_r = -1;
-      }
-
-      /* increment length counter */
-
-      *counter = *counter + 1;
-    }
-    else
-    {
-      /* if either r or q are tips, flip them to make sure that the tip data is stored 
-         for q */
-      if(isTip(r->number, maxTips) || isTip(q->number, maxTips))
-      {     
-        if(isTip(r->number, maxTips))
-        {
-          nodeptr 
-            tmp = r;
-          r = q;
-          q = tmp;
-        }
-
-
-        /* if the orientation of the likelihood vector at r is not correct we need to re-compute it 
-           and descend into its subtree to figure out if there are more vectors in there to re-compute and 
-           re-orient */
-
-        if(needsRecomp(useRecom, rvec, r, maxTips) || !partialTraversal) 
-          computeTraversalInfo(r, ti, counter, maxTips, numBranches, partialTraversal, rvec, useRecom);
-        else
-          {
-            if(useRecom)
-              /* the node is available,  now make sure it will not be unpinned until it is read */
-              protectNode(rvec, r->number, maxTips);
-          }
-        /* Now that r is oriented, we can safely set the orientation of p */
-        if(! p->x)
-          getxnode(p);   
-
-        /* make sure that everything is consistent now */
-
-        assert(p->x && r->x);
-
-        /* store data for p, q, r in the traversal descriptor */
-
-        ti[*counter].tipCase = PLL_TIP_INNER;
-        ti[*counter].pNumber = p->number;
-        ti[*counter].qNumber = q->number;
-        ti[*counter].rNumber = r->number;
-
-        for(i = 0; i < numBranches; i++)
-        {       
-          ti[*counter].qz[i] = q->z[i];
-          ti[*counter].rz[i] = r->z[i];
-        }
-
-        if(useRecom)
-        {
-          getxVector(rvec, r->number, &slot, maxTips);
-          ti[*counter].slot_r = slot;
-
-          getxVector(rvec, p->number, &slot, maxTips);
-          ti[*counter].slot_p = slot;
-
-          ti[*counter].slot_q = -1;
-
-          unpin2 = r->number; /* when PLL_TIP_INNER finishes, the INNER input vector r can be unpinned*/
-        }
-
-        *counter = *counter + 1;
-      }
-      else
-      {
-        /* same as above, only now q and r are inner nodes. Hence if they are not 
-           oriented correctly they will need to be recomputed and we need to descend into the 
-           respective subtrees to check if everything is consistent in there, potentially expanding 
-           the traversal descriptor */
-        if(( useRecom && (!partialTraversal) ) || 
-            ( useRecom && needsRecomp(useRecom, rvec, q, maxTips) && needsRecomp(useRecom, rvec, r, maxTips) ))
-        {
-          /* PLL_INNER_INNER and recomputation implies that the order we descend q and r matters, 
-           * if we are in a partial traversal, this is only relevant if both require recomputation
-           * see TODOFER add ref. */
-
-          /* each child has to look up its own stlen entry: if both values were read from the
-             same index the comparison below could never be true and r would always be
-             descended first */
-          int q_stlen = rvec->stlen[q->number - maxTips - 1],
-              r_stlen = rvec->stlen[r->number - maxTips - 1];
-          assert(q_stlen >= 2 && q_stlen <= maxTips - 1);
-          assert(r_stlen >= 2 && r_stlen <= maxTips - 1);
-
-          /* the child with the larger stlen value is descended first */
-          if(q_stlen > r_stlen)
-          {
-            computeTraversalInfo(q, ti, counter, maxTips, numBranches, partialTraversal, rvec, useRecom);
-            computeTraversalInfo(r, ti, counter, maxTips, numBranches, partialTraversal, rvec, useRecom);
-          }
-          else
-          {
-            computeTraversalInfo(r, ti, counter, maxTips, numBranches, partialTraversal, rvec, useRecom);
-            computeTraversalInfo(q, ti, counter, maxTips, numBranches, partialTraversal, rvec, useRecom);
-          }
-        }
-        else
-        {
-          /* Now the order does not matter */
-          /* If we are in a recomputation and partial, only either q or r will be descended */
-
-          if(!partialTraversal || needsRecomp(useRecom, rvec, q, maxTips))
-            computeTraversalInfo(q, ti, counter, maxTips, numBranches, partialTraversal, rvec, useRecom);
-          else
-          {
-            if(useRecom)
-              /* the node is available,  now make sure it will not be unpinned until it is read */
-              protectNode(rvec, q->number, maxTips);
-          }
-
-          if(!partialTraversal || needsRecomp(useRecom, rvec, r, maxTips))
-            computeTraversalInfo(r, ti, counter, maxTips, numBranches, partialTraversal, rvec, useRecom);
-          else
-          {
-            if(useRecom)
-              /* same protection as for q above */
-              protectNode(rvec, r->number, maxTips);
-          }
-        }
-
-
-        if(! p->x)
-          getxnode(p);
-
-        /* check that the vector orientations are consistent now */
-
-        assert(p->x && r->x && q->x);
-
-        ti[*counter].tipCase = PLL_INNER_INNER;
-        ti[*counter].pNumber = p->number;
-        ti[*counter].qNumber = q->number;
-        ti[*counter].rNumber = r->number;
-
-        if(useRecom)
-        {
-          /* We check that the strategy cannot re-use slots */
-          getxVector(rvec, q->number, &slot, maxTips);
-          ti[*counter].slot_q = slot;
-
-          getxVector(rvec, r->number, &slot, maxTips);
-          ti[*counter].slot_r = slot;
-          assert(slot != ti[*counter].slot_q);
-
-          getxVector(rvec, p->number, &slot, maxTips);
-          ti[*counter].slot_p = slot;
-          assert(slot != ti[*counter].slot_q);
-          assert(slot != ti[*counter].slot_r);
-
-          /* And at this point both input INNER vectors can be marked as unpinned */
-          unpin2 = r->number;
-          unpin1 = q->number;
-        }
-
-        for(i = 0; i < numBranches; i++)
-        {       
-          ti[*counter].qz[i] = q->z[i];
-          ti[*counter].rz[i] = r->z[i];
-        }
-
-        *counter = *counter + 1;
-      }
-    }
-    if(useRecom)
-    {
-      /* Mark the nodes as unpinnable(will be unpinned while executing the replacement strategy only if required)*/
-      unpinNode(rvec, unpin1, maxTips);
-      unpinNode(rvec, unpin2, maxTips);
-    }
-  }
-}
-
-/* below are the optimized unrolled, and vectorized versions of the above generic functions 
-   for computing the conditional likelihood at p given child nodes q and r. The actual implementation is located at the end/bottom of this 
-   file.
-   */
-/* now this is the function that just iterates over the length of the traversal descriptor and 
-   just computes the conditional likelihood arrays in the order given by the descriptor.
-   So in a sense, this function has no clue that there is any tree-like structure 
-   in the traversal descriptor, it just operates on an array of structs of given length */ 
-
-
-/** @brief Compute the conditional likelihood for each entry (node) of the traversal descriptor
-
-    Computes the conditional likelihood vectors for each entry (node) in the already computed
-    traversal descriptor, starting from the \a startIndex entry.
-     
-    @param tr
-      PLL instance
-
-    @param pr
-      List of partitions
-
-    @param startIndex
-      From which node to start computing the conditional likelihood vectors in the traversal
-      descriptor
-     
-    @note This function just iterates over the length of the traversal descriptor and 
-      computes the conditional likelihood arrays in the order given by the descriptor.
-      So in a sense, this function has no clue that there is any tree-like structure 
-      in the traversal descriptor, it just operates on an array of structs of given length.
- */
-void pllNewviewIterative (pllInstance *tr, partitionList *pr, int startIndex)
-{
-  traversalInfo 
-    *ti   = tr->td[0].ti;
-
-  int 
-    i, 
-    model;
-
-  int 
-    p_slot = -1, 
-    q_slot = -1, 
-    r_slot = -1;
-
-#ifdef _DEBUG_RECOMPUTATION
-  /* recom */
-#if (defined(_FINE_GRAIN_MPI) || defined(_USE_PTHREADS))
-#else
-  countTraversal(tr);
-#endif
-  /* E recom */
-#endif
-
-  /* loop over traversal descriptor length. Note that on average we only re-compute the conditionals on 3 -4 
-     nodes in RAxML */
-
-  for(i = startIndex; i < tr->td[0].count; i++)
-  {
-
-    traversalInfo 
-      *tInfo = &ti[i];
-    
-    /* Note that the slots refer to different things if recomputation is applied */
-    if(tr->useRecom)
-      {
-        /* a slot has been assigned while computing the traversal descriptor  */
-        p_slot = tInfo->slot_p;
-        q_slot = tInfo->slot_q;
-        r_slot = tInfo->slot_r;
-      }
-    else
-      {
-        /* a fixed slot is always given for each inner node, we only need an offset to get the right index */
-        p_slot = tInfo->pNumber - tr->mxtips - 1;
-        q_slot = tInfo->qNumber - tr->mxtips - 1;
-        r_slot = tInfo->rNumber - tr->mxtips - 1;
-      }
-
-    /* now loop over all partitions for nodes p, q, and r of the current traversal vector entry */
-
-    for(model = 0; model < pr->numberOfPartitions; model++)
-    {
-      /* number of sites in this partition */
-      size_t            
-        width  = (size_t)pr->partitionData[model]->width;
-
-      /* this conditional statement is exactly identical to what we do in pllEvaluateIterative */
-
-      if(tr->td[0].executeModel[model] && width > 0)
-      {       
-        double
-          *x1_start = (double*)NULL,
-          *x2_start = (double*)NULL,
-          *x3_start = pr->partitionData[model]->xVector[p_slot],
-          *left     = (double*)NULL,
-          *right    = (double*)NULL,            
-#if (defined(__SSE3) || defined(__AVX))
-          *x1_gapColumn = (double*)NULL,
-          *x2_gapColumn = (double*)NULL,
-          *x3_gapColumn = (double*)NULL,
-#endif
-          *rateCategories = (double*)NULL,
-          *x1_ascColumn = NULL,
-          *x2_ascColumn = NULL,
-          *x3_ascColumn = NULL;
-
-        int
-          categories,
-          scalerIncrement = 0,
-
-          /* integer weight vector with pattern compression weights */
-
-          *wgt = pr->partitionData[model]->wgt;
-
-        /* pointers for per-site scaling array at node p */
-        
-        int      
-          *ex3     = NULL,
-          *ex3_asc = NULL;
-
-        /* select fastScaling or per-site scaling of conditional likelihood entries */
-
-        pllBoolean
-          fastScaling = tr->fastScaling;
-
-#if (defined(__SSE3) || defined(__AVX))
-        unsigned int
-          *x1_gap = (unsigned int*)NULL,
-          *x2_gap = (unsigned int*)NULL,
-          *x3_gap = (unsigned int*)NULL;
-#endif
-
-        unsigned char
-          *tipX1 = (unsigned char *)NULL,
-          *tipX2 = (unsigned char *)NULL;
-
-        double 
-          qz, 
-          rz;        
-
-        size_t
-#if (defined(__SSE3) || defined(__AVX))
-          gapOffset = 0,
-#endif
-          rateHet = discreteRateCategories(tr->rateHetModel),
-          ascWidth = (size_t)pr->partitionData[model]->states,
-
-          /* get the number of states in the data stored in partition model */
-          
-          states = (size_t)pr->partitionData[model]->states,
-          
-          /* get the length of the current likelihood array stored at node p. This is 
-             important mainly for the SEV-based memory saving option described in here:
-             
-             F. Izquierdo-Carrasco, S.A. Smith, A. Stamatakis: "Algorithms, Data Structures, and Numerics for Likelihood-based Phylogenetic Inference of Huge Trees".
-             
-             So pr->partitionData[model]->xSpaceVector[i] provides the length of the allocated conditional array of partition model
-             and node i 
-          */
-          
-          availableLength = pr->partitionData[model]->xSpaceVector[p_slot],
-          requiredLength = 0;        
-        
-        /* figure out what kind of rate heterogeneity approach we are using */
-
-        if(tr->rateHetModel == PLL_CAT)
-          {              
-            rateCategories = pr->partitionData[model]->perSiteRates;
-            categories = pr->partitionData[model]->numberOfCategories;
-          }
-        else
-          {                              
-            rateCategories = pr->partitionData[model]->gammaRates;
-            categories = 4;
-          }
-
-        /* memory saving stuff, not important right now, but if you are interested ask Fernando */
-
-#if (defined(__SSE3) || defined(__AVX))
-        if(tr->saveMemory)
-          {
-            size_t
-              j,
-              setBits = 0;                
-            
-            gapOffset = states * (size_t)getUndetermined(pr->partitionData[model]->dataType);
-            
-            x1_gap = &(pr->partitionData[model]->gapVector[tInfo->qNumber * pr->partitionData[model]->gapVectorLength]);
-            x2_gap = &(pr->partitionData[model]->gapVector[tInfo->rNumber * pr->partitionData[model]->gapVectorLength]);
-            x3_gap = &(pr->partitionData[model]->gapVector[tInfo->pNumber * pr->partitionData[model]->gapVectorLength]);
-            
-            for(j = 0; j < (size_t)pr->partitionData[model]->gapVectorLength; j++)
-              {              
-                x3_gap[j] = x1_gap[j] & x2_gap[j];
-                setBits += (size_t)(bitcount_32_bit(x3_gap[j])); 
-              }
-            
-            requiredLength = (width - setBits)  * rateHet * states * sizeof(double);            
-          }
-        else
-#endif
-          {
-            /* if we are not trying to save memory the space required to store an inner likelihood array 
-               is the number of sites in the partition times the number of states of the data type in the partition 
-               times the number of discrete GAMMA rates (1 for CAT essentially) times 8 bytes */
-            requiredLength  =  virtual_width( width ) * rateHet * states * sizeof(double);
-            
-            //                   printf( "req: %d %d %d %d\n", requiredLength, width, virtual_width(width), model );
-          }
-        
-        /* Initially, even when not using memory saving no space is allocated for inner likelihood arrays hence 
-           availableLength will be zero at the very first time we traverse the tree.
-           Hence we need to allocate something here */
-
-        if(requiredLength != availableLength)
-          {               
-            /* if there is a vector of incorrect length assigned here i.e., x3 != NULL we must free 
-               it first */
-            if(x3_start)
-              rax_free(x3_start);
-            
-            /* allocate memory: note that here we use a byte-boundary aligned malloc, because we need the vectors
-               to be aligned at 16 BYTE (SSE3) or 32 BYTE (AVX) boundaries! */
-            
-            rax_posix_memalign ((void **)&x3_start, PLL_BYTE_ALIGNMENT, requiredLength);              
-            
-            /* update the data structures for consistent bookkeeping */
-            pr->partitionData[model]->xVector[p_slot]      = x3_start;
-            pr->partitionData[model]->xSpaceVector[p_slot] = requiredLength;
-          }
-        
-
-        /* 
-           if we are not using fast scaling, we need to assign memory for storing 
-           integer vectors at each inner node that are as long as the sites of the 
-           partition. IMPORTANT: while this looks as if this might be a memory saving trick 
-           it is not. The ex3 vectors will be allocated once during the very first tree 
-           traversal and then never again because they will always have the required length!
-        */
-
-        if(!fastScaling)
-          {
-            size_t
-              availableExpLength = pr->partitionData[model]->expSpaceVector[p_slot],
-              requiredExpLength  = width * sizeof(int);
-            
-            ex3 = pr->partitionData[model]->expVector[p_slot];
-            
-            if(requiredExpLength != availableExpLength)
-              {
-                if(ex3)
-                  rax_free(ex3);
-                
-                rax_posix_memalign ((void **)&ex3, PLL_BYTE_ALIGNMENT, requiredExpLength);               
-                
-                pr->partitionData[model]->expVector[p_slot] = ex3;
-                
-                pr->partitionData[model]->expSpaceVector[p_slot] = requiredExpLength;
-              }
-          }
-
-        /* now just set the pointers for data accesses in the newview() implementations above to the corresponding values 
-           according to the tip case */
-        
-        switch(tInfo->tipCase)
-          {
-          case PLL_TIP_TIP:           
-            tipX1    = pr->partitionData[model]->yVector[tInfo->qNumber];
-            tipX2    = pr->partitionData[model]->yVector[tInfo->rNumber];
-
-#if (defined(__SSE3) || defined(__AVX))
-            if(tr->saveMemory)
-              {
-                x1_gapColumn   = &(pr->partitionData[model]->tipVector[gapOffset]);
-                x2_gapColumn   = &(pr->partitionData[model]->tipVector[gapOffset]);
-                x3_gapColumn   = &(pr->partitionData[model]->gapColumn[(tInfo->pNumber - tr->mxtips - 1) * states * rateHet]);
-              }
-#endif            
-
-#if (defined(_FINE_GRAIN_MPI) || defined(_USE_PTHREADS))
-            if(pr->partitionData[model]->ascBias && tr->threadID == 0)
-#else
-            if(pr->partitionData[model]->ascBias)
-#endif
-             {
-              size_t
-                k;
-              
-              x3_ascColumn = &pr->partitionData[model]->ascVector[(tInfo->pNumber - tr->mxtips - 1) * pr->partitionData[model]->ascOffset];
-              ex3_asc      = &pr->partitionData[model]->ascExpVector[(tInfo->pNumber - tr->mxtips - 1) * ascWidth];
-
-              for(k = 0; k < ascWidth; k++)
-                ex3_asc[k] = 0;               
-             }
-            /* if we do per-site log likelihood scaling, and both child nodes are tips,
-               just initialize the vector with zeros, i.e., no scaling events */
-
-            if(!fastScaling)
-              {
-                size_t
-                  k;                                 
-
-                for(k = 0; k < width; k++)
-                  ex3[k] = 0;
-              }
-            break;
-          case PLL_TIP_INNER:                
-            tipX1    =  pr->partitionData[model]->yVector[tInfo->qNumber];
-            x2_start = pr->partitionData[model]->xVector[r_slot];
-            assert(r_slot != p_slot);
-            
-#if (defined(__SSE3) || defined(__AVX))
-            if(tr->saveMemory)
-              { 
-                x1_gapColumn   = &(pr->partitionData[model]->tipVector[gapOffset]);
-                x2_gapColumn   = &pr->partitionData[model]->gapColumn[(tInfo->rNumber - tr->mxtips - 1) * states * rateHet];
-                x3_gapColumn   = &pr->partitionData[model]->gapColumn[(tInfo->pNumber - tr->mxtips - 1) * states * rateHet];
-              }
-#endif
-
-#if (defined(_FINE_GRAIN_MPI) || defined(_USE_PTHREADS))
-            if(pr->partitionData[model]->ascBias && tr->threadID == 0)
-#else
-              if(pr->partitionData[model]->ascBias)
-#endif      
-              {   
-                size_t
-                  k;
-
-                int 
-                  *ex2_asc;
-                
-                x2_ascColumn = &pr->partitionData[model]->ascVector[(tInfo->rNumber - tr->mxtips - 1) * pr->partitionData[model]->ascOffset];
-                x3_ascColumn = &pr->partitionData[model]->ascVector[(tInfo->pNumber - tr->mxtips - 1) * pr->partitionData[model]->ascOffset];
-                
-                ex2_asc = &pr->partitionData[model]->ascExpVector[(tInfo->rNumber - tr->mxtips - 1) * ascWidth];
-                ex3_asc = &pr->partitionData[model]->ascExpVector[(tInfo->pNumber - tr->mxtips - 1) * ascWidth];
-
-                for(k = 0; k < ascWidth; k++)
-                  ex3_asc[k] = ex2_asc[k];
-              }
-            
-            /* if one child node is not a tip, just copy the values from there, could also be done with memcpy of course 
-               the elements of ex3[] will then potentially be further incremented in the actual newview() if scaling events 
-               take place */
-
-            if(!fastScaling)
-              {
-                size_t 
-                  k;
-                int
-                  *ex2 = pr->partitionData[model]->expVector[r_slot];                
-                      
-                for(k = 0; k < width; k++)
-                  ex3[k] = ex2[k];
-              }
-            break;
-          case PLL_INNER_INNER:                              
-            x1_start       = pr->partitionData[model]->xVector[q_slot];
-            x2_start       = pr->partitionData[model]->xVector[r_slot];
-            assert(r_slot != p_slot);
-            assert(q_slot != p_slot);
-            assert(q_slot != r_slot);
-            
-#if (defined(__SSE3) || defined(__AVX))
-            if(tr->saveMemory)
-              {
-                x1_gapColumn   = &pr->partitionData[model]->gapColumn[(tInfo->qNumber - tr->mxtips - 1) * states * rateHet];
-                x2_gapColumn   = &pr->partitionData[model]->gapColumn[(tInfo->rNumber - tr->mxtips - 1) * states * rateHet];
-                x3_gapColumn   = &pr->partitionData[model]->gapColumn[(tInfo->pNumber - tr->mxtips - 1) * states * rateHet];
-              }
-#endif
-
-#if (defined(_FINE_GRAIN_MPI) || defined(_USE_PTHREADS))
-              if(pr->partitionData[model]->ascBias && tr->threadID == 0)
-#else
-              if(pr->partitionData[model]->ascBias)
-#endif          
-               {                
-                 size_t
-                   k;
-
-                 int 
-                   *ex1_asc,
-                   *ex2_asc;
-                 
-                 x1_ascColumn = &pr->partitionData[model]->ascVector[(tInfo->qNumber - tr->mxtips - 1) * pr->partitionData[model]->ascOffset];
-                 x2_ascColumn = &pr->partitionData[model]->ascVector[(tInfo->rNumber - tr->mxtips - 1) * pr->partitionData[model]->ascOffset];
-                 x3_ascColumn = &pr->partitionData[model]->ascVector[(tInfo->pNumber - tr->mxtips - 1) * pr->partitionData[model]->ascOffset];
-                 
-                 ex1_asc = &pr->partitionData[model]->ascExpVector[(tInfo->qNumber - tr->mxtips - 1) * ascWidth];
-                 ex2_asc = &pr->partitionData[model]->ascExpVector[(tInfo->rNumber - tr->mxtips - 1) * ascWidth];
-                 ex3_asc = &pr->partitionData[model]->ascExpVector[(tInfo->pNumber - tr->mxtips - 1) * ascWidth];
-
-                 for(k = 0; k < ascWidth; k++)
-                   ex3_asc[k] = ex1_asc[k] + ex2_asc[k];
-               }
-            /* both child nodes are inner nodes, thus the initial value of the scaling vector 
-               ex3 is the sum of the scaling values of the left and right child node */
-
-            if(!fastScaling)
-              {
-                size_t
-                  k;
-                      
-                int            
-                  *ex1      = pr->partitionData[model]->expVector[q_slot],
-                  *ex2      = pr->partitionData[model]->expVector[r_slot];                    
-                      
-                  for(k = 0; k < width; k++)
-                    ex3[k] = ex1[k] + ex2[k];
-              }
-            break;
-          default:
-            assert(0);
-          }
-
-        /* set the pointers to the left and right P matrices to the pre-allocated memory space for storing them */
-
-        left  = pr->partitionData[model]->left;
-        right = pr->partitionData[model]->right;
-
-        /* if we use per-partition branch length optimization 
-           get the branch length of partition model and take the log otherwise 
-           use the joint branch length among all partitions that is always stored 
-           at index [0] */
-
-        if(pr->perGeneBranchLengths)
-        {
-          qz = tInfo->qz[model];                                    
-          rz = tInfo->rz[model];                  
-        }
-        else
-        {
-          qz = tInfo->qz[0];
-          rz = tInfo->rz[0];
-        }
-
-        qz = (qz > PLL_ZMIN) ? log(qz) : log(PLL_ZMIN);                        
-        rz = (rz > PLL_ZMIN) ? log(rz) : log(PLL_ZMIN);                       
-
-        /* compute the left and right P matrices */
-
-        if(pr->partitionData[model]->dataType == PLL_AA_DATA &&
-        		(pr->partitionData[model]->protModels == PLL_LG4M || pr->partitionData[model]->protModels == PLL_LG4X))
-                makeP_FlexLG4(qz, rz, pr->partitionData[model]->gammaRates,
-                              pr->partitionData[model]->EI_LG4,
-                              pr->partitionData[model]->EIGN_LG4,
-                              4, left, right, 20);
-        else
-        makeP(qz, rz, rateCategories,   pr->partitionData[model]->EI,
-              pr->partitionData[model]->EIGN, categories,
-              left, right, tr->saveMemory, tr->maxCategories, states);
-
-
-#if (!defined(__SSE3) && !defined(__AVX) && !defined(__MIC_NATIVE))
-        assert(!tr->saveMemory);
-
-        /* figure out if we need to compute the CAT or GAMMA model of rate heterogeneity */
-
-        if(tr->rateHetModel == PLL_CAT)
-         {
-
-           newviewCAT_FLEX(tInfo->tipCase,  pr->partitionData[model]->EV, pr->partitionData[model]->rateCategory,
-                           x1_start, x2_start, x3_start, pr->partitionData[model]->tipVector,
-                           ex3, tipX1, tipX2,
-                           width, left, right, wgt, &scalerIncrement, fastScaling, states);
-         }
-        else 
-         {
-            newviewGAMMA_FLEX(tInfo->tipCase,
-                 x1_start, x2_start, x3_start, pr->partitionData[model]->EV, pr->partitionData[model]->tipVector,
-                 0, tipX1, tipX2,
-                 width, left, right, wgt, &scalerIncrement, fastScaling, states, getUndetermined(pr->partitionData[model]->dataType) + 1);
-         }
-#else
-        /* dedicated highly optimized functions. Analogously to the functions in evaluateGeneric() 
-           we also siwtch over the state number */
-
-        switch(states)
-        {               
-        case 2:
-          assert (!tr->saveMemory);
-          if (tr->rateHetModel == PLL_CAT)
-           {
-             newviewGTRCAT_BINARY(tInfo->tipCase,  pr->partitionData[model]->EV, pr->partitionData[model]->rateCategory,
-                                  x1_start, x2_start, x3_start, pr->partitionData[model]->tipVector,
-                                  ex3, tipX1, tipX2,
-                                  width, left, right, wgt, &scalerIncrement, fastScaling);
-           }
-          else
-           {
-             newviewGTRGAMMA_BINARY(tInfo->tipCase,
-                                    x1_start, x2_start, x3_start, pr->partitionData[model]->EV, pr->partitionData[model]->tipVector,
-                                    ex3, tipX1, tipX2,
-                                    width, left, right, wgt, &scalerIncrement, fastScaling);                  
-           }
-          break;
-
-        case 4: /* DNA */
-#ifdef __MIC_NATIVE
-
-              /* CAT & memory saving are not supported on MIC */
-
-              assert(!tr->saveMemory);
-              assert(tr->rateHetModel == PLL_GAMMA);
-
-              newviewGTRGAMMA_MIC(tInfo->tipCase,
-                                x1_start, x2_start, x3_start, pr->partitionData[model]->EV, pr->partitionData[model]->tipVector,
-                                ex3, tipX1, tipX2,
-                                width, left, right, wgt, &scalerIncrement, fastScaling);
-#else
-          if(tr->rateHetModel == PLL_CAT)
-            {                                
-              
-              if(tr->saveMemory)
-#ifdef __AVX
-                newviewGTRCAT_AVX_GAPPED_SAVE(tInfo->tipCase,  pr->partitionData[model]->EV, pr->partitionData[model]->rateCategory,
-                                              x1_start, x2_start, x3_start, pr->partitionData[model]->tipVector,
-                                              ex3, tipX1, tipX2,
-                                              width, left, right, wgt, &scalerIncrement, fastScaling, x1_gap, x2_gap, x3_gap,
-                                              x1_gapColumn, x2_gapColumn, x3_gapColumn, tr->maxCategories);
-#else
-                newviewGTRCAT_SAVE(tInfo->tipCase,  pr->partitionData[model]->EV, pr->partitionData[model]->rateCategory,
-                                   x1_start, x2_start, x3_start, pr->partitionData[model]->tipVector,
-                                   ex3, tipX1, tipX2,
-                                   width, left, right, wgt, &scalerIncrement, fastScaling, x1_gap, x2_gap, x3_gap,
-                                   x1_gapColumn, x2_gapColumn, x3_gapColumn, tr->maxCategories);
-#endif
-              else
-#ifdef __AVX
-                newviewGTRCAT_AVX(tInfo->tipCase,  pr->partitionData[model]->EV, pr->partitionData[model]->rateCategory,
-                                  x1_start, x2_start, x3_start, pr->partitionData[model]->tipVector,
-                                  ex3, tipX1, tipX2,
-                                  width, left, right, wgt, &scalerIncrement, fastScaling);
-#else
-              newviewGTRCAT(tInfo->tipCase,  pr->partitionData[model]->EV, pr->partitionData[model]->rateCategory,
-                            x1_start, x2_start, x3_start, pr->partitionData[model]->tipVector,
-                            ex3, tipX1, tipX2,
-                            width, left, right, wgt, &scalerIncrement, fastScaling);
-#endif
-            }
-          else
-            {
-              
-              if(tr->saveMemory)
-#ifdef __AVX
-                newviewGTRGAMMA_AVX_GAPPED_SAVE(tInfo->tipCase,
-                                                x1_start, x2_start, x3_start, pr->partitionData[model]->EV, pr->partitionData[model]->tipVector,
-                                                ex3, tipX1, tipX2,
-                                                width, left, right, wgt, &scalerIncrement, fastScaling,
-                                                x1_gap, x2_gap, x3_gap, 
-                                                x1_gapColumn, x2_gapColumn, x3_gapColumn);
-
-#else
-              newviewGTRGAMMA_GAPPED_SAVE(tInfo->tipCase,
-                                          x1_start, x2_start, x3_start, pr->partitionData[model]->EV, pr->partitionData[model]->tipVector,
-                                          ex3, tipX1, tipX2,
-                                          width, left, right, wgt, &scalerIncrement, fastScaling,
-                                          x1_gap, x2_gap, x3_gap, 
-                                          x1_gapColumn, x2_gapColumn, x3_gapColumn);
-#endif
-              else
-#ifdef __AVX
-                newviewGTRGAMMA_AVX(tInfo->tipCase,
-                                    x1_start, x2_start, x3_start, pr->partitionData[model]->EV, pr->partitionData[model]->tipVector,
-                                    ex3, tipX1, tipX2,
-                                    width, left, right, wgt, &scalerIncrement, fastScaling);
-#else
-              newviewGTRGAMMA(tInfo->tipCase,
-                              x1_start, x2_start, x3_start, pr->partitionData[model]->EV, pr->partitionData[model]->tipVector,
-                              ex3,tipX1, tipX2,
-                              width, left, right, wgt, &scalerIncrement, fastScaling);
-#endif
-            }
-#endif
-
-            break;                  
-          case 20: /* proteins */
-
-#ifdef __MIC_NATIVE
-
-                        /* CAT & memory saving are not supported on MIC */
-
-                        assert(!tr->saveMemory);
-                        assert(tr->rateHetModel == PLL_GAMMA);
-
-                        if(pr->partitionData[model]->protModels == PLL_LG4M || pr->partitionData[model]->protModels == PLL_LG4X)
-                        {
-                                  newviewGTRGAMMAPROT_LG4_MIC(tInfo->tipCase,
-                            x1_start, x2_start, x3_start, pr->partitionData[model]->EV_LG4, pr->partitionData[model]->tipVector_LG4,
-                            tipX1, tipX2,
-                            width, left, right, wgt, &scalerIncrement);
-                        }
-                        else
-                        {
-                                  newviewGTRGAMMAPROT_MIC(tInfo->tipCase,
-                                                x1_start, x2_start, x3_start, pr->partitionData[model]->EV, pr->partitionData[model]->tipVector,
-                                                ex3, tipX1, tipX2,
-                                                width, left, right, wgt, &scalerIncrement, fastScaling);
-                        }
-#else
-
-            if(tr->rateHetModel == PLL_CAT)
-            {
-
-
-              if(tr->saveMemory)
-#ifdef __AVX
-                newviewGTRCATPROT_AVX_GAPPED_SAVE(tInfo->tipCase,  pr->partitionData[model]->EV, pr->partitionData[model]->rateCategory,
-                                                  x1_start, x2_start, x3_start, pr->partitionData[model]->tipVector,
-                                                  ex3, tipX1, tipX2, width, left, right, wgt, &scalerIncrement, fastScaling, 
-                                                  x1_gap, x2_gap, x3_gap,
-                                                  x1_gapColumn, x2_gapColumn, x3_gapColumn, tr->maxCategories);
-#else
-              newviewGTRCATPROT_SAVE(tInfo->tipCase,  pr->partitionData[model]->EV, pr->partitionData[model]->rateCategory,
-                                     x1_start, x2_start, x3_start, pr->partitionData[model]->tipVector,
-                                     ex3, tipX1, tipX2, width, left, right, wgt, &scalerIncrement, fastScaling, x1_gap, x2_gap, x3_gap,
-                                     x1_gapColumn, x2_gapColumn, x3_gapColumn, tr->maxCategories);
-#endif
-              else
-#ifdef __AVX
-                newviewGTRCATPROT_AVX(tInfo->tipCase,  pr->partitionData[model]->EV, pr->partitionData[model]->rateCategory,
-                                      x1_start, x2_start, x3_start, pr->partitionData[model]->tipVector,
-                                      ex3, tipX1, tipX2, width, left, right, wgt, &scalerIncrement, fastScaling);
-#else
-              newviewGTRCATPROT(tInfo->tipCase,  pr->partitionData[model]->EV, pr->partitionData[model]->rateCategory,
-                                x1_start, x2_start, x3_start, pr->partitionData[model]->tipVector,
-                                ex3, tipX1, tipX2, width, left, right, wgt, &scalerIncrement, fastScaling);                     
-#endif
-            }
-            else
-            {
-
-              
-
-              if(tr->saveMemory)
-#ifdef __AVX
-                newviewGTRGAMMAPROT_AVX_GAPPED_SAVE(tInfo->tipCase,
-                                                    x1_start, x2_start, x3_start,
-                                                    pr->partitionData[model]->EV,
-                                                    pr->partitionData[model]->tipVector,
-                                                    ex3, tipX1, tipX2,
-                                                    width, left, right, wgt, &scalerIncrement, fastScaling,
-                                                    x1_gap, x2_gap, x3_gap,
-                                                    x1_gapColumn, x2_gapColumn, x3_gapColumn);
-#else
-                newviewGTRGAMMAPROT_GAPPED_SAVE(tInfo->tipCase,
-                                                x1_start, x2_start, x3_start,
-                                                pr->partitionData[model]->EV,
-                                                pr->partitionData[model]->tipVector,
-                                                ex3, tipX1, tipX2,
-                                                width, left, right, wgt, &scalerIncrement, fastScaling,
-                                                x1_gap, x2_gap, x3_gap,
-                                                x1_gapColumn, x2_gapColumn, x3_gapColumn);
-#endif
-            
-             else
-                        {
-                          if(pr->partitionData[model]->protModels == PLL_LG4M || pr->partitionData[model]->protModels == PLL_LG4X)
-                            {
-#ifdef __AVX 
-                              newviewGTRGAMMAPROT_AVX_LG4(tInfo->tipCase,
-                                                          x1_start, x2_start, x3_start,
-                                                          pr->partitionData[model]->EV_LG4,
-                                                          pr->partitionData[model]->tipVector_LG4,
-                                                          (int*)NULL, tipX1, tipX2,
-                                                          width, left, right, wgt, &scalerIncrement, PLL_TRUE);
-#else
-                              newviewGTRGAMMAPROT_LG4(tInfo->tipCase,
-                                                      x1_start, x2_start, x3_start,
-                                                      pr->partitionData[model]->EV_LG4,
-                                                      pr->partitionData[model]->tipVector_LG4,
-                                                      (int*)NULL, tipX1, tipX2,
-                                                      width, left, right, 
-                                                      wgt, &scalerIncrement, PLL_TRUE);
-#endif                      
-                            }
-              else
-#ifdef __AVX
-                newviewGTRGAMMAPROT_AVX(tInfo->tipCase,
-                                        x1_start, x2_start, x3_start, pr->partitionData[model]->EV, pr->partitionData[model]->tipVector,
-                                        ex3, tipX1, tipX2,
-                                        width, left, right, wgt, &scalerIncrement, fastScaling);
-#else
-              newviewGTRGAMMAPROT(tInfo->tipCase,
-                                  x1_start, x2_start, x3_start, pr->partitionData[model]->EV, pr->partitionData[model]->tipVector,
-                                  ex3, tipX1, tipX2,
-                                  width, left, right, wgt, &scalerIncrement, fastScaling);
-#endif                 
-            }   
-        }
-#endif
-            
-            break;      
-          default:
-            assert(0);
-        }
-#endif
-
-
-#if (defined(_FINE_GRAIN_MPI) || defined(_USE_PTHREADS))
-       if(pr->partitionData[model]->ascBias && tr->threadID == 0)
-#else
-       if(pr->partitionData[model]->ascBias)
-#endif         
-         {
-           switch(tr->rateHetModel)
-             {
-             case PLL_CAT:
-               {
-                 double 
-                   rates = 1.0;
-                 
-                 //need to re-calculate transition probabilities assuming a rate of 1.0 
-                 makeP(qz, rz, 
-                       &rates,  
-                       pr->partitionData[model]->EI,
-                       pr->partitionData[model]->EIGN,
-                       1, 
-                       left, right, 
-                       tr->saveMemory,
-                       tr->maxCategories,
-                       states);
-                 
-                 newviewAscCat(tInfo->tipCase,
-                               x1_ascColumn, x2_ascColumn, x3_ascColumn,
-                               pr->partitionData[model]->EV,
-                               pr->partitionData[model]->tipVector,
-                               ex3_asc,
-                               states, left, right, states);
-               }
-               break;
-             case PLL_GAMMA:
-               newviewAscGamma(tInfo->tipCase,
-                               x1_ascColumn, x2_ascColumn, x3_ascColumn,
-                               pr->partitionData[model]->EV,
-                               pr->partitionData[model]->tipVector,
-                               ex3_asc,
-                               states, left, right, states);                        
-               break;
-             default:
-               assert(0);
-             }
-         }
-
-
-        /* important step, here we essentiallt recursively compute the number of scaling multiplications 
-           at node p: it's the sum of the number of scaling multiplications already conducted 
-           for computing nodes q and r plus the scaling multiplications done at node p */
-
-        if(fastScaling)
-          {
-            pr->partitionData[model]->globalScaler[tInfo->pNumber] =
-              pr->partitionData[model]->globalScaler[tInfo->qNumber] +
-              pr->partitionData[model]->globalScaler[tInfo->rNumber] +
-              (unsigned int)scalerIncrement;
-            
-            /* check that we are not getting an integer overflow ! */
-
-            assert(pr->partitionData[model]->globalScaler[tInfo->pNumber] < INT_MAX);
-          }
-        
-        /* show the output vector */
-      } 
-    }
-  }
-}
-
-/** @brief Build the traversal descriptor for the subtree rooted at \a p.
-
-    Fills the traversal descriptor \a tr->td[0] for the subtree with root \a p.
-    By traversal descriptor we essentially mean a preorder traversal of the
-    unrooted topology obtained by rooting it at node \a p.
-    If \a partialTraversal is set to \b PLL_TRUE, subtrees that are already
-    oriented correctly (i.e. root node \a r of the subtree has \a r->x == 1)
-    are left out of the traversal descriptor.
-    When recomputation (\a tr->useRecom) is enabled, the subtree lengths are
-    refreshed first.
-
-    @param tr
-      PLL instance
-
-    @param p
-      Node assumed to be the root
-
-    @param partialTraversal
-      If set to \b PLL_TRUE, then a partial traversal descriptor is computed.
-
-    @param numBranches
-      Number of branches (either per-partition branch or joint branch estimate)
-*/
-void computeTraversal(pllInstance *tr, nodeptr p, pllBoolean partialTraversal, int numBranches)
-{
-  /* the subtree lengths only have to be updated when recomputation is in use */
-  if(tr->useRecom)
-  {
-    if(!partialTraversal)
-      computeFullTraversalInfoStlen(p, tr->mxtips, tr->rvec);
-    else
-    {
-      /* counter required by the callee; its final value is not used here */
-      int counter = 0;
-
-      computeTraversalInfoStlen(p, tr->mxtips, tr->rvec, &counter);
-    }
-  }
-
-  computeTraversalInfo(p,
-                       &(tr->td[0].ti[0]),
-                       &(tr->td[0].count),
-                       tr->mxtips,
-                       numBranches,
-                       partialTraversal,
-                       tr->rvec,
-                       tr->useRecom);
-}
-
-
-/** @brief Recompute the conditional likelihood vectors in the subtree rooted at \a p
-
-    Computes the conditional likelihood vectors of the nodes in the subtree rooted at node \a p.
-    The conditional likelihood vector at node \a p is recomputed regardless of whether the
-    orientation (i.e. \a p->x) is correct or not, and, recursively, the likelihoods at each node
-    in the subtree as needed. If \a masked is set to \b PLL_TRUE, no computation takes place for
-    partitions whose likelihood has already converged (for example as a result of a previous
-    branch length optimization).
-
-    @param tr
-      PLL instance
-
-    @param pr
-      List of partitions
-
-    @param p
-      Root of the subtree for which we want to recompute the conditional likelihood vectors
-
-    @param masked
-      If set to \b PLL_TRUE, then likelihood vectors of partitions that are converged are
-      not recomputed.
- */
-void pllUpdatePartials (pllInstance *tr, partitionList *pr, nodeptr p, pllBoolean masked)
-{
-  int
-    model;
-
-  /* a tip has nothing that could be recomputed */
-  if(isTip(p->number, tr->mxtips))
-    return;
-
-  /* start from an empty traversal descriptor and let computeTraversal() collect the
-     nodes below p that need an update; the branch count depends on whether branch
-     lengths are estimated per partition or jointly */
-  tr->td[0].count = 0;
-  computeTraversal(tr, p, PLL_TRUE, pr->perGeneBranchLengths ? pr->numberOfPartitions : 1);
-
-  /* the descriptor was rebuilt, so it is flagged as changed unconditionally; whether it
-     really differs from the previous one is not checked */
-  tr->td[0].traversalHasChanged = PLL_TRUE;
-
-  /* Masked newview: partitions whose branch length optimization has already converged
-     are switched off, so that only the remaining partitions are traversed again.
-     Background:
-
-     A. Stamatakis, M. Ott: "Load Balance in the Phylogenetic Likelihood Kernel". Proceedings of ICPP 2009
-
-     tr->partitionConverged[] holds that information; it is copied to executeModel here and
-     from there into the execute mask of the traversal descriptor further below. */
-  if(masked)
-  {
-    for(model = 0; model < pr->numberOfPartitions; model++)
-      pr->partitionData[model]->executeModel = tr->partitionConverged[model] ? PLL_FALSE : PLL_TRUE;
-  }
-
-  /* only do work if the traversal descriptor is not empty */
-  if(tr->td[0].count > 0)
-  {
-    storeExecuteMaskInTraversalDescriptor(tr, pr);
-
-#if (defined(_FINE_GRAIN_MPI) || defined(_USE_PTHREADS))
-    /* parallel builds: hand the job to the workers; no reduction is needed, the barrier
-       only makes sure that every thread has finished its share */
-    pllMasterBarrier(tr, pr, PLL_THREAD_NEWVIEW);
-#else
-    /* sequential build: run the iterative newview directly */
-    pllNewviewIterative(tr, pr, 0);
-#endif
-  }
-
-  /* restore the execute flags that were altered for the masked run */
-  if(masked)
-  {
-    for(model = 0; model < pr->numberOfPartitions; model++)
-      pr->partitionData[model]->executeModel = PLL_TRUE;
-  }
-
-  tr->td[0].traversalHasChanged = PLL_FALSE;
-}
-
-/* marginal ancestral probability vector at a node for the CAT/PSR model */
-
-/** @brief Compute the marginal ancestral probability vector for CAT/PSR model
-
-    For every site, the conditional likelihood vector taken from \a x3 is multiplied with the
-    zero branch length P matrix of the rate category assigned to that site, and the resulting
-    vector is divided by the sum of its entries before it is stored in \a ancestralBuffer.
-    The sum is not checked for being zero.
-
-    @param x3
-      Conditional likelihood of the node for which we are computing the ancestral vector
-
-    @param ancestralBuffer
-      Buffer where to store the marginal ancestral probability vector
-
-    @param diagptable
-      A zero branch length P matrix
-
-    @param n
-      Number of sites in the partition to process (in the case of MPI/PTHREADS, the number of sites in the partition assigned to the current thread/process)
-
-    @param numStates
-      Number of states
-
-    @param cptr
-      Array where the rate for each site in the compressed partition alignment is stored
-
- */
-static void ancestralCat(double *x3, double *ancestralBuffer, double *diagptable, const int n, const int numStates, int *cptr)
-{
-  /* scratch space for the products of one site before normalization */
-  double
-    *unscaled = (double*)rax_malloc(sizeof(double) * numStates);
-
-  const int
-    matrixSize = numStates * numStates;
-
-  int
-    site;
-
-  for(site = 0; site < n; site++)
-    {
-      double
-        total = 0.0,
-        *clv    = x3 + numStates * site,
-        *result = ancestralBuffer + numStates * site,
-        *pmat   = diagptable + cptr[site] * matrixSize;   /* P matrix of this site's rate category */
-
-      int
-        row,
-        col;
-
-      /* matrix-vector product, accumulating the normalization constant on the fly */
-      for(row = 0; row < numStates; row++)
-        {
-          double
-            dot = 0.0;
-
-          for(col = 0; col < numStates; col++)
-            dot += clv[col] * pmat[row * numStates + col];
-
-          total += dot;
-          unscaled[row] = dot;
-        }
-
-      /* scale so that the entries of the site add up to one */
-      for(row = 0; row < numStates; row++)
-        result[row] = unscaled[row] / total;
-    }
-
-  rax_free(unscaled);
-}
-
-
-/* marginal ancestral states for GAMMA models;
-   for the equation to obtain marginal ancestral states
-   see Ziheng Yang's book */
-
-/** @brief Compute the marginal ancestral probability vector for GAMMA model
-
-    For every site, the conditional likelihood vector of each rate category taken from \a x3
-    is multiplied with the zero branch length P matrix of that category. The products are
-    summed over the categories and the resulting vector is divided by the sum of its entries
-    before it is stored in \a ancestralBuffer. The sum is not checked for being zero.
-
-    @param x3
-      Conditional likelihood of the node for which we are computing the ancestral vector
-
-    @param ancestralBuffer
-      Buffer where to store the marginal ancestral probability vector
-
-    @param diagptable
-      A zero branch length P matrix
-
-    @param n
-      Number of sites in the partition to process (in the case of MPI/PTHREADS, the number of sites in the partition assigned to the current thread/process)
-
-    @param numStates
-      Number of states
-
-    @param gammaStates
-      Number of GAMMA categories times number of states; used as the per-site stride in \a x3.
-      NOTE(review): the category loop is fixed to 4 iterations, so this is presumably always
-      4 * \a numStates -- confirm with the callers.
-
- */
-static void ancestralGamma(double *x3, double *ancestralBuffer, double *diagptable, const int n, const int numStates, const int gammaStates)
-{
-  const int
-    matrixSize = numStates * numStates;
-
-  /* per-state sums over all rate categories for the site being processed */
-  double
-    *accum = (double*)rax_malloc(sizeof(double) * numStates);
-
-  int
-    site;
-
-  for(site = 0; site < n; site++)
-    {
-      double
-        total = 0.0,
-        *siteClv = x3 + gammaStates * site,
-        *result  = ancestralBuffer + numStates * site;
-
-      int
-        cat,
-        row,
-        col;
-
-      for(row = 0; row < numStates; row++)
-        accum[row] = 0.0;
-
-      /* the number of rate categories is hard-coded to 4 */
-      for(cat = 0; cat < 4; cat++)
-        {
-          double
-            *clv  = siteClv + numStates * cat,
-            *pmat = diagptable + cat * matrixSize;
-
-          for(row = 0; row < numStates; row++)
-            {
-              double
-                dot = 0.0;
-
-              for(col = 0; col < numStates; col++)
-                dot += clv[col] * pmat[row * numStates + col];
-
-              accum[row] += dot;
-              total += dot;
-            }
-        }
-
-      /* scale so that the entries of the site add up to one */
-      for(row = 0; row < numStates; row++)
-        result[row] = accum[row] / total;
-    }
-
-  rax_free(accum);
-}
-
-/* dedicated P matrix for a branch length of zero */
-/** @brief Compute a dedicated zero branch length P matrix
-
-    Computes, for every rate category, a P matrix under the assumption of a branch
-    length of zero. This is used for the marginal ancestral probabilities recomputation.
-    The first column of every matrix is set to 1.0, the remaining entries are the
-    corresponding entries of \a EI scaled by the exponential term of the category.
-
-    @param rptr
-      Array of values for rate categories
-
-    @param EI
-      Inverse eigenvector of Q matrix
-
-    @param EIGN
-      Eigenvalues of Q matrix
-
-    @param numberOfCategories
-      Number of rate categories
-
-    @param left
-      Where to store the resulting P matrix
-
-    @param numStates
-      Number of states (at most 64, which is the size of the local work arrays)
- */
-static void calc_diagp_Ancestral(double *rptr, double *EI,  double *EIGN, int numberOfCategories, double *left, const int numStates)
-{
-  const int
-    matrixSize = numStates * numStates;
-
-  /* the branch length that is plugged into the exponentials */
-  double
-    branch = 0.0,
-    scaledEigen[64],
-    expTerm[64];
-
-  int
-    cat,
-    row,
-    col;
-
-  assert(numStates <= 64);
-
-  for(col = 0; col < numStates; col++)
-    scaledEigen[col] = EIGN[col] * branch;
-
-  for(cat = 0; cat < numberOfCategories; cat++)
-    {
-      const int
-        base = matrixSize * cat;
-
-      expTerm[0] = 1.0;
-
-      for(col = 1; col < numStates; col++)
-        expTerm[col] = exp(rptr[cat] * scaledEigen[col]);
-
-      for(row = 0; row < numStates; row++)
-        {
-          const int
-            offset = numStates * row;
-
-          left[base + offset] = 1.0;
-
-          for(col = 1; col < numStates; col++)
-            left[base + offset + col] = expTerm[col] * EI[offset + col];
-        }
-    }
-}
-
-/** @brief Compute marginal ancestral probabilities for the node in the traversal descriptor
-
-    A very simple iterative function that only accesses the conditional likelihood vector
-    of the node stored in the first entry of the traversal descriptor \a tr->td[0]. For every
-    partition that is flagged for execution and contains at least one site, a zero branch
-    length P matrix is built and used to turn the conditional likelihood vector into
-    marginal ancestral probabilities, which are written to the \a ancestralBuffer of that partition.
-
-    @param tr
-      PLL instance; the traversal descriptor must contain exactly one entry and
-      \a tr->saveMemory must be disabled (both conditions are asserted)
-
-    @param pr
-      List of partitions
- */
-void newviewAncestralIterative(pllInstance *tr, partitionList *pr)
-{
-  traversalInfo
-    *entry = &(tr->td[0].ti[0]);
-
-  int
-    model,
-    p_slot = -1;
-
-  /* the traversal descriptor has to hold exactly one node */
-
-  assert(tr->td[0].count == 1);
-  assert(!tr->saveMemory);
-
-  /* index of the conditional likelihood vector: a slot if recomputation is used,
-     otherwise derived from the inner node number */
-
-  p_slot = tr->useRecom ? entry->slot_p : entry->pNumber - tr->mxtips - 1;
-
-  /* visit all partitions for the node of the traversal descriptor entry */
-
-  for(model = 0; model < pr->numberOfPartitions; model++)
-    {
-      /* number of sites in this partition */
-      size_t
-        width = (size_t)pr->partitionData[model]->width,
-        states,
-        allocatedLength,
-        expectedLength,
-        rateHet;
-
-      double
-        *clv,
-        *rates,
-        *zeroBranchP = (double*)NULL;
-
-      int
-        categories;
-
-      /* same execution test as the one used in pllEvaluateIterative */
-
-      if(!tr->td[0].executeModel[model] || width == 0)
-        continue;
-
-      clv             = pr->partitionData[model]->xVector[p_slot];
-      states          = (size_t)pr->partitionData[model]->states;
-      allocatedLength = pr->partitionData[model]->xSpaceVector[p_slot];
-      rateHet         = discreteRateCategories(tr->rateHetModel);
-
-      /* pick the rates that belong to the rate heterogeneity model in use */
-
-      if(tr->rateHetModel == PLL_CAT)
-        {
-          rates      = pr->partitionData[model]->perSiteRates;
-          categories = pr->partitionData[model]->numberOfCategories;
-        }
-      else
-        {
-          rates      = pr->partitionData[model]->gammaRates;
-          categories = 4;
-        }
-
-      /* space for the special P matrix with a branch length of 0 into which the
-         eigenvalues are mingled; it allows us to obtain real probabilities from the
-         internal representation */
-
-      rax_posix_memalign ((void **)&zeroBranchP, PLL_BYTE_ALIGNMENT, categories * states * states * sizeof(double));
-
-      /* the vector must already have been allocated with the expected size, which holds
-         if a standard newview() has been invoked on this node beforehand */
-
-      expectedLength = virtual_width( width ) * rateHet * states * sizeof(double);
-
-      assert(expectedLength == allocatedLength);
-
-      calc_diagp_Ancestral(rates, pr->partitionData[model]->EI,  pr->partitionData[model]->EIGN, categories, zeroBranchP, states);
-
-      /* dispatch to the generic function of the rate heterogeneity model; the marginal
-         ancestral states end up in pr->partitionData[model]->ancestralBuffer */
-
-      if(tr->rateHetModel == PLL_CAT)
-        ancestralCat(clv, pr->partitionData[model]->ancestralBuffer, zeroBranchP, width, states, pr->partitionData[model]->rateCategory);
-      else
-        ancestralGamma(clv, pr->partitionData[model]->ancestralBuffer, zeroBranchP, width, states, categories * states);
-
-      rax_free(zeroBranchP);
-    }
-}
-
-/** @brief Computes the Conditional Likelihood Vector (CLV) for each rate of some internal node.
-
-    Computes the conditional likelihood vectors of node \a p for each rate, given the partition
-    index \a partition. The result is placed in the array \a outProbs, which must be pre-allocated
-    by the caller, and must be of size \a sites * categories * states * sizeof(double). The structure of
-    the resulting array is the following:
-    For each site we have \a categories * states cells of size \a double. Those cells are divided per rate
-    category, i.e. first \a states cells are the probabilities for the states of rate 1 (ordered alphabetically
-    by base name), next \a states cells for rate 2 and so on. Within every rate category the \a states
-    values are normalized so that they sum to one.
-
-    Only the \b PLL_GAMMA model of rate heterogeneity is supported; the number of rate categories
-    is fixed to 4 in this function.
-
-    @param tr   PLL instance
-    @param pr     List of partitions
-    @param p Node for which we want to compute the CLV
-    @param partition   Index of the partition for which to compute the CLV
-    @param outProbs    Pre-allocated array where the result will be stored
-
-    @returns Returns \b PLL_TRUE on success, \b PLL_FALSE on failure (rate heterogeneity model
-             is not \b PLL_GAMMA, or a temporary buffer could not be allocated)
-
-    @todo       Fix to work with CAT
-*/
-int pllGetCLV (pllInstance * tr, partitionList * pr, nodeptr p, int partition, double * outProbs)
-{
-  size_t i, j, k, l;
-
-  if (tr->rateHetModel != PLL_GAMMA) return (PLL_FALSE);
-
-  int p_slot;
-  size_t states = (size_t)pr->partitionData[partition]->states;
-  size_t width = (size_t) pr->partitionData[partition]->width;
-  size_t categories = 4;
-  double * diagptable = NULL;
-  double * term = NULL;
-
-  /* slot of the likelihood vector of p inside xVector */
-  if(tr->useRecom)
-    p_slot = p->number;
-  else
-    p_slot = p->number - tr->mxtips - 1;
-
-  double * rateCategories = pr->partitionData[partition]->gammaRates;
-  double * x3 = pr->partitionData[partition]->xVector[p_slot];
-
-  /* scratch space for the un-normalized values of one rate category */
-  term = (double*)rax_malloc(sizeof(double) * states);
-
-  if (!term) return (PLL_FALSE);
-
-  rax_posix_memalign ((void **)&diagptable, PLL_BYTE_ALIGNMENT, categories * states * states * sizeof (double));
-
-  /* diagptable was initialized to NULL above; treat a still-NULL pointer as allocation failure */
-  if (!diagptable)
-   {
-     rax_free(term);
-     return (PLL_FALSE);
-   }
-
-  calc_diagp_Ancestral(rateCategories, pr->partitionData[partition]->EI,  pr->partitionData[partition]->EIGN, categories, diagptable, states);
-
-  for (i = 0; i < width; ++ i)
-   {
-     double
-       *_v  = &x3[categories * states * i],
-       *clv = &outProbs[categories * states * i];
-
-     for (k = 0; k < categories; ++ k)
-      {
-        double
-         sum = 0.0,
-         *v = &(_v[states * k]);
-
-        for (l = 0; l < states; ++ l)
-         {
-           double al = 0.0;
-
-           for (j = 0; j < states; ++ j)
-             al += v[j] * diagptable[k * states * states + l * states + j];
-
-           term[l] = al;
-           sum += al;
-         }
-
-        /* each rate category owns a run of 'states' cells, hence the stride is
-           'states' (not 'categories'); the two only coincide for 4-state data */
-        for (l = 0; l < states; ++ l)
-           clv[k * states + l] = term[l] / sum;
-      }
-   }
-
-  rax_free(term);
-  rax_free(diagptable);
-
-  return (PLL_TRUE);
-}
-
-/* this is very similar to pllUpdatePartials, except that it also computes the marginal ancestral probabilities 
-   at node p. To simplify the code I am re-using newview() here to first get the likelihood vector p->x at p
-   and then I deploy newviewAncestralIterative(tr), which should always only have a traversal descriptor of length 1,
-   to do some mathematical transformations that are required to obtain the marginal ancestral probabilities from 
-   the conditional likelihood array at p.
-
-   Note that the marginal ancestral probability vector summarizes the subtree rooted at p! */
-
-/** @brief Computes the conditional likelihood vectors of all nodes in the subtree rooted at \a p
-    and the marginal ancestral probabilities at node \a p
-
-    Compute the conditional likelihood vectors of all nodes in the subtree rooted at node \a p. The
-    conditional likelihood vector at node \a p is recomputed regardless of whether the orientation (i.e. \a p->x)
-    is correct or not, and, recursively, the likelihoods at each node in the subtree as needed and if necessary.
-    In addition, the marginal ancestral probability vector for node \a p is also computed.
-
-    Nothing is computed if \a p is a tip (an error message is printed and, unless assertions are
-    disabled, the program aborts) or if memory saving is enabled (a message is printed).
-
-    @param tr
-      PLL instance
-
-    @param pr
-      List of partitions
-
-    @param p
-      Node for which we want to compute the ancestral vector
-
-    @note
-      This function is not implemented with the saveMemory technique. 
-*/
-void pllUpdatePartialsAncestral(pllInstance *tr, partitionList *pr, nodeptr p)
-{
-  /* error check, we don't need to compute anything for tips */
-  
-  if(isTip(p->number, tr->mxtips))
-    {
-      printf("You are trying to compute the ancestral states on a tip node of the tree\n");
-      assert(0);
-
-      /* when assert() is compiled out (NDEBUG) we must still not continue with a tip */
-      return;
-    }
-
-  /* doesn't work yet in conjunction with SEVs, can be implemented though at some point 
-     if urgently required */
-
-  if(tr->saveMemory)
-    {
-      printf("ancestral state implementation will not work with memory saving (SEVs) enabled!\n");
-      printf("returning without computing anything ... \n");
-      return;
-    }
-
-  /* first call pllUpdatePartials() with mask set to PLL_FALSE such that the likelihood vector is there ! */
-
-  pllUpdatePartials(tr, pr, p, PLL_FALSE);
-
-  /* now let's compute the ancestral states using this vector ! */
-  
-  /* to make things easy and reduce code size, let's re-compute a standard traversal descriptor for node p,
-     hence we need to set the count to 0 */
-
-  tr->td[0].count = 0;
-
-  computeTraversalInfo(p, &(tr->td[0].ti[0]), &(tr->td[0].count), tr->mxtips, pr->perGeneBranchLengths?pr->numberOfPartitions : 1, PLL_TRUE, tr->rvec, tr->useRecom);
-
-  tr->td[0].traversalHasChanged = PLL_TRUE;
-
-  /* here we actually assert that the traversal descriptor only contains one node triplet p, p->next->back, p->next->next->back;
-     this must be PLL_TRUE because we have already invoked the standard pllUpdatePartials() on p.
-  */ 
-
-  assert(tr->td[0].count == 1);  
-  
-#if (defined(_FINE_GRAIN_MPI) || defined(_USE_PTHREADS))
-  /* use the pthreads barrier to invoke newviewAncestralIterative() on a per-thread basis */
-
-  pllMasterBarrier (tr, pr, PLL_THREAD_NEWVIEW_ANCESTRAL);
-#else
-  /* now call the dedicated function that does the mathematical transformation of the 
-     conditional likelihood vector at p to obtain the marginal ancestral states */
-
-  newviewAncestralIterative(tr, pr);
-#endif
-
-  tr->td[0].traversalHasChanged = PLL_FALSE;
-
-#if (defined(_FINE_GRAIN_MPI) || defined(_USE_PTHREADS))
-  /* invoke another parallel region to gather the marginal ancestral probabilities 
-     from the threads/MPI processes */
-
-  pllMasterBarrier (tr, pr, PLL_THREAD_GATHER_ANCESTRAL);
-#endif
-
-  
-}
-
-/** @brief Get the character representation of an enumerated binary, DNA or AA state
-    
-    Returns the character representation of the enumerated state, taken from the
-    constant arrays \a binaryStateNames (for binary data), \a dnaStateNames (for DNA)
-    or \a protStateNames (for proteins).
-
-    @param dataType
-      Type of data, i.e. \b PLL_BINARY_DATA, \b PLL_DNA_DATA or \b PLL_AA_DATA
-
-    @param state
-      The number which we want to decode to a letter; it is used directly as an
-      index into the corresponding name array and is not range-checked here
-
-    @return
-      Returns the decoded character. For an unsupported \a dataType the function
-      asserts; if assertions are disabled it returns '?'
- */
-static char getStateCharacter(int dataType, int state)
-{
-  /* start from a defined fallback so that the unsupported-type path never
-     returns an indeterminate value when assert() is compiled out (NDEBUG) */
-  char 
-    result = '?';
-
-  switch(dataType)
-    {    
-    case PLL_BINARY_DATA:
-      result = binaryStateNames[state];
-      break;
-    case PLL_DNA_DATA:
-      result = dnaStateNames[state];
-      break;
-    case PLL_AA_DATA:
-      result = protStateNames[state];
-      break;    
-    default:
-      assert(0);
-      break;
-    }
-
-  return  result;
-}
-
-/** @brief Prints the ancestral state information for a node \a p to the terminal 
- 
-    Collects, for every site of every partition, the marginal ancestral state
-    probabilities and the derived discrete state character into a temporary array,
-    then writes them to standard output. The probabilities are printed when
-    \a printProbs is set, the discrete state sequence when \a printStates is set.
-    A site whose probabilities are all approximately 1/states is shown as '?'.
-
-    @param p
-      The node for which to print the ancestral state sequence
-
-    @param printStates
-      If set to \b PLL_TRUE then the ancestral state sequence is printed
-
-    @param printProbs
-      If set to \b PLL_TRUE then the marginal ancestral state probabilities are printed
-
-    @param tr
-      PLL instance
-
-    @param pr
-      List of partitions
- 
-    @note  Here one can see how to store the ancestral probabilities in a dedicated data structure
- */
-void printAncestralState(nodeptr p, pllBoolean printStates, pllBoolean printProbs, pllInstance *tr, partitionList *pr)
-{
-  /* one record per site, filled partition by partition */
-  ancestralState 
-    *records = (ancestralState *)rax_malloc(sizeof(ancestralState) * tr->originalCrunchedLength);
-
-  int
-    part,
-    site,
-    s,
-    recordIdx = 0;
-
-#ifdef _USE_PTHREADS
-  /* position of the current partition inside the gathered ancestral vector */
-  size_t 
-    vectorOffset = 0;
-#endif
-
-  for(part = 0; part < pr->numberOfPartitions; part++)
-    {
-      const int
-        numSites  = pr->partitionData[part]->upper - pr->partitionData[part]->lower,
-        numStates = pr->partitionData[part]->states;
-
-      /* source of the marginal probabilities for this partition */
-#ifdef _USE_PTHREADS
-      double
-        *probsIn = &tr->ancestralVector[vectorOffset];
-#else
-      double 
-        *probsIn = pr->partitionData[part]->ancestralBuffer;
-#endif        
-
-      for(site = 0; site < numSites; site++)
-        {
-          ancestralState
-            *rec = &records[recordIdx++];
-
-          const double
-            uniform = 1.0 / (double)numStates;
-
-          double
-            best = -1.0;
-
-          int
-            bestState = -1;
-
-          pllBoolean
-            allUniform = PLL_TRUE;
-
-          rec->states = numStates;
-          rec->probs  = (double *)rax_malloc(sizeof(double) * numStates);
-
-          /* copy the probabilities, tracking the most likely state and whether
-             every value is (approximately) the uniform probability */
-          for(s = 0; s < numStates; s++)
-            {
-              const double 
-                prob = probsIn[numStates * site + s];
-
-              if(prob > best)
-                {
-                  best      = prob;
-                  bestState = s;
-                }
-
-              if(allUniform && !(PLL_ABS(uniform - prob) < 0.000001))
-                allUniform = PLL_FALSE;
-
-              rec->probs[s] = prob;
-            }
-
-          /* an uninformative site is reported as '?' */
-          if(!allUniform)
-            rec->c = getStateCharacter(pr->partitionData[part]->dataType, bestState);
-          else
-            rec->c = '?';
-        }
-
-#ifdef _USE_PTHREADS
-      vectorOffset += numSites * numStates;
-#endif            
-    }
-
-  /* marginal ancestral probabilities, one line per site */
-
-  if(printProbs)
-    {
-      printf("%d\n", p->number);
-
-      for(site = 0; site < tr->originalCrunchedLength; site++)
-        {
-          for(s = 0; s < records[site].states; s++)
-            printf("%f ", records[site].probs[s]);
-
-          printf("\n");
-        }
-
-      printf("\n");
-    }
-
-  /* discrete ancestral state sequence on a single line */
-
-  if(printStates)
-    {
-      printf("%d ", p->number);
-
-      for(site = 0; site < tr->originalCrunchedLength; site++)
-        printf("%c", records[site].c);
-
-      printf("\n");
-    }
-
-  /* release the per-site probability arrays, then the record array itself */
-
-  for(site = 0; site < tr->originalCrunchedLength; site++)
-    rax_free(records[site].probs);
-
-  rax_free(records);
-}
-
-/** @brief Computes the marginal ancestral states of node \a p and copies them into caller buffers
-
-    Calls ::pllUpdatePartialsAncestral for node \a p and then, for every site of every
-    partition, copies the marginal ancestral state probabilities into \a outProbs and
-    the derived discrete state character into \a outSequence. A site whose probabilities
-    are all approximately 1/states is reported as '?'.
-
-    @param tr
-      PLL instance
-
-    @param pr
-      List of partitions
-
-    @param p
-      Node for which the ancestral states are requested
-
-    @param outProbs
-      Caller-allocated array receiving the probabilities. Sites are stored one after
-      another in partition order; each site contributes as many entries as its
-      partition has states
-
-    @param outSequence
-      Caller-allocated array receiving one character per site; it is zero-terminated
-      at index \a tr->originalCrunchedLength, so it must hold at least
-      \a tr->originalCrunchedLength + 1 characters
-
-    @note
-      ::pllUpdatePartialsAncestral returns without computing anything when memory
-      saving (\a tr->saveMemory) is enabled; the values copied here are then whatever
-      the ancestral buffers currently contain.
-*/
-void pllGetAncestralState(pllInstance *tr, partitionList *pr, nodeptr p, double * outProbs, char * outSequence)
-{
-#ifdef _USE_PTHREADS
-  size_t 
-    accumulatedOffset = 0;
-#endif
-
-  /* write position in outProbs; a running offset keeps the layout correct even
-     when partitions have different numbers of states */
-  size_t
-    probOffset = 0;
-
-  int
-    model,
-    globalIndex = 0;
-     
-  pllUpdatePartialsAncestral(tr, pr, p);
-
-  /* loop over partitions */
-
-  for(model = 0; model < pr->numberOfPartitions; model++)
-    {
-      int            
-        i,
-        l,
-        width = pr->partitionData[model]->upper - pr->partitionData[model]->lower,
-        states = pr->partitionData[model]->states;
-      
-      /* set pointer to ancestral probability vector */
-
-#ifdef _USE_PTHREADS
-      double
-        *ancestral = &tr->ancestralVector[accumulatedOffset];
-#else
-      double 
-        *ancestral = pr->partitionData[model]->ancestralBuffer;
-#endif        
-      
-      /* loop over the sites of the partition */
-
-      for(i = 0; i < width; i++, globalIndex++)
-        {
-          double
-            equal = 1.0 / (double)states,
-            max = -1.0;
-            
-          pllBoolean
-            approximatelyEqual = PLL_TRUE;
-
-          int
-            max_l = -1;
-
-          /* loop over states to store probabilities and find the maximum */
-
-          for(l = 0; l < states; l++)
-            {
-              double 
-                value = ancestral[states * i + l];
-
-              if(value > max)
-                {
-                  max = value;
-                  max_l = l;
-                }
-              
-              /* this is used for discretizing the ancestral state sequence, if all marginal ancestral 
-                 probabilities are approximately equal we output a ? */
-
-              approximatelyEqual = approximatelyEqual && (PLL_ABS(equal - value) < 0.000001);
-              
-              outProbs[probOffset + (size_t)l] = value;
-            }
-
-          probOffset += (size_t)states;
-          
-          /* figure out the discrete ancestral state; also fall back to '?' when no
-             maximum was found, so that no negative index is ever decoded */
-
-          if(approximatelyEqual || max_l < 0)
-            outSequence[globalIndex] = '?';
-          else
-            outSequence[globalIndex] = getStateCharacter(pr->partitionData[model]->dataType, max_l);
-        }
-
-#ifdef _USE_PTHREADS
-      accumulatedOffset += width * states;
-#endif            
-    }
-
-  /* zero-terminate the discrete ancestral state sequence */
-
-  outSequence[tr->originalCrunchedLength] = 0;
-}
-/* optimized function implementations */
-
-
-/**
- *  @defgroup group1 Optimized functions
- *  This is the optimized functions group
- */
-
-#if (!defined(__AVX) && defined(__SSE3))
-
-/** @ingroup group1
- *  @brief Computation of conditional likelihood arrray for GTR GAMMA with memory saving (Optimized SSE3 version for DNA data)
-
-    This is the SSE3 optimized version of ::newviewGAMMA_FLEX for computing the conditional
-    likelihood arrays at some node \a p, given child nodes \a q and \a r using the \b GAMMA
-    model of rate heterogeneity. The memory saving technique is incorporated.
-
-    @note
-    For more details and function argument description check the function ::newviewGAMMA_FLEX
-*/
-static void newviewGTRGAMMA_GAPPED_SAVE(int tipCase,
-                                        double *x1_start, double *x2_start, double *x3_start,
-                                        double *EV, double *tipVector,
-                                        int *ex3, unsigned char *tipX1, unsigned char *tipX2,
-                                        const int n, double *left, double *right, int *wgt, int *scalerIncrement, const pllBoolean fastScaling,
-                                        unsigned int *x1_gap, unsigned int *x2_gap, unsigned int *x3_gap, 
-                                        double *x1_gapColumn, double *x2_gapColumn, double *x3_gapColumn)
-{
-  int     
-    i, 
-    j, 
-    k, 
-    l,
-    addScale = 0, 
-    scaleGap = 0;
-
-  double
-    *x1,
-    *x2,
-    *x3,
-    *x1_ptr = x1_start,
-    *x2_ptr = x2_start,       
-    max;
-  PLL_ALIGN_BEGIN double
-    maxima[2] PLL_ALIGN_END,
-    EV_t[16] PLL_ALIGN_END;
-
-  __m128d 
-    values[8],
-    EVV[8];  
-
-  for(k = 0; k < 4; k++)
-    for (l=0; l < 4; l++)
-      EV_t[4 * l + k] = EV[4 * k + l];
-
-  for(k = 0; k < 8; k++)
-    EVV[k] = _mm_load_pd(&EV_t[k * 2]);      
-
-
-
-  switch(tipCase)
-  {
-    case PLL_TIP_TIP:
-      {
-        double *uX1, *uX2;
-        PLL_ALIGN_BEGIN double umpX1[256] PLL_ALIGN_END, umpX2[256] PLL_ALIGN_END;
-
-
-        for (i = 1; i < 16; i++)
-        {           
-          __m128d x1_1 = _mm_load_pd(&(tipVector[i*4]));
-          __m128d x1_2 = _mm_load_pd(&(tipVector[i*4 + 2]));       
-
-          for (j = 0; j < 4; j++)
-            for (k = 0; k < 4; k++)
-            {                            
-              __m128d left1 = _mm_load_pd(&left[j*16 + k*4]);
-              __m128d left2 = _mm_load_pd(&left[j*16 + k*4 + 2]);
-
-              __m128d acc = _mm_setzero_pd();
-
-              acc = _mm_add_pd(acc, _mm_mul_pd(left1, x1_1));
-              acc = _mm_add_pd(acc, _mm_mul_pd(left2, x1_2));
-
-              acc = _mm_hadd_pd(acc, acc);
-              _mm_storel_pd(&umpX1[i*16 + j*4 + k], acc);
-            }
-
-          for (j = 0; j < 4; j++)
-            for (k = 0; k < 4; k++)
-            {
-              __m128d left1 = _mm_load_pd(&right[j*16 + k*4]);
-              __m128d left2 = _mm_load_pd(&right[j*16 + k*4 + 2]);
-
-              __m128d acc = _mm_setzero_pd();
-
-              acc = _mm_add_pd(acc, _mm_mul_pd(left1, x1_1));
-              acc = _mm_add_pd(acc, _mm_mul_pd(left2, x1_2));
-
-              acc = _mm_hadd_pd(acc, acc);
-              _mm_storel_pd(&umpX2[i*16 + j*4 + k], acc);
-
-            }
-        }                 
-
-        uX1 = &umpX1[240];
-        uX2 = &umpX2[240];                          
-
-        for (j = 0; j < 4; j++)
-        {                                                                                  
-          __m128d uX1_k0_sse = _mm_load_pd( &uX1[j * 4] );
-          __m128d uX1_k2_sse = _mm_load_pd( &uX1[j * 4 + 2] );
-
-          __m128d uX2_k0_sse = _mm_load_pd( &uX2[j * 4] );
-          __m128d uX2_k2_sse = _mm_load_pd( &uX2[j * 4 + 2] );
-
-          __m128d x1px2_k0 = _mm_mul_pd( uX1_k0_sse, uX2_k0_sse );
-          __m128d x1px2_k2 = _mm_mul_pd( uX1_k2_sse, uX2_k2_sse );                                                 
-
-          __m128d EV_t_l0_k0 = EVV[0];
-          __m128d EV_t_l0_k2 = EVV[1];
-          __m128d EV_t_l1_k0 = EVV[2];
-          __m128d EV_t_l1_k2 = EVV[3];
-          __m128d EV_t_l2_k0 = EVV[4];
-          __m128d EV_t_l2_k2 = EVV[5];
-          __m128d EV_t_l3_k0 = EVV[6]; 
-          __m128d EV_t_l3_k2 = EVV[7];
-
-          EV_t_l0_k0 = _mm_mul_pd( x1px2_k0, EV_t_l0_k0 );
-          EV_t_l0_k2 = _mm_mul_pd( x1px2_k2, EV_t_l0_k2 );
-          EV_t_l0_k0 = _mm_hadd_pd( EV_t_l0_k0, EV_t_l0_k2 );
-
-          EV_t_l1_k0 = _mm_mul_pd( x1px2_k0, EV_t_l1_k0 );
-          EV_t_l1_k2 = _mm_mul_pd( x1px2_k2, EV_t_l1_k2 );
-
-          EV_t_l1_k0 = _mm_hadd_pd( EV_t_l1_k0, EV_t_l1_k2 );
-          EV_t_l0_k0 = _mm_hadd_pd( EV_t_l0_k0, EV_t_l1_k0 );
-
-          EV_t_l2_k0 = _mm_mul_pd( x1px2_k0, EV_t_l2_k0 );
-          EV_t_l2_k2 = _mm_mul_pd( x1px2_k2, EV_t_l2_k2 );
-          EV_t_l2_k0 = _mm_hadd_pd( EV_t_l2_k0, EV_t_l2_k2 );
-
-          EV_t_l3_k0 = _mm_mul_pd( x1px2_k0, EV_t_l3_k0 );
-          EV_t_l3_k2 = _mm_mul_pd( x1px2_k2, EV_t_l3_k2 );
-          EV_t_l3_k0 = _mm_hadd_pd( EV_t_l3_k0, EV_t_l3_k2 );
-
-          EV_t_l2_k0 = _mm_hadd_pd( EV_t_l2_k0, EV_t_l3_k0 );
-
-          _mm_store_pd( &x3_gapColumn[j * 4 + 0], EV_t_l0_k0 );
-          _mm_store_pd( &x3_gapColumn[j * 4 + 2], EV_t_l2_k0 );    
-        }  
-
-
-        x3 = x3_start;
-
-        for (i = 0; i < n; i++)
-        {           
-          if(!(x3_gap[i / 32] & mask32[i % 32]))             
-          {
-            uX1 = &umpX1[16 * tipX1[i]];
-            uX2 = &umpX2[16 * tipX2[i]];                                        
-
-            for (j = 0; j < 4; j++)
-            {                                                                              
-              __m128d uX1_k0_sse = _mm_load_pd( &uX1[j * 4] );
-              __m128d uX1_k2_sse = _mm_load_pd( &uX1[j * 4 + 2] );
-
-
-              __m128d uX2_k0_sse = _mm_load_pd( &uX2[j * 4] );
-              __m128d uX2_k2_sse = _mm_load_pd( &uX2[j * 4 + 2] );
-
-
-              //
-              // multiply left * right
-              //
-
-              __m128d x1px2_k0 = _mm_mul_pd( uX1_k0_sse, uX2_k0_sse );
-              __m128d x1px2_k2 = _mm_mul_pd( uX1_k2_sse, uX2_k2_sse );
-
-
-              //
-              // multiply with EV matrix (!?)
-              //
-
-              __m128d EV_t_l0_k0 = EVV[0];
-              __m128d EV_t_l0_k2 = EVV[1];
-              __m128d EV_t_l1_k0 = EVV[2];
-              __m128d EV_t_l1_k2 = EVV[3];
-              __m128d EV_t_l2_k0 = EVV[4];
-              __m128d EV_t_l2_k2 = EVV[5];
-              __m128d EV_t_l3_k0 = EVV[6]; 
-              __m128d EV_t_l3_k2 = EVV[7];
-
-              EV_t_l0_k0 = _mm_mul_pd( x1px2_k0, EV_t_l0_k0 );
-              EV_t_l0_k2 = _mm_mul_pd( x1px2_k2, EV_t_l0_k2 );
-              EV_t_l0_k0 = _mm_hadd_pd( EV_t_l0_k0, EV_t_l0_k2 );
-
-              EV_t_l1_k0 = _mm_mul_pd( x1px2_k0, EV_t_l1_k0 );
-              EV_t_l1_k2 = _mm_mul_pd( x1px2_k2, EV_t_l1_k2 );
-
-              EV_t_l1_k0 = _mm_hadd_pd( EV_t_l1_k0, EV_t_l1_k2 );
-              EV_t_l0_k0 = _mm_hadd_pd( EV_t_l0_k0, EV_t_l1_k0 );
-
-              EV_t_l2_k0 = _mm_mul_pd( x1px2_k0, EV_t_l2_k0 );
-              EV_t_l2_k2 = _mm_mul_pd( x1px2_k2, EV_t_l2_k2 );
-              EV_t_l2_k0 = _mm_hadd_pd( EV_t_l2_k0, EV_t_l2_k2 );
-
-              EV_t_l3_k0 = _mm_mul_pd( x1px2_k0, EV_t_l3_k0 );
-              EV_t_l3_k2 = _mm_mul_pd( x1px2_k2, EV_t_l3_k2 );
-              EV_t_l3_k0 = _mm_hadd_pd( EV_t_l3_k0, EV_t_l3_k2 );
-
-              EV_t_l2_k0 = _mm_hadd_pd( EV_t_l2_k0, EV_t_l3_k0 );
-
-              _mm_store_pd( &x3[j * 4 + 0], EV_t_l0_k0 );
-              _mm_store_pd( &x3[j * 4 + 2], EV_t_l2_k0 );
-            }
-
-            x3 += 16;
-          }
-        }
-      }
-      break;
-    case PLL_TIP_INNER:
-      { 
-        double 
-          *uX1;
-        PLL_ALIGN_BEGIN double
-          umpX1[256] PLL_ALIGN_END;
-
-        for (i = 1; i < 16; i++)
-        {
-          __m128d x1_1 = _mm_load_pd(&(tipVector[i*4]));
-          __m128d x1_2 = _mm_load_pd(&(tipVector[i*4 + 2]));       
-
-          for (j = 0; j < 4; j++)
-            for (k = 0; k < 4; k++)
-            {            
-              __m128d left1 = _mm_load_pd(&left[j*16 + k*4]);
-              __m128d left2 = _mm_load_pd(&left[j*16 + k*4 + 2]);
-
-              __m128d acc = _mm_setzero_pd();
-
-              acc = _mm_add_pd(acc, _mm_mul_pd(left1, x1_1));
-              acc = _mm_add_pd(acc, _mm_mul_pd(left2, x1_2));
-
-              acc = _mm_hadd_pd(acc, acc);
-              _mm_storel_pd(&umpX1[i*16 + j*4 + k], acc);                
-            }
-        }
-
-        {
-          __m128d maxv =_mm_setzero_pd();
-
-          scaleGap = 0;
-
-          x2 = x2_gapColumn;                     
-          x3 = x3_gapColumn;
-
-          uX1 = &umpX1[240];         
-
-          for (j = 0; j < 4; j++)
-          {                                
-            double *x2_p = &x2[j*4];
-            double *right_k0_p = &right[j*16];
-            double *right_k1_p = &right[j*16 + 1*4];
-            double *right_k2_p = &right[j*16 + 2*4];
-            double *right_k3_p = &right[j*16 + 3*4];
-            __m128d x2_0 = _mm_load_pd( &x2_p[0] );
-            __m128d x2_2 = _mm_load_pd( &x2_p[2] );
-
-            __m128d right_k0_0 = _mm_load_pd( &right_k0_p[0] );
-            __m128d right_k0_2 = _mm_load_pd( &right_k0_p[2] );
-            __m128d right_k1_0 = _mm_load_pd( &right_k1_p[0] );
-            __m128d right_k1_2 = _mm_load_pd( &right_k1_p[2] );
-            __m128d right_k2_0 = _mm_load_pd( &right_k2_p[0] );
-            __m128d right_k2_2 = _mm_load_pd( &right_k2_p[2] );
-            __m128d right_k3_0 = _mm_load_pd( &right_k3_p[0] );
-            __m128d right_k3_2 = _mm_load_pd( &right_k3_p[2] );
-
-            right_k0_0 = _mm_mul_pd( x2_0, right_k0_0);
-            right_k0_2 = _mm_mul_pd( x2_2, right_k0_2);
-
-            right_k1_0 = _mm_mul_pd( x2_0, right_k1_0);
-            right_k1_2 = _mm_mul_pd( x2_2, right_k1_2);
-
-            right_k0_0 = _mm_hadd_pd( right_k0_0, right_k0_2);
-            right_k1_0 = _mm_hadd_pd( right_k1_0, right_k1_2);
-            right_k0_0 = _mm_hadd_pd( right_k0_0, right_k1_0);
-
-            right_k2_0 = _mm_mul_pd( x2_0, right_k2_0);
-            right_k2_2 = _mm_mul_pd( x2_2, right_k2_2);
-
-            right_k3_0 = _mm_mul_pd( x2_0, right_k3_0);
-            right_k3_2 = _mm_mul_pd( x2_2, right_k3_2);
-
-            right_k2_0 = _mm_hadd_pd( right_k2_0, right_k2_2);
-            right_k3_0 = _mm_hadd_pd( right_k3_0, right_k3_2);
-            right_k2_0 = _mm_hadd_pd( right_k2_0, right_k3_0);
-
-            __m128d uX1_k0_sse = _mm_load_pd( &uX1[j * 4] );
-            __m128d uX1_k2_sse = _mm_load_pd( &uX1[j * 4 + 2] );
-
-            __m128d x1px2_k0 = _mm_mul_pd( uX1_k0_sse, right_k0_0 );
-            __m128d x1px2_k2 = _mm_mul_pd( uX1_k2_sse, right_k2_0 );
-
-            __m128d EV_t_l0_k0 = EVV[0];
-            __m128d EV_t_l0_k2 = EVV[1];
-            __m128d EV_t_l1_k0 = EVV[2];
-            __m128d EV_t_l1_k2 = EVV[3];
-            __m128d EV_t_l2_k0 = EVV[4];
-            __m128d EV_t_l2_k2 = EVV[5];
-            __m128d EV_t_l3_k0 = EVV[6]; 
-            __m128d EV_t_l3_k2 = EVV[7];
-
-            EV_t_l0_k0 = _mm_mul_pd( x1px2_k0, EV_t_l0_k0 );
-            EV_t_l0_k2 = _mm_mul_pd( x1px2_k2, EV_t_l0_k2 );
-            EV_t_l0_k0 = _mm_hadd_pd( EV_t_l0_k0, EV_t_l0_k2 );
-
-            EV_t_l1_k0 = _mm_mul_pd( x1px2_k0, EV_t_l1_k0 );
-            EV_t_l1_k2 = _mm_mul_pd( x1px2_k2, EV_t_l1_k2 );
-
-            EV_t_l1_k0 = _mm_hadd_pd( EV_t_l1_k0, EV_t_l1_k2 );
-            EV_t_l0_k0 = _mm_hadd_pd( EV_t_l0_k0, EV_t_l1_k0 );
-
-            EV_t_l2_k0 = _mm_mul_pd( x1px2_k0, EV_t_l2_k0 );
-            EV_t_l2_k2 = _mm_mul_pd( x1px2_k2, EV_t_l2_k2 );
-            EV_t_l2_k0 = _mm_hadd_pd( EV_t_l2_k0, EV_t_l2_k2 );
-
-            EV_t_l3_k0 = _mm_mul_pd( x1px2_k0, EV_t_l3_k0 );
-            EV_t_l3_k2 = _mm_mul_pd( x1px2_k2, EV_t_l3_k2 );
-            EV_t_l3_k0 = _mm_hadd_pd( EV_t_l3_k0, EV_t_l3_k2 );
-
-            EV_t_l2_k0 = _mm_hadd_pd( EV_t_l2_k0, EV_t_l3_k0 );
-
-            values[j * 2]     = EV_t_l0_k0;
-            values[j * 2 + 1] = EV_t_l2_k0;                                
-
-            maxv = _mm_max_pd(maxv, _mm_and_pd(EV_t_l0_k0, absMask.m));
-            maxv = _mm_max_pd(maxv, _mm_and_pd(EV_t_l2_k0, absMask.m));                                    
-          }
-
-
-          _mm_store_pd(maxima, maxv);
-
-          max = PLL_MAX(maxima[0], maxima[1]);
-
-          if(max < PLL_MINLIKELIHOOD)
-          {
-            scaleGap = 1;
-
-            __m128d sv = _mm_set1_pd(PLL_TWOTOTHE256);
-
-            _mm_store_pd(&x3[0], _mm_mul_pd(values[0], sv));       
-            _mm_store_pd(&x3[2], _mm_mul_pd(values[1], sv));
-            _mm_store_pd(&x3[4], _mm_mul_pd(values[2], sv));
-            _mm_store_pd(&x3[6], _mm_mul_pd(values[3], sv));
-            _mm_store_pd(&x3[8], _mm_mul_pd(values[4], sv));       
-            _mm_store_pd(&x3[10], _mm_mul_pd(values[5], sv));
-            _mm_store_pd(&x3[12], _mm_mul_pd(values[6], sv));
-            _mm_store_pd(&x3[14], _mm_mul_pd(values[7], sv));                        
-          }
-          else
-          {
-            _mm_store_pd(&x3[0], values[0]);       
-            _mm_store_pd(&x3[2], values[1]);
-            _mm_store_pd(&x3[4], values[2]);
-            _mm_store_pd(&x3[6], values[3]);
-            _mm_store_pd(&x3[8], values[4]);       
-            _mm_store_pd(&x3[10], values[5]);
-            _mm_store_pd(&x3[12], values[6]);
-            _mm_store_pd(&x3[14], values[7]);
-          }
-        }                       
-
-        x3 = x3_start;
-
-        for (i = 0; i < n; i++)
-        {
-          if((x3_gap[i / 32] & mask32[i % 32]))
-          {            
-            if(scaleGap)
-            {   
-              if(!fastScaling)
-                ex3[i] += 1;
-              else
-                addScale += wgt[i];                  
-            }
-          }
-          else
-          {                              
-            __m128d maxv =_mm_setzero_pd();              
-
-            if(x2_gap[i / 32] & mask32[i % 32])
-              x2 = x2_gapColumn;
-            else
-            {
-              x2 = x2_ptr;
-              x2_ptr += 16;
-            }
-
-            uX1 = &umpX1[16 * tipX1[i]];             
-
-
-            for (j = 0; j < 4; j++)
-            {                              
-              double *x2_p = &x2[j*4];
-              double *right_k0_p = &right[j*16];
-              double *right_k1_p = &right[j*16 + 1*4];
-              double *right_k2_p = &right[j*16 + 2*4];
-              double *right_k3_p = &right[j*16 + 3*4];
-              __m128d x2_0 = _mm_load_pd( &x2_p[0] );
-              __m128d x2_2 = _mm_load_pd( &x2_p[2] );
-
-              __m128d right_k0_0 = _mm_load_pd( &right_k0_p[0] );
-              __m128d right_k0_2 = _mm_load_pd( &right_k0_p[2] );
-              __m128d right_k1_0 = _mm_load_pd( &right_k1_p[0] );
-              __m128d right_k1_2 = _mm_load_pd( &right_k1_p[2] );
-              __m128d right_k2_0 = _mm_load_pd( &right_k2_p[0] );
-              __m128d right_k2_2 = _mm_load_pd( &right_k2_p[2] );
-              __m128d right_k3_0 = _mm_load_pd( &right_k3_p[0] );
-              __m128d right_k3_2 = _mm_load_pd( &right_k3_p[2] );
-
-
-              right_k0_0 = _mm_mul_pd( x2_0, right_k0_0);
-              right_k0_2 = _mm_mul_pd( x2_2, right_k0_2);
-
-              right_k1_0 = _mm_mul_pd( x2_0, right_k1_0);
-              right_k1_2 = _mm_mul_pd( x2_2, right_k1_2);
-
-              right_k0_0 = _mm_hadd_pd( right_k0_0, right_k0_2);
-              right_k1_0 = _mm_hadd_pd( right_k1_0, right_k1_2);
-              right_k0_0 = _mm_hadd_pd( right_k0_0, right_k1_0);
-
-
-              right_k2_0 = _mm_mul_pd( x2_0, right_k2_0);
-              right_k2_2 = _mm_mul_pd( x2_2, right_k2_2);
-
-
-              right_k3_0 = _mm_mul_pd( x2_0, right_k3_0);
-              right_k3_2 = _mm_mul_pd( x2_2, right_k3_2);
-
-              right_k2_0 = _mm_hadd_pd( right_k2_0, right_k2_2);
-              right_k3_0 = _mm_hadd_pd( right_k3_0, right_k3_2);
-              right_k2_0 = _mm_hadd_pd( right_k2_0, right_k3_0);
-
-              {
-                //
-                // load left side from tip vector
-                //
-
-                __m128d uX1_k0_sse = _mm_load_pd( &uX1[j * 4] );
-                __m128d uX1_k2_sse = _mm_load_pd( &uX1[j * 4 + 2] );
-
-
-                //
-                // multiply left * right
-                //
-
-                __m128d x1px2_k0 = _mm_mul_pd( uX1_k0_sse, right_k0_0 );
-                __m128d x1px2_k2 = _mm_mul_pd( uX1_k2_sse, right_k2_0 );
-
-
-                //
-                // multiply with EV matrix (!?)
-                //                                
-
-                __m128d EV_t_l0_k0 = EVV[0];
-                __m128d EV_t_l0_k2 = EVV[1];
-                __m128d EV_t_l1_k0 = EVV[2];
-                __m128d EV_t_l1_k2 = EVV[3];
-                __m128d EV_t_l2_k0 = EVV[4];
-                __m128d EV_t_l2_k2 = EVV[5];
-                __m128d EV_t_l3_k0 = EVV[6]; 
-                __m128d EV_t_l3_k2 = EVV[7];
-
-
-                EV_t_l0_k0 = _mm_mul_pd( x1px2_k0, EV_t_l0_k0 );
-                EV_t_l0_k2 = _mm_mul_pd( x1px2_k2, EV_t_l0_k2 );
-                EV_t_l0_k0 = _mm_hadd_pd( EV_t_l0_k0, EV_t_l0_k2 );
-
-                EV_t_l1_k0 = _mm_mul_pd( x1px2_k0, EV_t_l1_k0 );
-                EV_t_l1_k2 = _mm_mul_pd( x1px2_k2, EV_t_l1_k2 );
-
-                EV_t_l1_k0 = _mm_hadd_pd( EV_t_l1_k0, EV_t_l1_k2 );
-                EV_t_l0_k0 = _mm_hadd_pd( EV_t_l0_k0, EV_t_l1_k0 );
-
-                EV_t_l2_k0 = _mm_mul_pd( x1px2_k0, EV_t_l2_k0 );
-                EV_t_l2_k2 = _mm_mul_pd( x1px2_k2, EV_t_l2_k2 );
-                EV_t_l2_k0 = _mm_hadd_pd( EV_t_l2_k0, EV_t_l2_k2 );
-
-                EV_t_l3_k0 = _mm_mul_pd( x1px2_k0, EV_t_l3_k0 );
-                EV_t_l3_k2 = _mm_mul_pd( x1px2_k2, EV_t_l3_k2 );
-                EV_t_l3_k0 = _mm_hadd_pd( EV_t_l3_k0, EV_t_l3_k2 );
-
-                EV_t_l2_k0 = _mm_hadd_pd( EV_t_l2_k0, EV_t_l3_k0 );
-
-                values[j * 2]     = EV_t_l0_k0;
-                values[j * 2 + 1] = EV_t_l2_k0;                            
-
-                maxv = _mm_max_pd(maxv, _mm_and_pd(EV_t_l0_k0, absMask.m));
-                maxv = _mm_max_pd(maxv, _mm_and_pd(EV_t_l2_k0, absMask.m));                
-              }            
-            }
-
-
-            _mm_store_pd(maxima, maxv);
-
-            max = PLL_MAX(maxima[0], maxima[1]);
-
-            if(max < PLL_MINLIKELIHOOD)
-            {
-              __m128d sv = _mm_set1_pd(PLL_TWOTOTHE256);
-
-              _mm_store_pd(&x3[0], _mm_mul_pd(values[0], sv));     
-              _mm_store_pd(&x3[2], _mm_mul_pd(values[1], sv));
-              _mm_store_pd(&x3[4], _mm_mul_pd(values[2], sv));
-              _mm_store_pd(&x3[6], _mm_mul_pd(values[3], sv));
-              _mm_store_pd(&x3[8], _mm_mul_pd(values[4], sv));     
-              _mm_store_pd(&x3[10], _mm_mul_pd(values[5], sv));
-              _mm_store_pd(&x3[12], _mm_mul_pd(values[6], sv));
-              _mm_store_pd(&x3[14], _mm_mul_pd(values[7], sv));      
-
-              if(!fastScaling)
-                ex3[i] += 1;
-              else
-                addScale += wgt[i];
-
-            }
-            else
-            {
-              _mm_store_pd(&x3[0], values[0]);     
-              _mm_store_pd(&x3[2], values[1]);
-              _mm_store_pd(&x3[4], values[2]);
-              _mm_store_pd(&x3[6], values[3]);
-              _mm_store_pd(&x3[8], values[4]);     
-              _mm_store_pd(&x3[10], values[5]);
-              _mm_store_pd(&x3[12], values[6]);
-              _mm_store_pd(&x3[14], values[7]);
-            }            
-
-            x3 += 16;
-          }
-        }
-      }
-      break;
-    case PLL_INNER_INNER:         
-      {
-        __m128d maxv =_mm_setzero_pd();
-
-        scaleGap = 0;
-
-        x1 = x1_gapColumn;                  
-        x2 = x2_gapColumn;          
-        x3 = x3_gapColumn;
-
-        for (j = 0; j < 4; j++)
-        {
-
-          double *x1_p = &x1[j*4];
-          double *left_k0_p = &left[j*16];
-          double *left_k1_p = &left[j*16 + 1*4];
-          double *left_k2_p = &left[j*16 + 2*4];
-          double *left_k3_p = &left[j*16 + 3*4];
-
-          __m128d x1_0 = _mm_load_pd( &x1_p[0] );
-          __m128d x1_2 = _mm_load_pd( &x1_p[2] );
-
-          __m128d left_k0_0 = _mm_load_pd( &left_k0_p[0] );
-          __m128d left_k0_2 = _mm_load_pd( &left_k0_p[2] );
-          __m128d left_k1_0 = _mm_load_pd( &left_k1_p[0] );
-          __m128d left_k1_2 = _mm_load_pd( &left_k1_p[2] );
-          __m128d left_k2_0 = _mm_load_pd( &left_k2_p[0] );
-          __m128d left_k2_2 = _mm_load_pd( &left_k2_p[2] );
-          __m128d left_k3_0 = _mm_load_pd( &left_k3_p[0] );
-          __m128d left_k3_2 = _mm_load_pd( &left_k3_p[2] );
-
-          left_k0_0 = _mm_mul_pd(x1_0, left_k0_0);
-          left_k0_2 = _mm_mul_pd(x1_2, left_k0_2);
-
-          left_k1_0 = _mm_mul_pd(x1_0, left_k1_0);
-          left_k1_2 = _mm_mul_pd(x1_2, left_k1_2);
-
-          left_k0_0 = _mm_hadd_pd( left_k0_0, left_k0_2 );
-          left_k1_0 = _mm_hadd_pd( left_k1_0, left_k1_2);
-          left_k0_0 = _mm_hadd_pd( left_k0_0, left_k1_0);
-
-          left_k2_0 = _mm_mul_pd(x1_0, left_k2_0);
-          left_k2_2 = _mm_mul_pd(x1_2, left_k2_2);
-
-          left_k3_0 = _mm_mul_pd(x1_0, left_k3_0);
-          left_k3_2 = _mm_mul_pd(x1_2, left_k3_2);
-
-          left_k2_0 = _mm_hadd_pd( left_k2_0, left_k2_2);
-          left_k3_0 = _mm_hadd_pd( left_k3_0, left_k3_2);
-          left_k2_0 = _mm_hadd_pd( left_k2_0, left_k3_0);
-
-
-          double *x2_p = &x2[j*4];
-          double *right_k0_p = &right[j*16];
-          double *right_k1_p = &right[j*16 + 1*4];
-          double *right_k2_p = &right[j*16 + 2*4];
-          double *right_k3_p = &right[j*16 + 3*4];
-          __m128d x2_0 = _mm_load_pd( &x2_p[0] );
-          __m128d x2_2 = _mm_load_pd( &x2_p[2] );
-
-          __m128d right_k0_0 = _mm_load_pd( &right_k0_p[0] );
-          __m128d right_k0_2 = _mm_load_pd( &right_k0_p[2] );
-          __m128d right_k1_0 = _mm_load_pd( &right_k1_p[0] );
-          __m128d right_k1_2 = _mm_load_pd( &right_k1_p[2] );
-          __m128d right_k2_0 = _mm_load_pd( &right_k2_p[0] );
-          __m128d right_k2_2 = _mm_load_pd( &right_k2_p[2] );
-          __m128d right_k3_0 = _mm_load_pd( &right_k3_p[0] );
-          __m128d right_k3_2 = _mm_load_pd( &right_k3_p[2] );
-
-          right_k0_0 = _mm_mul_pd( x2_0, right_k0_0);
-          right_k0_2 = _mm_mul_pd( x2_2, right_k0_2);
-
-          right_k1_0 = _mm_mul_pd( x2_0, right_k1_0);
-          right_k1_2 = _mm_mul_pd( x2_2, right_k1_2);
-
-          right_k0_0 = _mm_hadd_pd( right_k0_0, right_k0_2);
-          right_k1_0 = _mm_hadd_pd( right_k1_0, right_k1_2);
-          right_k0_0 = _mm_hadd_pd( right_k0_0, right_k1_0);
-
-          right_k2_0 = _mm_mul_pd( x2_0, right_k2_0);
-          right_k2_2 = _mm_mul_pd( x2_2, right_k2_2);
-
-          right_k3_0 = _mm_mul_pd( x2_0, right_k3_0);
-          right_k3_2 = _mm_mul_pd( x2_2, right_k3_2);
-
-          right_k2_0 = _mm_hadd_pd( right_k2_0, right_k2_2);
-          right_k3_0 = _mm_hadd_pd( right_k3_0, right_k3_2);
-          right_k2_0 = _mm_hadd_pd( right_k2_0, right_k3_0);                                    
-
-          __m128d x1px2_k0 = _mm_mul_pd( left_k0_0, right_k0_0 );
-          __m128d x1px2_k2 = _mm_mul_pd( left_k2_0, right_k2_0 );                                          
-
-          __m128d EV_t_l0_k0 = EVV[0];
-          __m128d EV_t_l0_k2 = EVV[1];
-          __m128d EV_t_l1_k0 = EVV[2];
-          __m128d EV_t_l1_k2 = EVV[3];
-          __m128d EV_t_l2_k0 = EVV[4];
-          __m128d EV_t_l2_k2 = EVV[5];
-          __m128d EV_t_l3_k0 = EVV[6]; 
-          __m128d EV_t_l3_k2 = EVV[7];
-
-          EV_t_l0_k0 = _mm_mul_pd( x1px2_k0, EV_t_l0_k0 );
-          EV_t_l0_k2 = _mm_mul_pd( x1px2_k2, EV_t_l0_k2 );
-          EV_t_l0_k0 = _mm_hadd_pd( EV_t_l0_k0, EV_t_l0_k2 );
-
-          EV_t_l1_k0 = _mm_mul_pd( x1px2_k0, EV_t_l1_k0 );
-          EV_t_l1_k2 = _mm_mul_pd( x1px2_k2, EV_t_l1_k2 );
-
-          EV_t_l1_k0 = _mm_hadd_pd( EV_t_l1_k0, EV_t_l1_k2 );
-          EV_t_l0_k0 = _mm_hadd_pd( EV_t_l0_k0, EV_t_l1_k0 );
-
-          EV_t_l2_k0 = _mm_mul_pd( x1px2_k0, EV_t_l2_k0 );
-          EV_t_l2_k2 = _mm_mul_pd( x1px2_k2, EV_t_l2_k2 );
-          EV_t_l2_k0 = _mm_hadd_pd( EV_t_l2_k0, EV_t_l2_k2 );
-
-          EV_t_l3_k0 = _mm_mul_pd( x1px2_k0, EV_t_l3_k0 );
-          EV_t_l3_k2 = _mm_mul_pd( x1px2_k2, EV_t_l3_k2 );
-          EV_t_l3_k0 = _mm_hadd_pd( EV_t_l3_k0, EV_t_l3_k2 );
-
-          EV_t_l2_k0 = _mm_hadd_pd( EV_t_l2_k0, EV_t_l3_k0 );
-
-
-          values[j * 2] = EV_t_l0_k0;
-          values[j * 2 + 1] = EV_t_l2_k0;                           
-
-          maxv = _mm_max_pd(maxv, _mm_and_pd(EV_t_l0_k0, absMask.m));
-          maxv = _mm_max_pd(maxv, _mm_and_pd(EV_t_l2_k0, absMask.m));
-        }
-
-        _mm_store_pd(maxima, maxv);
-
-        max = PLL_MAX(maxima[0], maxima[1]);
-
-        if(max < PLL_MINLIKELIHOOD)
-        {
-          __m128d sv = _mm_set1_pd(PLL_TWOTOTHE256);
-
-          scaleGap = 1;
-
-          _mm_store_pd(&x3[0], _mm_mul_pd(values[0], sv));         
-          _mm_store_pd(&x3[2], _mm_mul_pd(values[1], sv));
-          _mm_store_pd(&x3[4], _mm_mul_pd(values[2], sv));
-          _mm_store_pd(&x3[6], _mm_mul_pd(values[3], sv));
-          _mm_store_pd(&x3[8], _mm_mul_pd(values[4], sv));         
-          _mm_store_pd(&x3[10], _mm_mul_pd(values[5], sv));
-          _mm_store_pd(&x3[12], _mm_mul_pd(values[6], sv));
-          _mm_store_pd(&x3[14], _mm_mul_pd(values[7], sv));                      
-        }
-        else
-        {
-          _mm_store_pd(&x3[0], values[0]);         
-          _mm_store_pd(&x3[2], values[1]);
-          _mm_store_pd(&x3[4], values[2]);
-          _mm_store_pd(&x3[6], values[3]);
-          _mm_store_pd(&x3[8], values[4]);         
-          _mm_store_pd(&x3[10], values[5]);
-          _mm_store_pd(&x3[12], values[6]);
-          _mm_store_pd(&x3[14], values[7]);
-        }
-      }
-
-
-      x3 = x3_start;
-
-      for (i = 0; i < n; i++)
-      { 
-        if(x3_gap[i / 32] & mask32[i % 32])
-        {            
-          if(scaleGap)
-          {     
-            if(!fastScaling)
-              ex3[i] += 1;
-            else
-              addScale += wgt[i];                              
-          }
-        }
-        else
-        {
-          __m128d maxv =_mm_setzero_pd();                   
-
-          if(x1_gap[i / 32] & mask32[i % 32])
-            x1 = x1_gapColumn;
-          else
-          {
-            x1 = x1_ptr;
-            x1_ptr += 16;
-          }
-
-          if(x2_gap[i / 32] & mask32[i % 32])
-            x2 = x2_gapColumn;
-          else
-          {
-            x2 = x2_ptr;
-            x2_ptr += 16;
-          }
-
-
-          for (j = 0; j < 4; j++)
-          {
-
-            double *x1_p = &x1[j*4];
-            double *left_k0_p = &left[j*16];
-            double *left_k1_p = &left[j*16 + 1*4];
-            double *left_k2_p = &left[j*16 + 2*4];
-            double *left_k3_p = &left[j*16 + 3*4];
-
-            __m128d x1_0 = _mm_load_pd( &x1_p[0] );
-            __m128d x1_2 = _mm_load_pd( &x1_p[2] );
-
-            __m128d left_k0_0 = _mm_load_pd( &left_k0_p[0] );
-            __m128d left_k0_2 = _mm_load_pd( &left_k0_p[2] );
-            __m128d left_k1_0 = _mm_load_pd( &left_k1_p[0] );
-            __m128d left_k1_2 = _mm_load_pd( &left_k1_p[2] );
-            __m128d left_k2_0 = _mm_load_pd( &left_k2_p[0] );
-            __m128d left_k2_2 = _mm_load_pd( &left_k2_p[2] );
-            __m128d left_k3_0 = _mm_load_pd( &left_k3_p[0] );
-            __m128d left_k3_2 = _mm_load_pd( &left_k3_p[2] );
-
-            left_k0_0 = _mm_mul_pd(x1_0, left_k0_0);
-            left_k0_2 = _mm_mul_pd(x1_2, left_k0_2);
-
-            left_k1_0 = _mm_mul_pd(x1_0, left_k1_0);
-            left_k1_2 = _mm_mul_pd(x1_2, left_k1_2);
-
-            left_k0_0 = _mm_hadd_pd( left_k0_0, left_k0_2 );
-            left_k1_0 = _mm_hadd_pd( left_k1_0, left_k1_2);
-            left_k0_0 = _mm_hadd_pd( left_k0_0, left_k1_0);
-
-            left_k2_0 = _mm_mul_pd(x1_0, left_k2_0);
-            left_k2_2 = _mm_mul_pd(x1_2, left_k2_2);
-
-            left_k3_0 = _mm_mul_pd(x1_0, left_k3_0);
-            left_k3_2 = _mm_mul_pd(x1_2, left_k3_2);
-
-            left_k2_0 = _mm_hadd_pd( left_k2_0, left_k2_2);
-            left_k3_0 = _mm_hadd_pd( left_k3_0, left_k3_2);
-            left_k2_0 = _mm_hadd_pd( left_k2_0, left_k3_0);
-
-
-            //
-            // multiply/add right side
-            //
-            double *x2_p = &x2[j*4];
-            double *right_k0_p = &right[j*16];
-            double *right_k1_p = &right[j*16 + 1*4];
-            double *right_k2_p = &right[j*16 + 2*4];
-            double *right_k3_p = &right[j*16 + 3*4];
-            __m128d x2_0 = _mm_load_pd( &x2_p[0] );
-            __m128d x2_2 = _mm_load_pd( &x2_p[2] );
-
-            __m128d right_k0_0 = _mm_load_pd( &right_k0_p[0] );
-            __m128d right_k0_2 = _mm_load_pd( &right_k0_p[2] );
-            __m128d right_k1_0 = _mm_load_pd( &right_k1_p[0] );
-            __m128d right_k1_2 = _mm_load_pd( &right_k1_p[2] );
-            __m128d right_k2_0 = _mm_load_pd( &right_k2_p[0] );
-            __m128d right_k2_2 = _mm_load_pd( &right_k2_p[2] );
-            __m128d right_k3_0 = _mm_load_pd( &right_k3_p[0] );
-            __m128d right_k3_2 = _mm_load_pd( &right_k3_p[2] );
-
-            right_k0_0 = _mm_mul_pd( x2_0, right_k0_0);
-            right_k0_2 = _mm_mul_pd( x2_2, right_k0_2);
-
-            right_k1_0 = _mm_mul_pd( x2_0, right_k1_0);
-            right_k1_2 = _mm_mul_pd( x2_2, right_k1_2);
-
-            right_k0_0 = _mm_hadd_pd( right_k0_0, right_k0_2);
-            right_k1_0 = _mm_hadd_pd( right_k1_0, right_k1_2);
-            right_k0_0 = _mm_hadd_pd( right_k0_0, right_k1_0);
-
-            right_k2_0 = _mm_mul_pd( x2_0, right_k2_0);
-            right_k2_2 = _mm_mul_pd( x2_2, right_k2_2);
-
-
-            right_k3_0 = _mm_mul_pd( x2_0, right_k3_0);
-            right_k3_2 = _mm_mul_pd( x2_2, right_k3_2);
-
-            right_k2_0 = _mm_hadd_pd( right_k2_0, right_k2_2);
-            right_k3_0 = _mm_hadd_pd( right_k3_0, right_k3_2);
-            right_k2_0 = _mm_hadd_pd( right_k2_0, right_k3_0);     
-
-            //
-            // multiply left * right
-            //
-
-            __m128d x1px2_k0 = _mm_mul_pd( left_k0_0, right_k0_0 );
-            __m128d x1px2_k2 = _mm_mul_pd( left_k2_0, right_k2_0 );
-
-
-            //
-            // multiply with EV matrix (!?)
-            //       
-
-            __m128d EV_t_l0_k0 = EVV[0];
-            __m128d EV_t_l0_k2 = EVV[1];
-            __m128d EV_t_l1_k0 = EVV[2];
-            __m128d EV_t_l1_k2 = EVV[3];
-            __m128d EV_t_l2_k0 = EVV[4];
-            __m128d EV_t_l2_k2 = EVV[5];
-            __m128d EV_t_l3_k0 = EVV[6]; 
-            __m128d EV_t_l3_k2 = EVV[7];
-
-
-            EV_t_l0_k0 = _mm_mul_pd( x1px2_k0, EV_t_l0_k0 );
-            EV_t_l0_k2 = _mm_mul_pd( x1px2_k2, EV_t_l0_k2 );
-            EV_t_l0_k0 = _mm_hadd_pd( EV_t_l0_k0, EV_t_l0_k2 );
-
-            EV_t_l1_k0 = _mm_mul_pd( x1px2_k0, EV_t_l1_k0 );
-            EV_t_l1_k2 = _mm_mul_pd( x1px2_k2, EV_t_l1_k2 );
-
-            EV_t_l1_k0 = _mm_hadd_pd( EV_t_l1_k0, EV_t_l1_k2 );
-            EV_t_l0_k0 = _mm_hadd_pd( EV_t_l0_k0, EV_t_l1_k0 );
-
-            EV_t_l2_k0 = _mm_mul_pd( x1px2_k0, EV_t_l2_k0 );
-            EV_t_l2_k2 = _mm_mul_pd( x1px2_k2, EV_t_l2_k2 );
-            EV_t_l2_k0 = _mm_hadd_pd( EV_t_l2_k0, EV_t_l2_k2 );
-
-
-            EV_t_l3_k0 = _mm_mul_pd( x1px2_k0, EV_t_l3_k0 );
-            EV_t_l3_k2 = _mm_mul_pd( x1px2_k2, EV_t_l3_k2 );
-            EV_t_l3_k0 = _mm_hadd_pd( EV_t_l3_k0, EV_t_l3_k2 );
-
-            EV_t_l2_k0 = _mm_hadd_pd( EV_t_l2_k0, EV_t_l3_k0 );
-
-
-            values[j * 2] = EV_t_l0_k0;
-            values[j * 2 + 1] = EV_t_l2_k0;                         
-
-            maxv = _mm_max_pd(maxv, _mm_and_pd(EV_t_l0_k0, absMask.m));
-            maxv = _mm_max_pd(maxv, _mm_and_pd(EV_t_l2_k0, absMask.m));
-          }
-
-
-          _mm_store_pd(maxima, maxv);
-
-          max = PLL_MAX(maxima[0], maxima[1]);
-
-          if(max < PLL_MINLIKELIHOOD)
-          {
-            __m128d sv = _mm_set1_pd(PLL_TWOTOTHE256);
-
-            _mm_store_pd(&x3[0], _mm_mul_pd(values[0], sv));       
-            _mm_store_pd(&x3[2], _mm_mul_pd(values[1], sv));
-            _mm_store_pd(&x3[4], _mm_mul_pd(values[2], sv));
-            _mm_store_pd(&x3[6], _mm_mul_pd(values[3], sv));
-            _mm_store_pd(&x3[8], _mm_mul_pd(values[4], sv));       
-            _mm_store_pd(&x3[10], _mm_mul_pd(values[5], sv));
-            _mm_store_pd(&x3[12], _mm_mul_pd(values[6], sv));
-            _mm_store_pd(&x3[14], _mm_mul_pd(values[7], sv));        
-
-            if(!fastScaling)
-              ex3[i] += 1;
-            else
-              addScale += wgt[i];
-
-          }
-          else
-          {
-            _mm_store_pd(&x3[0], values[0]);       
-            _mm_store_pd(&x3[2], values[1]);
-            _mm_store_pd(&x3[4], values[2]);
-            _mm_store_pd(&x3[6], values[3]);
-            _mm_store_pd(&x3[8], values[4]);       
-            _mm_store_pd(&x3[10], values[5]);
-            _mm_store_pd(&x3[12], values[6]);
-            _mm_store_pd(&x3[14], values[7]);
-          }      
-
-
-
-          x3 += 16;
-
-        }
-      }
-      break;
-    default:
-      assert(0);
-  }
-
-  if(fastScaling)
-    *scalerIncrement = addScale;
-}
-
-
-
-/** @ingroup group1
- *  @brief Computation of conditional likelihood array for GTR GAMMA (Optimized SSE3 version for DNA data)
-
-    This is the SSE3 optimized version of ::newviewGAMMA_FLEX for computing the conditional
-    likelihood arrays at some node \a p, given child nodes \a q and \a r using the \b GAMMA
-    model of rate heterogeneity.
-
-    @note
-    For more details and function argument description check the function ::newviewGAMMA_FLEX
-*/
-static void newviewGTRGAMMA(int tipCase,
-                            double *x1_start, double *x2_start, double *x3_start,
-                            double *EV, double *tipVector,
-                            int *ex3, unsigned char *tipX1, unsigned char *tipX2,
-                            const int n, double *left, double *right, int *wgt, int *scalerIncrement, const pllBoolean fastScaling
-                            )
-{
-  int 
-    i, 
-    j, 
-    k, 
-    l,
-    addScale = 0;
-
-  //int scaling = 0;
-
-  double
-    *x1,
-    *x2,
-    *x3,
-    max;
-  PLL_ALIGN_BEGIN double
-    maxima[2] PLL_ALIGN_END,
-    EV_t[16] PLL_ALIGN_END;
-
-  __m128d 
-    values[8],
-    EVV[8];  
-
-  for(k = 0; k < 4; k++)
-    for (l=0; l < 4; l++)
-      EV_t[4 * l + k] = EV[4 * k + l];
-
-  for(k = 0; k < 8; k++)
-    EVV[k] = _mm_load_pd(&EV_t[k * 2]);
-
-  switch(tipCase)
-  {
-    case PLL_TIP_TIP:
-      {
-        double *uX1, *uX2;
-        PLL_ALIGN_BEGIN double umpX1[256] PLL_ALIGN_END, umpX2[256] PLL_ALIGN_END;
-
-
-        for (i = 1; i < 16; i++)
-        {
-          __m128d x1_1 = _mm_load_pd(&(tipVector[i*4]));
-          __m128d x1_2 = _mm_load_pd(&(tipVector[i*4 + 2]));       
-
-          for (j = 0; j < 4; j++)
-
-            for (k = 0; k < 4; k++) {
-              __m128d left1 = _mm_load_pd(&left[j*16 + k*4]);
-              __m128d left2 = _mm_load_pd(&left[j*16 + k*4 + 2]);
-
-              __m128d acc = _mm_setzero_pd();
-
-              acc = _mm_add_pd(acc, _mm_mul_pd(left1, x1_1));
-              acc = _mm_add_pd(acc, _mm_mul_pd(left2, x1_2));
-
-              acc = _mm_hadd_pd(acc, acc);
-              _mm_storel_pd(&umpX1[i*16 + j*4 + k], acc);
-            }
-
-          for (j = 0; j < 4; j++)
-            for (k = 0; k < 4; k++)
-            {
-              __m128d left1 = _mm_load_pd(&right[j*16 + k*4]);
-              __m128d left2 = _mm_load_pd(&right[j*16 + k*4 + 2]);
-
-              __m128d acc = _mm_setzero_pd();
-
-              acc = _mm_add_pd(acc, _mm_mul_pd(left1, x1_1));
-              acc = _mm_add_pd(acc, _mm_mul_pd(left2, x1_2));
-
-              acc = _mm_hadd_pd(acc, acc);
-              _mm_storel_pd(&umpX2[i*16 + j*4 + k], acc);
-
-            }
-        }       
-
-        for (i = 0; i < n; i++)
-        {
-          x3 = &x3_start[i * 16];
-
-
-          uX1 = &umpX1[16 * tipX1[i]];
-          uX2 = &umpX2[16 * tipX2[i]];                      
-
-          for (j = 0; j < 4; j++)
-          {                                                                                
-            __m128d uX1_k0_sse = _mm_load_pd( &uX1[j * 4] );
-            __m128d uX1_k2_sse = _mm_load_pd( &uX1[j * 4 + 2] );
-
-
-            __m128d uX2_k0_sse = _mm_load_pd( &uX2[j * 4] );
-            __m128d uX2_k2_sse = _mm_load_pd( &uX2[j * 4 + 2] );
-
-
-            //
-            // multiply left * right
-            //
-
-            __m128d x1px2_k0 = _mm_mul_pd( uX1_k0_sse, uX2_k0_sse );
-            __m128d x1px2_k2 = _mm_mul_pd( uX1_k2_sse, uX2_k2_sse );
-
-
-            //
-            // multiply with EV matrix (!?)
-            //
-
-            __m128d EV_t_l0_k0 = EVV[0];
-            __m128d EV_t_l0_k2 = EVV[1];
-            __m128d EV_t_l1_k0 = EVV[2];
-            __m128d EV_t_l1_k2 = EVV[3];
-            __m128d EV_t_l2_k0 = EVV[4];
-            __m128d EV_t_l2_k2 = EVV[5];
-            __m128d EV_t_l3_k0 = EVV[6]; 
-            __m128d EV_t_l3_k2 = EVV[7];
-
-            EV_t_l0_k0 = _mm_mul_pd( x1px2_k0, EV_t_l0_k0 );
-            EV_t_l0_k2 = _mm_mul_pd( x1px2_k2, EV_t_l0_k2 );
-            EV_t_l0_k0 = _mm_hadd_pd( EV_t_l0_k0, EV_t_l0_k2 );
-
-            EV_t_l1_k0 = _mm_mul_pd( x1px2_k0, EV_t_l1_k0 );
-            EV_t_l1_k2 = _mm_mul_pd( x1px2_k2, EV_t_l1_k2 );
-
-            EV_t_l1_k0 = _mm_hadd_pd( EV_t_l1_k0, EV_t_l1_k2 );
-            EV_t_l0_k0 = _mm_hadd_pd( EV_t_l0_k0, EV_t_l1_k0 );
-
-            EV_t_l2_k0 = _mm_mul_pd( x1px2_k0, EV_t_l2_k0 );
-            EV_t_l2_k2 = _mm_mul_pd( x1px2_k2, EV_t_l2_k2 );
-            EV_t_l2_k0 = _mm_hadd_pd( EV_t_l2_k0, EV_t_l2_k2 );
-
-            EV_t_l3_k0 = _mm_mul_pd( x1px2_k0, EV_t_l3_k0 );
-            EV_t_l3_k2 = _mm_mul_pd( x1px2_k2, EV_t_l3_k2 );
-            EV_t_l3_k0 = _mm_hadd_pd( EV_t_l3_k0, EV_t_l3_k2 );
-
-            EV_t_l2_k0 = _mm_hadd_pd( EV_t_l2_k0, EV_t_l3_k0 );
-
-            _mm_store_pd( &x3[j * 4 + 0], EV_t_l0_k0 );
-            _mm_store_pd( &x3[j * 4 + 2], EV_t_l2_k0 );
-          }
-        }
-      }
-      break;
-    case PLL_TIP_INNER:
-      { 
-        double *uX1;
-        PLL_ALIGN_BEGIN double umpX1[256] PLL_ALIGN_END;
-
-
-        for (i = 1; i < 16; i++)
-        {
-          __m128d x1_1 = _mm_load_pd(&(tipVector[i*4]));
-          __m128d x1_2 = _mm_load_pd(&(tipVector[i*4 + 2]));       
-
-          for (j = 0; j < 4; j++)
-            for (k = 0; k < 4; k++)
-            {            
-              __m128d left1 = _mm_load_pd(&left[j*16 + k*4]);
-              __m128d left2 = _mm_load_pd(&left[j*16 + k*4 + 2]);
-
-              __m128d acc = _mm_setzero_pd();
-
-              acc = _mm_add_pd(acc, _mm_mul_pd(left1, x1_1));
-              acc = _mm_add_pd(acc, _mm_mul_pd(left2, x1_2));
-
-              acc = _mm_hadd_pd(acc, acc);
-              _mm_storel_pd(&umpX1[i*16 + j*4 + k], acc);                
-            }
-        }
-
-        for (i = 0; i < n; i++)
-        {
-          __m128d maxv =_mm_setzero_pd();
-
-          x2 = &x2_start[i * 16];
-          x3 = &x3_start[i * 16];
-
-          uX1 = &umpX1[16 * tipX1[i]];       
-
-          for (j = 0; j < 4; j++)
-          {
-
-            //
-            // multiply/add right side
-            //
-            double *x2_p = &x2[j*4];
-            double *right_k0_p = &right[j*16];
-            double *right_k1_p = &right[j*16 + 1*4];
-            double *right_k2_p = &right[j*16 + 2*4];
-            double *right_k3_p = &right[j*16 + 3*4];
-            __m128d x2_0 = _mm_load_pd( &x2_p[0] );
-            __m128d x2_2 = _mm_load_pd( &x2_p[2] );
-
-            __m128d right_k0_0 = _mm_load_pd( &right_k0_p[0] );
-            __m128d right_k0_2 = _mm_load_pd( &right_k0_p[2] );
-            __m128d right_k1_0 = _mm_load_pd( &right_k1_p[0] );
-            __m128d right_k1_2 = _mm_load_pd( &right_k1_p[2] );
-            __m128d right_k2_0 = _mm_load_pd( &right_k2_p[0] );
-            __m128d right_k2_2 = _mm_load_pd( &right_k2_p[2] );
-            __m128d right_k3_0 = _mm_load_pd( &right_k3_p[0] );
-            __m128d right_k3_2 = _mm_load_pd( &right_k3_p[2] );
-
-
-
-            right_k0_0 = _mm_mul_pd( x2_0, right_k0_0);
-            right_k0_2 = _mm_mul_pd( x2_2, right_k0_2);
-
-            right_k1_0 = _mm_mul_pd( x2_0, right_k1_0);
-            right_k1_2 = _mm_mul_pd( x2_2, right_k1_2);
-
-            right_k0_0 = _mm_hadd_pd( right_k0_0, right_k0_2);
-            right_k1_0 = _mm_hadd_pd( right_k1_0, right_k1_2);
-            right_k0_0 = _mm_hadd_pd( right_k0_0, right_k1_0);
-
-
-            right_k2_0 = _mm_mul_pd( x2_0, right_k2_0);
-            right_k2_2 = _mm_mul_pd( x2_2, right_k2_2);
-
-
-            right_k3_0 = _mm_mul_pd( x2_0, right_k3_0);
-            right_k3_2 = _mm_mul_pd( x2_2, right_k3_2);
-
-            right_k2_0 = _mm_hadd_pd( right_k2_0, right_k2_2);
-            right_k3_0 = _mm_hadd_pd( right_k3_0, right_k3_2);
-            right_k2_0 = _mm_hadd_pd( right_k2_0, right_k3_0);
-
-            {
-              //
-              // load left side from tip vector
-              //
-
-              __m128d uX1_k0_sse = _mm_load_pd( &uX1[j * 4] );
-              __m128d uX1_k2_sse = _mm_load_pd( &uX1[j * 4 + 2] );
-
-
-              //
-              // multiply left * right
-              //
-
-              __m128d x1px2_k0 = _mm_mul_pd( uX1_k0_sse, right_k0_0 );
-              __m128d x1px2_k2 = _mm_mul_pd( uX1_k2_sse, right_k2_0 );
-
-
-              //
-              // multiply with EV matrix (!?)
-              //                                  
-
-              __m128d EV_t_l0_k0 = EVV[0];
-              __m128d EV_t_l0_k2 = EVV[1];
-              __m128d EV_t_l1_k0 = EVV[2];
-              __m128d EV_t_l1_k2 = EVV[3];
-              __m128d EV_t_l2_k0 = EVV[4];
-              __m128d EV_t_l2_k2 = EVV[5];
-              __m128d EV_t_l3_k0 = EVV[6]; 
-              __m128d EV_t_l3_k2 = EVV[7];
-
-
-              EV_t_l0_k0 = _mm_mul_pd( x1px2_k0, EV_t_l0_k0 );
-              EV_t_l0_k2 = _mm_mul_pd( x1px2_k2, EV_t_l0_k2 );
-              EV_t_l0_k0 = _mm_hadd_pd( EV_t_l0_k0, EV_t_l0_k2 );
-
-              EV_t_l1_k0 = _mm_mul_pd( x1px2_k0, EV_t_l1_k0 );
-              EV_t_l1_k2 = _mm_mul_pd( x1px2_k2, EV_t_l1_k2 );
-
-              EV_t_l1_k0 = _mm_hadd_pd( EV_t_l1_k0, EV_t_l1_k2 );
-              EV_t_l0_k0 = _mm_hadd_pd( EV_t_l0_k0, EV_t_l1_k0 );
-
-              EV_t_l2_k0 = _mm_mul_pd( x1px2_k0, EV_t_l2_k0 );
-              EV_t_l2_k2 = _mm_mul_pd( x1px2_k2, EV_t_l2_k2 );
-              EV_t_l2_k0 = _mm_hadd_pd( EV_t_l2_k0, EV_t_l2_k2 );
-
-              EV_t_l3_k0 = _mm_mul_pd( x1px2_k0, EV_t_l3_k0 );
-              EV_t_l3_k2 = _mm_mul_pd( x1px2_k2, EV_t_l3_k2 );
-              EV_t_l3_k0 = _mm_hadd_pd( EV_t_l3_k0, EV_t_l3_k2 );
-
-              EV_t_l2_k0 = _mm_hadd_pd( EV_t_l2_k0, EV_t_l3_k0 );
-
-              values[j * 2]     = EV_t_l0_k0;
-              values[j * 2 + 1] = EV_t_l2_k0;                              
-
-              maxv = _mm_max_pd(maxv, _mm_and_pd(EV_t_l0_k0, absMask.m));
-              maxv = _mm_max_pd(maxv, _mm_and_pd(EV_t_l2_k0, absMask.m));                  
-            }
-          }
-
-
-          _mm_store_pd(maxima, maxv);
-
-          max = PLL_MAX(maxima[0], maxima[1]);
-
-          if(max < PLL_MINLIKELIHOOD)
-          {
-            __m128d sv = _mm_set1_pd(PLL_TWOTOTHE256);
-
-            _mm_store_pd(&x3[0], _mm_mul_pd(values[0], sv));       
-            _mm_store_pd(&x3[2], _mm_mul_pd(values[1], sv));
-            _mm_store_pd(&x3[4], _mm_mul_pd(values[2], sv));
-            _mm_store_pd(&x3[6], _mm_mul_pd(values[3], sv));
-            _mm_store_pd(&x3[8], _mm_mul_pd(values[4], sv));       
-            _mm_store_pd(&x3[10], _mm_mul_pd(values[5], sv));
-            _mm_store_pd(&x3[12], _mm_mul_pd(values[6], sv));
-            _mm_store_pd(&x3[14], _mm_mul_pd(values[7], sv));        
-
-             if(!fastScaling)
-               ex3[i] += 1;
-             else
-               addScale += wgt[i];
-
-          }
-          else
-          {
-            _mm_store_pd(&x3[0], values[0]);       
-            _mm_store_pd(&x3[2], values[1]);
-            _mm_store_pd(&x3[4], values[2]);
-            _mm_store_pd(&x3[6], values[3]);
-            _mm_store_pd(&x3[8], values[4]);       
-            _mm_store_pd(&x3[10], values[5]);
-            _mm_store_pd(&x3[12], values[6]);
-            _mm_store_pd(&x3[14], values[7]);
-          }
-        }
-      }
-      break;
-    case PLL_INNER_INNER:
-
-      for (i = 0; i < n; i++)
-      {
-        __m128d maxv =_mm_setzero_pd();
-
-
-        x1 = &x1_start[i * 16];
-        x2 = &x2_start[i * 16];
-        x3 = &x3_start[i * 16];
-
-        for (j = 0; j < 4; j++)
-        {
-
-          double *x1_p = &x1[j*4];
-          double *left_k0_p = &left[j*16];
-          double *left_k1_p = &left[j*16 + 1*4];
-          double *left_k2_p = &left[j*16 + 2*4];
-          double *left_k3_p = &left[j*16 + 3*4];
-
-          __m128d x1_0 = _mm_load_pd( &x1_p[0] );
-          __m128d x1_2 = _mm_load_pd( &x1_p[2] );
-
-          __m128d left_k0_0 = _mm_load_pd( &left_k0_p[0] );
-          __m128d left_k0_2 = _mm_load_pd( &left_k0_p[2] );
-          __m128d left_k1_0 = _mm_load_pd( &left_k1_p[0] );
-          __m128d left_k1_2 = _mm_load_pd( &left_k1_p[2] );
-          __m128d left_k2_0 = _mm_load_pd( &left_k2_p[0] );
-          __m128d left_k2_2 = _mm_load_pd( &left_k2_p[2] );
-          __m128d left_k3_0 = _mm_load_pd( &left_k3_p[0] );
-          __m128d left_k3_2 = _mm_load_pd( &left_k3_p[2] );
-
-          left_k0_0 = _mm_mul_pd(x1_0, left_k0_0);
-          left_k0_2 = _mm_mul_pd(x1_2, left_k0_2);
-
-          left_k1_0 = _mm_mul_pd(x1_0, left_k1_0);
-          left_k1_2 = _mm_mul_pd(x1_2, left_k1_2);
-
-          left_k0_0 = _mm_hadd_pd( left_k0_0, left_k0_2 );
-          left_k1_0 = _mm_hadd_pd( left_k1_0, left_k1_2);
-          left_k0_0 = _mm_hadd_pd( left_k0_0, left_k1_0);
-
-          left_k2_0 = _mm_mul_pd(x1_0, left_k2_0);
-          left_k2_2 = _mm_mul_pd(x1_2, left_k2_2);
-
-          left_k3_0 = _mm_mul_pd(x1_0, left_k3_0);
-          left_k3_2 = _mm_mul_pd(x1_2, left_k3_2);
-
-          left_k2_0 = _mm_hadd_pd( left_k2_0, left_k2_2);
-          left_k3_0 = _mm_hadd_pd( left_k3_0, left_k3_2);
-          left_k2_0 = _mm_hadd_pd( left_k2_0, left_k3_0);
-
-
-          //
-          // multiply/add right side
-          //
-          double *x2_p = &x2[j*4];
-          double *right_k0_p = &right[j*16];
-          double *right_k1_p = &right[j*16 + 1*4];
-          double *right_k2_p = &right[j*16 + 2*4];
-          double *right_k3_p = &right[j*16 + 3*4];
-          __m128d x2_0 = _mm_load_pd( &x2_p[0] );
-          __m128d x2_2 = _mm_load_pd( &x2_p[2] );
-
-          __m128d right_k0_0 = _mm_load_pd( &right_k0_p[0] );
-          __m128d right_k0_2 = _mm_load_pd( &right_k0_p[2] );
-          __m128d right_k1_0 = _mm_load_pd( &right_k1_p[0] );
-          __m128d right_k1_2 = _mm_load_pd( &right_k1_p[2] );
-          __m128d right_k2_0 = _mm_load_pd( &right_k2_p[0] );
-          __m128d right_k2_2 = _mm_load_pd( &right_k2_p[2] );
-          __m128d right_k3_0 = _mm_load_pd( &right_k3_p[0] );
-          __m128d right_k3_2 = _mm_load_pd( &right_k3_p[2] );
-
-          right_k0_0 = _mm_mul_pd( x2_0, right_k0_0);
-          right_k0_2 = _mm_mul_pd( x2_2, right_k0_2);
-
-          right_k1_0 = _mm_mul_pd( x2_0, right_k1_0);
-          right_k1_2 = _mm_mul_pd( x2_2, right_k1_2);
-
-          right_k0_0 = _mm_hadd_pd( right_k0_0, right_k0_2);
-          right_k1_0 = _mm_hadd_pd( right_k1_0, right_k1_2);
-          right_k0_0 = _mm_hadd_pd( right_k0_0, right_k1_0);
-
-          right_k2_0 = _mm_mul_pd( x2_0, right_k2_0);
-          right_k2_2 = _mm_mul_pd( x2_2, right_k2_2);
-
-
-          right_k3_0 = _mm_mul_pd( x2_0, right_k3_0);
-          right_k3_2 = _mm_mul_pd( x2_2, right_k3_2);
-
-          right_k2_0 = _mm_hadd_pd( right_k2_0, right_k2_2);
-          right_k3_0 = _mm_hadd_pd( right_k3_0, right_k3_2);
-          right_k2_0 = _mm_hadd_pd( right_k2_0, right_k3_0);       
-
-          //
-          // multiply left * right
-          //
-
-          __m128d x1px2_k0 = _mm_mul_pd( left_k0_0, right_k0_0 );
-          __m128d x1px2_k2 = _mm_mul_pd( left_k2_0, right_k2_0 );
-
-
-          //
-          // multiply with EV matrix (!?)
-          //         
-
-          __m128d EV_t_l0_k0 = EVV[0];
-          __m128d EV_t_l0_k2 = EVV[1];
-          __m128d EV_t_l1_k0 = EVV[2];
-          __m128d EV_t_l1_k2 = EVV[3];
-          __m128d EV_t_l2_k0 = EVV[4];
-          __m128d EV_t_l2_k2 = EVV[5];
-          __m128d EV_t_l3_k0 = EVV[6]; 
-          __m128d EV_t_l3_k2 = EVV[7];
-
-
-          EV_t_l0_k0 = _mm_mul_pd( x1px2_k0, EV_t_l0_k0 );
-          EV_t_l0_k2 = _mm_mul_pd( x1px2_k2, EV_t_l0_k2 );
-          EV_t_l0_k0 = _mm_hadd_pd( EV_t_l0_k0, EV_t_l0_k2 );
-
-          EV_t_l1_k0 = _mm_mul_pd( x1px2_k0, EV_t_l1_k0 );
-          EV_t_l1_k2 = _mm_mul_pd( x1px2_k2, EV_t_l1_k2 );
-
-          EV_t_l1_k0 = _mm_hadd_pd( EV_t_l1_k0, EV_t_l1_k2 );
-          EV_t_l0_k0 = _mm_hadd_pd( EV_t_l0_k0, EV_t_l1_k0 );
-
-          EV_t_l2_k0 = _mm_mul_pd( x1px2_k0, EV_t_l2_k0 );
-          EV_t_l2_k2 = _mm_mul_pd( x1px2_k2, EV_t_l2_k2 );
-          EV_t_l2_k0 = _mm_hadd_pd( EV_t_l2_k0, EV_t_l2_k2 );
-
-
-          EV_t_l3_k0 = _mm_mul_pd( x1px2_k0, EV_t_l3_k0 );
-          EV_t_l3_k2 = _mm_mul_pd( x1px2_k2, EV_t_l3_k2 );
-          EV_t_l3_k0 = _mm_hadd_pd( EV_t_l3_k0, EV_t_l3_k2 );
-
-          EV_t_l2_k0 = _mm_hadd_pd( EV_t_l2_k0, EV_t_l3_k0 );
-
-
-          values[j * 2] = EV_t_l0_k0;
-          values[j * 2 + 1] = EV_t_l2_k0;                           
-
-          maxv = _mm_max_pd(maxv, _mm_and_pd(EV_t_l0_k0, absMask.m));
-          maxv = _mm_max_pd(maxv, _mm_and_pd(EV_t_l2_k0, absMask.m));
-        }
-
-
-        _mm_store_pd(maxima, maxv);
-
-        max = PLL_MAX(maxima[0], maxima[1]);
-
-        if(max < PLL_MINLIKELIHOOD)
-        {
-          __m128d sv = _mm_set1_pd(PLL_TWOTOTHE256);
-
-          _mm_store_pd(&x3[0], _mm_mul_pd(values[0], sv));         
-          _mm_store_pd(&x3[2], _mm_mul_pd(values[1], sv));
-          _mm_store_pd(&x3[4], _mm_mul_pd(values[2], sv));
-          _mm_store_pd(&x3[6], _mm_mul_pd(values[3], sv));
-          _mm_store_pd(&x3[8], _mm_mul_pd(values[4], sv));         
-          _mm_store_pd(&x3[10], _mm_mul_pd(values[5], sv));
-          _mm_store_pd(&x3[12], _mm_mul_pd(values[6], sv));
-          _mm_store_pd(&x3[14], _mm_mul_pd(values[7], sv));          
-
-           if(!fastScaling)
-             ex3[i] += 1;
-           else
-             addScale += wgt[i];        
-        }
-        else
-        {
-          _mm_store_pd(&x3[0], values[0]);         
-          _mm_store_pd(&x3[2], values[1]);
-          _mm_store_pd(&x3[4], values[2]);
-          _mm_store_pd(&x3[6], values[3]);
-          _mm_store_pd(&x3[8], values[4]);         
-          _mm_store_pd(&x3[10], values[5]);
-          _mm_store_pd(&x3[12], values[6]);
-          _mm_store_pd(&x3[14], values[7]);
-        }        
-      }
-
-      break;
-    default:
-      assert(0);
-  }
-
-  if(fastScaling)
-    *scalerIncrement = addScale;
-}
-
-
-/** @ingroup group1
- *  @brief Computation of conditional likelihood array for GTR CAT (Optimized SSE3 version for DNA data)
-
-    This is the SSE3 optimized version of ::newviewCAT_FLEX for computing the conditional
-    likelihood arrays at some node \a p, given child nodes \a q and \a r using the \b CAT
-    model of rate heterogeneity. Each of the \a n sites has 4 entries; site \a i uses the 16-double
-    blocks of \a left and \a right selected by \a cptr[i], and its result is written to \a x3_start.
-    @note
-    For more details and function argument description check the function ::newviewCAT_FLEX
-*/
-static void newviewGTRCAT( int tipCase,  double *EV,  int *cptr,
-                           double *x1_start, double *x2_start,  double *x3_start, double *tipVector,
-                           int *ex3, unsigned char *tipX1, unsigned char *tipX2,
-                           int n,  double *left, double *right, int *wgt, int *scalerIncrement, const pllBoolean fastScaling)
-{
-  double
-    *le,
-    *ri,
-    *x1,
-    *x2, 
-    *x3;
-  PLL_ALIGN_BEGIN double
-    EV_t[16] PLL_ALIGN_END;  /* receives the transpose of EV (filled below) */
-
-  int 
-    i, 
-    j, 
-    scale, 
-    addScale = 0;
-
-  __m128d
-    minlikelihood_sse = _mm_set1_pd( PLL_MINLIKELIHOOD ),
-                      sc = _mm_set1_pd(PLL_TWOTOTHE256),
-                      EVV[8];  
-
-  for(i = 0; i < 4; i++)
-    for (j=0; j < 4; j++)
-      EV_t[4 * j + i] = EV[4 * i + j];  /* EV_t is the transpose of the 4x4 matrix EV */
-
-  for(i = 0; i < 8; i++)
-    EVV[i] = _mm_load_pd(&EV_t[i * 2]);  /* EVV[2*l] and EVV[2*l+1] hold the four doubles of row l of EV_t */
-
-  switch(tipCase)
-  {
-    case PLL_TIP_TIP:      /* both children are read from tipVector; this branch has no scaling check */
-      for (i = 0; i < n; i++)
-      {  
-        x1 = &(tipVector[4 * tipX1[i]]);  /* 4 entries of tipVector selected by the code in tipX1[i] */
-        x2 = &(tipVector[4 * tipX2[i]]);
-
-        x3 = &x3_start[i * 4];
-
-        le =  &left[cptr[i] * 16];  /* 16-double block of left for the index stored in cptr[i] */
-        ri =  &right[cptr[i] * 16];
-
-        __m128d x1_0 = _mm_load_pd( &x1[0] );
-        __m128d x1_2 = _mm_load_pd( &x1[2] );
-
-        __m128d left_k0_0 = _mm_load_pd( &le[0] );
-        __m128d left_k0_2 = _mm_load_pd( &le[2] );
-        __m128d left_k1_0 = _mm_load_pd( &le[4] );
-        __m128d left_k1_2 = _mm_load_pd( &le[6] );
-        __m128d left_k2_0 = _mm_load_pd( &le[8] );
-        __m128d left_k2_2 = _mm_load_pd( &le[10] );
-        __m128d left_k3_0 = _mm_load_pd( &le[12] );
-        __m128d left_k3_2 = _mm_load_pd( &le[14] );
-
-        left_k0_0 = _mm_mul_pd(x1_0, left_k0_0);
-        left_k0_2 = _mm_mul_pd(x1_2, left_k0_2);
-
-        left_k1_0 = _mm_mul_pd(x1_0, left_k1_0);
-        left_k1_2 = _mm_mul_pd(x1_2, left_k1_2);
-
-        left_k0_0 = _mm_hadd_pd( left_k0_0, left_k0_2 );
-        left_k1_0 = _mm_hadd_pd( left_k1_0, left_k1_2);
-        left_k0_0 = _mm_hadd_pd( left_k0_0, left_k1_0);  /* lanes: dot(x1, le[0..3]), dot(x1, le[4..7]) */
-
-        left_k2_0 = _mm_mul_pd(x1_0, left_k2_0);
-        left_k2_2 = _mm_mul_pd(x1_2, left_k2_2);
-
-        left_k3_0 = _mm_mul_pd(x1_0, left_k3_0);
-        left_k3_2 = _mm_mul_pd(x1_2, left_k3_2);
-
-        left_k2_0 = _mm_hadd_pd( left_k2_0, left_k2_2);
-        left_k3_0 = _mm_hadd_pd( left_k3_0, left_k3_2);
-        left_k2_0 = _mm_hadd_pd( left_k2_0, left_k3_0);  /* lanes: dot(x1, le[8..11]), dot(x1, le[12..15]) */
-
-        __m128d x2_0 = _mm_load_pd( &x2[0] );
-        __m128d x2_2 = _mm_load_pd( &x2[2] );
-
-        __m128d right_k0_0 = _mm_load_pd( &ri[0] );
-        __m128d right_k0_2 = _mm_load_pd( &ri[2] );
-        __m128d right_k1_0 = _mm_load_pd( &ri[4] );
-        __m128d right_k1_2 = _mm_load_pd( &ri[6] );
-        __m128d right_k2_0 = _mm_load_pd( &ri[8] );
-        __m128d right_k2_2 = _mm_load_pd( &ri[10] );
-        __m128d right_k3_0 = _mm_load_pd( &ri[12] );
-        __m128d right_k3_2 = _mm_load_pd( &ri[14] );
-
-        right_k0_0 = _mm_mul_pd( x2_0, right_k0_0);
-        right_k0_2 = _mm_mul_pd( x2_2, right_k0_2);
-
-        right_k1_0 = _mm_mul_pd( x2_0, right_k1_0);
-        right_k1_2 = _mm_mul_pd( x2_2, right_k1_2);
-
-        right_k0_0 = _mm_hadd_pd( right_k0_0, right_k0_2);
-        right_k1_0 = _mm_hadd_pd( right_k1_0, right_k1_2);
-        right_k0_0 = _mm_hadd_pd( right_k0_0, right_k1_0);  /* same four dot products for x2 and ri */
-
-        right_k2_0 = _mm_mul_pd( x2_0, right_k2_0);
-        right_k2_2 = _mm_mul_pd( x2_2, right_k2_2);
-
-        right_k3_0 = _mm_mul_pd( x2_0, right_k3_0);
-        right_k3_2 = _mm_mul_pd( x2_2, right_k3_2);
-
-        right_k2_0 = _mm_hadd_pd( right_k2_0, right_k2_2);
-        right_k3_0 = _mm_hadd_pd( right_k3_0, right_k3_2);
-        right_k2_0 = _mm_hadd_pd( right_k2_0, right_k3_0);         
-
-        __m128d x1px2_k0 = _mm_mul_pd( left_k0_0, right_k0_0 );  /* element-wise product of the left and right results */
-        __m128d x1px2_k2 = _mm_mul_pd( left_k2_0, right_k2_0 );           
-
-        __m128d EV_t_l0_k0 = EVV[0];
-        __m128d EV_t_l0_k2 = EVV[1];
-        __m128d EV_t_l1_k0 = EVV[2];
-        __m128d EV_t_l1_k2 = EVV[3];
-        __m128d EV_t_l2_k0 = EVV[4];
-        __m128d EV_t_l2_k2 = EVV[5];
-        __m128d EV_t_l3_k0 = EVV[6];
-        __m128d EV_t_l3_k2 = EVV[7];
-
-        EV_t_l0_k0 = _mm_mul_pd( x1px2_k0, EV_t_l0_k0 );
-        EV_t_l0_k2 = _mm_mul_pd( x1px2_k2, EV_t_l0_k2 );
-        EV_t_l0_k0 = _mm_hadd_pd( EV_t_l0_k0, EV_t_l0_k2 );
-
-        EV_t_l1_k0 = _mm_mul_pd( x1px2_k0, EV_t_l1_k0 );
-        EV_t_l1_k2 = _mm_mul_pd( x1px2_k2, EV_t_l1_k2 );
-
-        EV_t_l1_k0 = _mm_hadd_pd( EV_t_l1_k0, EV_t_l1_k2 );
-        EV_t_l0_k0 = _mm_hadd_pd( EV_t_l0_k0, EV_t_l1_k0 );  /* lanes: product vector dotted with rows 0 and 1 of EV_t */
-
-        EV_t_l2_k0 = _mm_mul_pd( x1px2_k0, EV_t_l2_k0 );
-        EV_t_l2_k2 = _mm_mul_pd( x1px2_k2, EV_t_l2_k2 );
-        EV_t_l2_k0 = _mm_hadd_pd( EV_t_l2_k0, EV_t_l2_k2 );
-
-        EV_t_l3_k0 = _mm_mul_pd( x1px2_k0, EV_t_l3_k0 );
-        EV_t_l3_k2 = _mm_mul_pd( x1px2_k2, EV_t_l3_k2 );
-        EV_t_l3_k0 = _mm_hadd_pd( EV_t_l3_k0, EV_t_l3_k2 );
-
-        EV_t_l2_k0 = _mm_hadd_pd( EV_t_l2_k0, EV_t_l3_k0 );  /* lanes: product vector dotted with rows 2 and 3 of EV_t */
-
-        _mm_store_pd(x3, EV_t_l0_k0);  /* stored unscaled; ex3 and addScale are not touched in this branch */
-        _mm_store_pd(&x3[2], EV_t_l2_k0);                                   
-      }
-      break;
-    case PLL_TIP_INNER:      /* x1 comes from tipVector, x2 from x2_start; arithmetic mirrors the branch above */
-      for (i = 0; i < n; i++)
-      {
-        x1 = &(tipVector[4 * tipX1[i]]);
-        x2 = &x2_start[4 * i];
-        x3 = &x3_start[4 * i];
-
-        le =  &left[cptr[i] * 16];
-        ri =  &right[cptr[i] * 16];
-
-        __m128d x1_0 = _mm_load_pd( &x1[0] );
-        __m128d x1_2 = _mm_load_pd( &x1[2] );
-
-        __m128d left_k0_0 = _mm_load_pd( &le[0] );
-        __m128d left_k0_2 = _mm_load_pd( &le[2] );
-        __m128d left_k1_0 = _mm_load_pd( &le[4] );
-        __m128d left_k1_2 = _mm_load_pd( &le[6] );
-        __m128d left_k2_0 = _mm_load_pd( &le[8] );
-        __m128d left_k2_2 = _mm_load_pd( &le[10] );
-        __m128d left_k3_0 = _mm_load_pd( &le[12] );
-        __m128d left_k3_2 = _mm_load_pd( &le[14] );
-
-        left_k0_0 = _mm_mul_pd(x1_0, left_k0_0);
-        left_k0_2 = _mm_mul_pd(x1_2, left_k0_2);
-
-        left_k1_0 = _mm_mul_pd(x1_0, left_k1_0);
-        left_k1_2 = _mm_mul_pd(x1_2, left_k1_2);
-
-        left_k0_0 = _mm_hadd_pd( left_k0_0, left_k0_2 );
-        left_k1_0 = _mm_hadd_pd( left_k1_0, left_k1_2);
-        left_k0_0 = _mm_hadd_pd( left_k0_0, left_k1_0);
-
-        left_k2_0 = _mm_mul_pd(x1_0, left_k2_0);
-        left_k2_2 = _mm_mul_pd(x1_2, left_k2_2);
-
-        left_k3_0 = _mm_mul_pd(x1_0, left_k3_0);
-        left_k3_2 = _mm_mul_pd(x1_2, left_k3_2);
-
-        left_k2_0 = _mm_hadd_pd( left_k2_0, left_k2_2);
-        left_k3_0 = _mm_hadd_pd( left_k3_0, left_k3_2);
-        left_k2_0 = _mm_hadd_pd( left_k2_0, left_k3_0);
-
-        __m128d x2_0 = _mm_load_pd( &x2[0] );
-        __m128d x2_2 = _mm_load_pd( &x2[2] );
-
-        __m128d right_k0_0 = _mm_load_pd( &ri[0] );
-        __m128d right_k0_2 = _mm_load_pd( &ri[2] );
-        __m128d right_k1_0 = _mm_load_pd( &ri[4] );
-        __m128d right_k1_2 = _mm_load_pd( &ri[6] );
-        __m128d right_k2_0 = _mm_load_pd( &ri[8] );
-        __m128d right_k2_2 = _mm_load_pd( &ri[10] );
-        __m128d right_k3_0 = _mm_load_pd( &ri[12] );
-        __m128d right_k3_2 = _mm_load_pd( &ri[14] );
-
-        right_k0_0 = _mm_mul_pd( x2_0, right_k0_0);
-        right_k0_2 = _mm_mul_pd( x2_2, right_k0_2);
-
-        right_k1_0 = _mm_mul_pd( x2_0, right_k1_0);
-        right_k1_2 = _mm_mul_pd( x2_2, right_k1_2);
-
-        right_k0_0 = _mm_hadd_pd( right_k0_0, right_k0_2);
-        right_k1_0 = _mm_hadd_pd( right_k1_0, right_k1_2);
-        right_k0_0 = _mm_hadd_pd( right_k0_0, right_k1_0);
-
-        right_k2_0 = _mm_mul_pd( x2_0, right_k2_0);
-        right_k2_2 = _mm_mul_pd( x2_2, right_k2_2);
-
-        right_k3_0 = _mm_mul_pd( x2_0, right_k3_0);
-        right_k3_2 = _mm_mul_pd( x2_2, right_k3_2);
-
-        right_k2_0 = _mm_hadd_pd( right_k2_0, right_k2_2);
-        right_k3_0 = _mm_hadd_pd( right_k3_0, right_k3_2);
-        right_k2_0 = _mm_hadd_pd( right_k2_0, right_k3_0);         
-
-        __m128d x1px2_k0 = _mm_mul_pd( left_k0_0, right_k0_0 );
-        __m128d x1px2_k2 = _mm_mul_pd( left_k2_0, right_k2_0 );
-
-        __m128d EV_t_l0_k0 = EVV[0];
-        __m128d EV_t_l0_k2 = EVV[1];
-        __m128d EV_t_l1_k0 = EVV[2];
-        __m128d EV_t_l1_k2 = EVV[3];
-        __m128d EV_t_l2_k0 = EVV[4];
-        __m128d EV_t_l2_k2 = EVV[5];
-        __m128d EV_t_l3_k0 = EVV[6];
-        __m128d EV_t_l3_k2 = EVV[7];
-
-
-        EV_t_l0_k0 = _mm_mul_pd( x1px2_k0, EV_t_l0_k0 );
-        EV_t_l0_k2 = _mm_mul_pd( x1px2_k2, EV_t_l0_k2 );
-        EV_t_l0_k0 = _mm_hadd_pd( EV_t_l0_k0, EV_t_l0_k2 );
-
-        EV_t_l1_k0 = _mm_mul_pd( x1px2_k0, EV_t_l1_k0 );
-        EV_t_l1_k2 = _mm_mul_pd( x1px2_k2, EV_t_l1_k2 );
-
-        EV_t_l1_k0 = _mm_hadd_pd( EV_t_l1_k0, EV_t_l1_k2 );
-        EV_t_l0_k0 = _mm_hadd_pd( EV_t_l0_k0, EV_t_l1_k0 );
-
-        EV_t_l2_k0 = _mm_mul_pd( x1px2_k0, EV_t_l2_k0 );
-        EV_t_l2_k2 = _mm_mul_pd( x1px2_k2, EV_t_l2_k2 );
-        EV_t_l2_k0 = _mm_hadd_pd( EV_t_l2_k0, EV_t_l2_k2 );
-
-        EV_t_l3_k0 = _mm_mul_pd( x1px2_k0, EV_t_l3_k0 );
-        EV_t_l3_k2 = _mm_mul_pd( x1px2_k2, EV_t_l3_k2 );
-        EV_t_l3_k0 = _mm_hadd_pd( EV_t_l3_k0, EV_t_l3_k2 );
-
-        EV_t_l2_k0 = _mm_hadd_pd( EV_t_l2_k0, EV_t_l3_k0 );                                       
-
-        scale = 1;  /* cleared below unless all four masked values compare < PLL_MINLIKELIHOOD */
-
-        __m128d v1 = _mm_and_pd(EV_t_l0_k0, absMask.m);  /* NOTE(review): absMask is defined elsewhere; presumably clears the sign bits - confirm */
-        v1 = _mm_cmplt_pd(v1,  minlikelihood_sse);
-        if(_mm_movemask_pd( v1 ) != 3)  /* at least one of the two lanes is not below the threshold */
-          scale = 0;
-        else
-        {
-          v1 = _mm_and_pd(EV_t_l2_k0, absMask.m);
-          v1 = _mm_cmplt_pd(v1,  minlikelihood_sse);
-          if(_mm_movemask_pd( v1 ) != 3)
-            scale = 0;
-        }
-
-        if(scale)
-        {                     
-          _mm_store_pd(&x3[0], _mm_mul_pd(EV_t_l0_k0, sc));  /* store the entries multiplied by PLL_TWOTOTHE256 */
-          _mm_store_pd(&x3[2], _mm_mul_pd(EV_t_l2_k0, sc));                   
-
-           if(!fastScaling)
-             ex3[i] += 1;  /* record the scaling event for this site */
-           else
-             addScale += wgt[i];  /* or accumulate this site's weight for scalerIncrement */
-        }       
-        else
-        {
-          _mm_store_pd(x3, EV_t_l0_k0);
-          _mm_store_pd(&x3[2], EV_t_l2_k0);
-        }
-
-
-      }
-      break;
-    case PLL_INNER_INNER:  /* x1 comes from x1_start, x2 from x2_start; same arithmetic and scaling test as PLL_TIP_INNER */
-      for (i = 0; i < n; i++)
-      {
-        x1 = &x1_start[4 * i];
-        x2 = &x2_start[4 * i];
-        x3 = &x3_start[4 * i];
-
-        le =  &left[cptr[i] * 16];
-        ri =  &right[cptr[i] * 16];
-
-        __m128d x1_0 = _mm_load_pd( &x1[0] );
-        __m128d x1_2 = _mm_load_pd( &x1[2] );
-
-        __m128d left_k0_0 = _mm_load_pd( &le[0] );
-        __m128d left_k0_2 = _mm_load_pd( &le[2] );
-        __m128d left_k1_0 = _mm_load_pd( &le[4] );
-        __m128d left_k1_2 = _mm_load_pd( &le[6] );
-        __m128d left_k2_0 = _mm_load_pd( &le[8] );
-        __m128d left_k2_2 = _mm_load_pd( &le[10] );
-        __m128d left_k3_0 = _mm_load_pd( &le[12] );
-        __m128d left_k3_2 = _mm_load_pd( &le[14] );
-
-        left_k0_0 = _mm_mul_pd(x1_0, left_k0_0);
-        left_k0_2 = _mm_mul_pd(x1_2, left_k0_2);
-
-        left_k1_0 = _mm_mul_pd(x1_0, left_k1_0);
-        left_k1_2 = _mm_mul_pd(x1_2, left_k1_2);
-
-        left_k0_0 = _mm_hadd_pd( left_k0_0, left_k0_2 );
-        left_k1_0 = _mm_hadd_pd( left_k1_0, left_k1_2);
-        left_k0_0 = _mm_hadd_pd( left_k0_0, left_k1_0);
-
-        left_k2_0 = _mm_mul_pd(x1_0, left_k2_0);
-        left_k2_2 = _mm_mul_pd(x1_2, left_k2_2);
-
-        left_k3_0 = _mm_mul_pd(x1_0, left_k3_0);
-        left_k3_2 = _mm_mul_pd(x1_2, left_k3_2);
-
-        left_k2_0 = _mm_hadd_pd( left_k2_0, left_k2_2);
-        left_k3_0 = _mm_hadd_pd( left_k3_0, left_k3_2);
-        left_k2_0 = _mm_hadd_pd( left_k2_0, left_k3_0);
-
-        __m128d x2_0 = _mm_load_pd( &x2[0] );
-        __m128d x2_2 = _mm_load_pd( &x2[2] );
-
-        __m128d right_k0_0 = _mm_load_pd( &ri[0] );
-        __m128d right_k0_2 = _mm_load_pd( &ri[2] );
-        __m128d right_k1_0 = _mm_load_pd( &ri[4] );
-        __m128d right_k1_2 = _mm_load_pd( &ri[6] );
-        __m128d right_k2_0 = _mm_load_pd( &ri[8] );
-        __m128d right_k2_2 = _mm_load_pd( &ri[10] );
-        __m128d right_k3_0 = _mm_load_pd( &ri[12] );
-        __m128d right_k3_2 = _mm_load_pd( &ri[14] );
-
-        right_k0_0 = _mm_mul_pd( x2_0, right_k0_0);
-        right_k0_2 = _mm_mul_pd( x2_2, right_k0_2);
-
-        right_k1_0 = _mm_mul_pd( x2_0, right_k1_0);
-        right_k1_2 = _mm_mul_pd( x2_2, right_k1_2);
-
-        right_k0_0 = _mm_hadd_pd( right_k0_0, right_k0_2);
-        right_k1_0 = _mm_hadd_pd( right_k1_0, right_k1_2);
-        right_k0_0 = _mm_hadd_pd( right_k0_0, right_k1_0);
-
-        right_k2_0 = _mm_mul_pd( x2_0, right_k2_0);
-        right_k2_2 = _mm_mul_pd( x2_2, right_k2_2);
-
-        right_k3_0 = _mm_mul_pd( x2_0, right_k3_0);
-        right_k3_2 = _mm_mul_pd( x2_2, right_k3_2);
-
-        right_k2_0 = _mm_hadd_pd( right_k2_0, right_k2_2);
-        right_k3_0 = _mm_hadd_pd( right_k3_0, right_k3_2);
-        right_k2_0 = _mm_hadd_pd( right_k2_0, right_k3_0);         
-
-        __m128d x1px2_k0 = _mm_mul_pd( left_k0_0, right_k0_0 );
-        __m128d x1px2_k2 = _mm_mul_pd( left_k2_0, right_k2_0 );
-
-        __m128d EV_t_l0_k0 = EVV[0];
-        __m128d EV_t_l0_k2 = EVV[1];
-        __m128d EV_t_l1_k0 = EVV[2];
-        __m128d EV_t_l1_k2 = EVV[3];
-        __m128d EV_t_l2_k0 = EVV[4];
-        __m128d EV_t_l2_k2 = EVV[5];
-        __m128d EV_t_l3_k0 = EVV[6];
-        __m128d EV_t_l3_k2 = EVV[7];
-
-
-        EV_t_l0_k0 = _mm_mul_pd( x1px2_k0, EV_t_l0_k0 );
-        EV_t_l0_k2 = _mm_mul_pd( x1px2_k2, EV_t_l0_k2 );
-        EV_t_l0_k0 = _mm_hadd_pd( EV_t_l0_k0, EV_t_l0_k2 );
-
-        EV_t_l1_k0 = _mm_mul_pd( x1px2_k0, EV_t_l1_k0 );
-        EV_t_l1_k2 = _mm_mul_pd( x1px2_k2, EV_t_l1_k2 );
-
-        EV_t_l1_k0 = _mm_hadd_pd( EV_t_l1_k0, EV_t_l1_k2 );
-        EV_t_l0_k0 = _mm_hadd_pd( EV_t_l0_k0, EV_t_l1_k0 );
-
-        EV_t_l2_k0 = _mm_mul_pd( x1px2_k0, EV_t_l2_k0 );
-        EV_t_l2_k2 = _mm_mul_pd( x1px2_k2, EV_t_l2_k2 );
-        EV_t_l2_k0 = _mm_hadd_pd( EV_t_l2_k0, EV_t_l2_k2 );
-
-        EV_t_l3_k0 = _mm_mul_pd( x1px2_k0, EV_t_l3_k0 );
-        EV_t_l3_k2 = _mm_mul_pd( x1px2_k2, EV_t_l3_k2 );
-        EV_t_l3_k0 = _mm_hadd_pd( EV_t_l3_k0, EV_t_l3_k2 );
-
-        EV_t_l2_k0 = _mm_hadd_pd( EV_t_l2_k0, EV_t_l3_k0 );                                              
-
-        scale = 1;
-
-        __m128d v1 = _mm_and_pd(EV_t_l0_k0, absMask.m);
-        v1 = _mm_cmplt_pd(v1,  minlikelihood_sse);
-        if(_mm_movemask_pd( v1 ) != 3)
-          scale = 0;
-        else
-        {
-          v1 = _mm_and_pd(EV_t_l2_k0, absMask.m);
-          v1 = _mm_cmplt_pd(v1,  minlikelihood_sse);
-          if(_mm_movemask_pd( v1 ) != 3)
-            scale = 0;
-        }
-
-        if(scale)
-        {                     
-          _mm_store_pd(&x3[0], _mm_mul_pd(EV_t_l0_k0, sc));
-          _mm_store_pd(&x3[2], _mm_mul_pd(EV_t_l2_k0, sc));                   
-
-          if(!fastScaling)
-            ex3[i] += 1;
-          else
-            addScale += wgt[i];   
-        }       
-        else
-        {
-          _mm_store_pd(x3, EV_t_l0_k0);
-          _mm_store_pd(&x3[2], EV_t_l2_k0);
-        }
-
-      }
-      break;
-    default:
-      assert(0);  /* tipCase is none of the three handled values */
-  }
-
-  if(fastScaling)
-    *scalerIncrement = addScale;  /* scalerIncrement is written only when fastScaling is set */
-}
-#endif
-
-/** @brief Check whether the position \a pos in bitvector \a x is a gap
-    
-    @param x
-      A bitvector stored as an array of unsigned integers, 32 positions per array element
-
-    @param pos
-      Position to test: element \a pos / 32 of \a x is ANDed with \a mask32[\a pos % 32]
-
-    @return
-      The AND result converted to pllBoolean: \b 0 if no masked bit is set, otherwise the masked value (not normalized to \b 1 here)
-*/
-//#ifndef __clang__
-//__inline
-//#endif
-pllBoolean isGap(unsigned int *x, int pos)
-{
-  return (x[pos / 32] & mask32[pos % 32]);  /* NOTE(review): mask32 is defined elsewhere; presumably one bit per entry - confirm */
-}
-
-/** @brief Check whether the position \a pos in bitvector \a x is \b NOT a gap
-    
-    @param x
-      A bitvector stored as an array of unsigned integers, 32 positions per array element
-
-    @param pos
-      Position to test: element \a pos / 32 of \a x is ANDed with \a mask32[\a pos % 32]
-
-    @return
-      \b 1 if the masked bit is \b NOT set (no gap), \b 0 if it is set, i.e. the logical negation of the test in ::isGap
-*/
-//#ifndef __clang__
-//__inline
-//#endif
-pllBoolean noGap(unsigned int *x, int pos)
-{
-  return (!(x[pos / 32] & mask32[pos % 32]));  /* '!' turns the masked value into exactly 0 or 1 */
-}
-
-#if (!defined(__AVX) && defined(__SSE3))
-/** @ingroup group1
- *  @brief Computation of conditional likelihood array for GTR CAT with memory saving (Optimized SSE3 version for DNA data)
-
-    This is the SSE3 optimized version of ::newviewCAT_FLEX for computing the conditional
-    likelihood arrays at some node \a p, given child nodes \a q and \a r using the \b CAT
-    model of rate heterogeneity. The memory saving technique is incorporated.
-
-    @note
-    For more details and function argument description check the function ::newviewCAT_FLEX
-*/
-static void newviewGTRCAT_SAVE( int tipCase,  double *EV,  int *cptr,
-                                double *x1_start, double *x2_start,  double *x3_start, double *tipVector,
-                                int *ex3, unsigned char *tipX1, unsigned char *tipX2,
-                                int n,  double *left, double *right, int *wgt, int *scalerIncrement, const pllBoolean fastScaling,
-                                unsigned int *x1_gap, unsigned int *x2_gap, unsigned int *x3_gap,
-                                double *x1_gapColumn, double *x2_gapColumn, double *x3_gapColumn, const int maxCats)
-{
-  double
-    *le,
-    *ri,
-    *x1,
-    *x2,
-    *x3,
-    *x1_ptr = x1_start,
-    *x2_ptr = x2_start, 
-    *x3_ptr = x3_start;
-  PLL_ALIGN_BEGIN double
-    EV_t[16] PLL_ALIGN_END;
-
-  int 
-    i, 
-    j, 
-    scale, 
-    scaleGap = 0,
-    addScale = 0;
-
-  __m128d
-    minlikelihood_sse = _mm_set1_pd( PLL_MINLIKELIHOOD ),
-                      sc = _mm_set1_pd(PLL_TWOTOTHE256),
-                      EVV[8];  
-
-  for(i = 0; i < 4; i++)
-    for (j=0; j < 4; j++)
-      EV_t[4 * j + i] = EV[4 * i + j];
-
-  for(i = 0; i < 8; i++)
-    EVV[i] = _mm_load_pd(&EV_t[i * 2]);
-
-  {
-    x1 = x1_gapColumn;        
-    x2 = x2_gapColumn;
-    x3 = x3_gapColumn;
-
-    le =  &left[maxCats * 16];           
-    ri =  &right[maxCats * 16];                                                  
-
-    __m128d x1_0 = _mm_load_pd( &x1[0] );
-    __m128d x1_2 = _mm_load_pd( &x1[2] );
-
-    __m128d left_k0_0 = _mm_load_pd( &le[0] );
-    __m128d left_k0_2 = _mm_load_pd( &le[2] );
-    __m128d left_k1_0 = _mm_load_pd( &le[4] );
-    __m128d left_k1_2 = _mm_load_pd( &le[6] );
-    __m128d left_k2_0 = _mm_load_pd( &le[8] );
-    __m128d left_k2_2 = _mm_load_pd( &le[10] );
-    __m128d left_k3_0 = _mm_load_pd( &le[12] );
-    __m128d left_k3_2 = _mm_load_pd( &le[14] );
-
-    left_k0_0 = _mm_mul_pd(x1_0, left_k0_0);
-    left_k0_2 = _mm_mul_pd(x1_2, left_k0_2);
-
-    left_k1_0 = _mm_mul_pd(x1_0, left_k1_0);
-    left_k1_2 = _mm_mul_pd(x1_2, left_k1_2);
-
-    left_k0_0 = _mm_hadd_pd( left_k0_0, left_k0_2 );
-    left_k1_0 = _mm_hadd_pd( left_k1_0, left_k1_2);
-    left_k0_0 = _mm_hadd_pd( left_k0_0, left_k1_0);
-
-    left_k2_0 = _mm_mul_pd(x1_0, left_k2_0);
-    left_k2_2 = _mm_mul_pd(x1_2, left_k2_2);
-
-    left_k3_0 = _mm_mul_pd(x1_0, left_k3_0);
-    left_k3_2 = _mm_mul_pd(x1_2, left_k3_2);
-
-    left_k2_0 = _mm_hadd_pd( left_k2_0, left_k2_2);
-    left_k3_0 = _mm_hadd_pd( left_k3_0, left_k3_2);
-    left_k2_0 = _mm_hadd_pd( left_k2_0, left_k3_0);
-
-    __m128d x2_0 = _mm_load_pd( &x2[0] );
-    __m128d x2_2 = _mm_load_pd( &x2[2] );
-
-    __m128d right_k0_0 = _mm_load_pd( &ri[0] );
-    __m128d right_k0_2 = _mm_load_pd( &ri[2] );
-    __m128d right_k1_0 = _mm_load_pd( &ri[4] );
-    __m128d right_k1_2 = _mm_load_pd( &ri[6] );
-    __m128d right_k2_0 = _mm_load_pd( &ri[8] );
-    __m128d right_k2_2 = _mm_load_pd( &ri[10] );
-    __m128d right_k3_0 = _mm_load_pd( &ri[12] );
-    __m128d right_k3_2 = _mm_load_pd( &ri[14] );
-
-    right_k0_0 = _mm_mul_pd( x2_0, right_k0_0);
-    right_k0_2 = _mm_mul_pd( x2_2, right_k0_2);
-
-    right_k1_0 = _mm_mul_pd( x2_0, right_k1_0);
-    right_k1_2 = _mm_mul_pd( x2_2, right_k1_2);
-
-    right_k0_0 = _mm_hadd_pd( right_k0_0, right_k0_2);
-    right_k1_0 = _mm_hadd_pd( right_k1_0, right_k1_2);
-    right_k0_0 = _mm_hadd_pd( right_k0_0, right_k1_0);
-
-    right_k2_0 = _mm_mul_pd( x2_0, right_k2_0);
-    right_k2_2 = _mm_mul_pd( x2_2, right_k2_2);
-
-    right_k3_0 = _mm_mul_pd( x2_0, right_k3_0);
-    right_k3_2 = _mm_mul_pd( x2_2, right_k3_2);
-
-    right_k2_0 = _mm_hadd_pd( right_k2_0, right_k2_2);
-    right_k3_0 = _mm_hadd_pd( right_k3_0, right_k3_2);
-    right_k2_0 = _mm_hadd_pd( right_k2_0, right_k3_0);     
-
-    __m128d x1px2_k0 = _mm_mul_pd( left_k0_0, right_k0_0 );
-    __m128d x1px2_k2 = _mm_mul_pd( left_k2_0, right_k2_0 );
-
-    __m128d EV_t_l0_k0 = EVV[0];
-    __m128d EV_t_l0_k2 = EVV[1];
-    __m128d EV_t_l1_k0 = EVV[2];
-    __m128d EV_t_l1_k2 = EVV[3];
-    __m128d EV_t_l2_k0 = EVV[4];
-    __m128d EV_t_l2_k2 = EVV[5];
-    __m128d EV_t_l3_k0 = EVV[6];
-    __m128d EV_t_l3_k2 = EVV[7];
-
-    EV_t_l0_k0 = _mm_mul_pd( x1px2_k0, EV_t_l0_k0 );
-    EV_t_l0_k2 = _mm_mul_pd( x1px2_k2, EV_t_l0_k2 );
-    EV_t_l0_k0 = _mm_hadd_pd( EV_t_l0_k0, EV_t_l0_k2 );
-
-    EV_t_l1_k0 = _mm_mul_pd( x1px2_k0, EV_t_l1_k0 );
-    EV_t_l1_k2 = _mm_mul_pd( x1px2_k2, EV_t_l1_k2 );
-
-    EV_t_l1_k0 = _mm_hadd_pd( EV_t_l1_k0, EV_t_l1_k2 );
-    EV_t_l0_k0 = _mm_hadd_pd( EV_t_l0_k0, EV_t_l1_k0 );
-
-    EV_t_l2_k0 = _mm_mul_pd( x1px2_k0, EV_t_l2_k0 );
-    EV_t_l2_k2 = _mm_mul_pd( x1px2_k2, EV_t_l2_k2 );
-    EV_t_l2_k0 = _mm_hadd_pd( EV_t_l2_k0, EV_t_l2_k2 );
-
-    EV_t_l3_k0 = _mm_mul_pd( x1px2_k0, EV_t_l3_k0 );
-    EV_t_l3_k2 = _mm_mul_pd( x1px2_k2, EV_t_l3_k2 );
-    EV_t_l3_k0 = _mm_hadd_pd( EV_t_l3_k0, EV_t_l3_k2 );
-
-    EV_t_l2_k0 = _mm_hadd_pd( EV_t_l2_k0, EV_t_l3_k0 );                                   
-
-    if(tipCase != PLL_TIP_TIP)
-    {    
-      scale = 1;
-
-      __m128d v1 = _mm_and_pd(EV_t_l0_k0, absMask.m);
-      v1 = _mm_cmplt_pd(v1,  minlikelihood_sse);
-      if(_mm_movemask_pd( v1 ) != 3)
-        scale = 0;
-      else
-      {
-        v1 = _mm_and_pd(EV_t_l2_k0, absMask.m);
-        v1 = _mm_cmplt_pd(v1,  minlikelihood_sse);
-        if(_mm_movemask_pd( v1 ) != 3)
-          scale = 0;
-      }
-
-      if(scale)
-      {               
-        _mm_store_pd(&x3[0], _mm_mul_pd(EV_t_l0_k0, sc));
-        _mm_store_pd(&x3[2], _mm_mul_pd(EV_t_l2_k0, sc));                     
-
-        scaleGap = PLL_TRUE;       
-      } 
-      else
-      {
-        _mm_store_pd(x3, EV_t_l0_k0);
-        _mm_store_pd(&x3[2], EV_t_l2_k0);
-      }
-    }
-    else
-    {
-      _mm_store_pd(x3, EV_t_l0_k0);
-      _mm_store_pd(&x3[2], EV_t_l2_k0);
-    }
-  }
-
-
-  switch(tipCase)
-  {
-    case PLL_TIP_TIP:      
-      for (i = 0; i < n; i++)
-      {
-        if(noGap(x3_gap, i))
-        {
-          x1 = &(tipVector[4 * tipX1[i]]);
-          x2 = &(tipVector[4 * tipX2[i]]);
-
-          x3 = x3_ptr;
-
-          if(isGap(x1_gap, i))
-            le =  &left[maxCats * 16];
-          else            
-            le =  &left[cptr[i] * 16];    
-
-          if(isGap(x2_gap, i))
-            ri =  &right[maxCats * 16];
-          else            
-            ri =  &right[cptr[i] * 16];
-
-          __m128d x1_0 = _mm_load_pd( &x1[0] );
-          __m128d x1_2 = _mm_load_pd( &x1[2] );
-
-          __m128d left_k0_0 = _mm_load_pd( &le[0] );
-          __m128d left_k0_2 = _mm_load_pd( &le[2] );
-          __m128d left_k1_0 = _mm_load_pd( &le[4] );
-          __m128d left_k1_2 = _mm_load_pd( &le[6] );
-          __m128d left_k2_0 = _mm_load_pd( &le[8] );
-          __m128d left_k2_2 = _mm_load_pd( &le[10] );
-          __m128d left_k3_0 = _mm_load_pd( &le[12] );
-          __m128d left_k3_2 = _mm_load_pd( &le[14] );
-
-          left_k0_0 = _mm_mul_pd(x1_0, left_k0_0);
-          left_k0_2 = _mm_mul_pd(x1_2, left_k0_2);
-
-          left_k1_0 = _mm_mul_pd(x1_0, left_k1_0);
-          left_k1_2 = _mm_mul_pd(x1_2, left_k1_2);
-
-          left_k0_0 = _mm_hadd_pd( left_k0_0, left_k0_2 );
-          left_k1_0 = _mm_hadd_pd( left_k1_0, left_k1_2);
-          left_k0_0 = _mm_hadd_pd( left_k0_0, left_k1_0);
-
-          left_k2_0 = _mm_mul_pd(x1_0, left_k2_0);
-          left_k2_2 = _mm_mul_pd(x1_2, left_k2_2);
-
-          left_k3_0 = _mm_mul_pd(x1_0, left_k3_0);
-          left_k3_2 = _mm_mul_pd(x1_2, left_k3_2);
-
-          left_k2_0 = _mm_hadd_pd( left_k2_0, left_k2_2);
-          left_k3_0 = _mm_hadd_pd( left_k3_0, left_k3_2);
-          left_k2_0 = _mm_hadd_pd( left_k2_0, left_k3_0);
-
-          __m128d x2_0 = _mm_load_pd( &x2[0] );
-          __m128d x2_2 = _mm_load_pd( &x2[2] );
-
-          __m128d right_k0_0 = _mm_load_pd( &ri[0] );
-          __m128d right_k0_2 = _mm_load_pd( &ri[2] );
-          __m128d right_k1_0 = _mm_load_pd( &ri[4] );
-          __m128d right_k1_2 = _mm_load_pd( &ri[6] );
-          __m128d right_k2_0 = _mm_load_pd( &ri[8] );
-          __m128d right_k2_2 = _mm_load_pd( &ri[10] );
-          __m128d right_k3_0 = _mm_load_pd( &ri[12] );
-          __m128d right_k3_2 = _mm_load_pd( &ri[14] );
-
-          right_k0_0 = _mm_mul_pd( x2_0, right_k0_0);
-          right_k0_2 = _mm_mul_pd( x2_2, right_k0_2);
-
-          right_k1_0 = _mm_mul_pd( x2_0, right_k1_0);
-          right_k1_2 = _mm_mul_pd( x2_2, right_k1_2);
-
-          right_k0_0 = _mm_hadd_pd( right_k0_0, right_k0_2);
-          right_k1_0 = _mm_hadd_pd( right_k1_0, right_k1_2);
-          right_k0_0 = _mm_hadd_pd( right_k0_0, right_k1_0);
-
-          right_k2_0 = _mm_mul_pd( x2_0, right_k2_0);
-          right_k2_2 = _mm_mul_pd( x2_2, right_k2_2);
-
-          right_k3_0 = _mm_mul_pd( x2_0, right_k3_0);
-          right_k3_2 = _mm_mul_pd( x2_2, right_k3_2);
-
-          right_k2_0 = _mm_hadd_pd( right_k2_0, right_k2_2);
-          right_k3_0 = _mm_hadd_pd( right_k3_0, right_k3_2);
-          right_k2_0 = _mm_hadd_pd( right_k2_0, right_k3_0);       
-
-          __m128d x1px2_k0 = _mm_mul_pd( left_k0_0, right_k0_0 );
-          __m128d x1px2_k2 = _mm_mul_pd( left_k2_0, right_k2_0 );                 
-
-          __m128d EV_t_l0_k0 = EVV[0];
-          __m128d EV_t_l0_k2 = EVV[1];
-          __m128d EV_t_l1_k0 = EVV[2];
-          __m128d EV_t_l1_k2 = EVV[3];
-          __m128d EV_t_l2_k0 = EVV[4];
-          __m128d EV_t_l2_k2 = EVV[5];
-          __m128d EV_t_l3_k0 = EVV[6];
-          __m128d EV_t_l3_k2 = EVV[7];
-
-          EV_t_l0_k0 = _mm_mul_pd( x1px2_k0, EV_t_l0_k0 );
-          EV_t_l0_k2 = _mm_mul_pd( x1px2_k2, EV_t_l0_k2 );
-          EV_t_l0_k0 = _mm_hadd_pd( EV_t_l0_k0, EV_t_l0_k2 );
-
-          EV_t_l1_k0 = _mm_mul_pd( x1px2_k0, EV_t_l1_k0 );
-          EV_t_l1_k2 = _mm_mul_pd( x1px2_k2, EV_t_l1_k2 );
-
-          EV_t_l1_k0 = _mm_hadd_pd( EV_t_l1_k0, EV_t_l1_k2 );
-          EV_t_l0_k0 = _mm_hadd_pd( EV_t_l0_k0, EV_t_l1_k0 );
-
-          EV_t_l2_k0 = _mm_mul_pd( x1px2_k0, EV_t_l2_k0 );
-          EV_t_l2_k2 = _mm_mul_pd( x1px2_k2, EV_t_l2_k2 );
-          EV_t_l2_k0 = _mm_hadd_pd( EV_t_l2_k0, EV_t_l2_k2 );
-
-          EV_t_l3_k0 = _mm_mul_pd( x1px2_k0, EV_t_l3_k0 );
-          EV_t_l3_k2 = _mm_mul_pd( x1px2_k2, EV_t_l3_k2 );
-          EV_t_l3_k0 = _mm_hadd_pd( EV_t_l3_k0, EV_t_l3_k2 );
-
-          EV_t_l2_k0 = _mm_hadd_pd( EV_t_l2_k0, EV_t_l3_k0 );    
-
-          _mm_store_pd(x3, EV_t_l0_k0);
-          _mm_store_pd(&x3[2], EV_t_l2_k0);                                 
-
-          x3_ptr += 4;
-        }
-      }
-      break;
-    case PLL_TIP_INNER:      
-      for (i = 0; i < n; i++)
-      { 
-        if(isGap(x3_gap, i))
-        {
-          if(scaleGap)
-            {
-              if(!fastScaling)
-                ex3[i] += 1;
-              else
-                addScale += wgt[i];
-            }
-        }
-        else
-        {             
-          x1 = &(tipVector[4 * tipX1[i]]);
-
-          x2 = x2_ptr;
-          x3 = x3_ptr;
-
-          if(isGap(x1_gap, i))
-            le =  &left[maxCats * 16];
-          else
-            le =  &left[cptr[i] * 16];
-
-          if(isGap(x2_gap, i))
-          {              
-            ri =  &right[maxCats * 16];
-            x2 = x2_gapColumn;
-          }
-          else
-          {
-            ri =  &right[cptr[i] * 16];
-            x2 = x2_ptr;
-            x2_ptr += 4;
-          }                               
-
-          __m128d x1_0 = _mm_load_pd( &x1[0] );
-          __m128d x1_2 = _mm_load_pd( &x1[2] );
-
-          __m128d left_k0_0 = _mm_load_pd( &le[0] );
-          __m128d left_k0_2 = _mm_load_pd( &le[2] );
-          __m128d left_k1_0 = _mm_load_pd( &le[4] );
-          __m128d left_k1_2 = _mm_load_pd( &le[6] );
-          __m128d left_k2_0 = _mm_load_pd( &le[8] );
-          __m128d left_k2_2 = _mm_load_pd( &le[10] );
-          __m128d left_k3_0 = _mm_load_pd( &le[12] );
-          __m128d left_k3_2 = _mm_load_pd( &le[14] );
-
-          left_k0_0 = _mm_mul_pd(x1_0, left_k0_0);
-          left_k0_2 = _mm_mul_pd(x1_2, left_k0_2);
-
-          left_k1_0 = _mm_mul_pd(x1_0, left_k1_0);
-          left_k1_2 = _mm_mul_pd(x1_2, left_k1_2);
-
-          left_k0_0 = _mm_hadd_pd( left_k0_0, left_k0_2 );
-          left_k1_0 = _mm_hadd_pd( left_k1_0, left_k1_2);
-          left_k0_0 = _mm_hadd_pd( left_k0_0, left_k1_0);
-
-          left_k2_0 = _mm_mul_pd(x1_0, left_k2_0);
-          left_k2_2 = _mm_mul_pd(x1_2, left_k2_2);
-
-          left_k3_0 = _mm_mul_pd(x1_0, left_k3_0);
-          left_k3_2 = _mm_mul_pd(x1_2, left_k3_2);
-
-          left_k2_0 = _mm_hadd_pd( left_k2_0, left_k2_2);
-          left_k3_0 = _mm_hadd_pd( left_k3_0, left_k3_2);
-          left_k2_0 = _mm_hadd_pd( left_k2_0, left_k3_0);
-
-          __m128d x2_0 = _mm_load_pd( &x2[0] );
-          __m128d x2_2 = _mm_load_pd( &x2[2] );
-
-          __m128d right_k0_0 = _mm_load_pd( &ri[0] );
-          __m128d right_k0_2 = _mm_load_pd( &ri[2] );
-          __m128d right_k1_0 = _mm_load_pd( &ri[4] );
-          __m128d right_k1_2 = _mm_load_pd( &ri[6] );
-          __m128d right_k2_0 = _mm_load_pd( &ri[8] );
-          __m128d right_k2_2 = _mm_load_pd( &ri[10] );
-          __m128d right_k3_0 = _mm_load_pd( &ri[12] );
-          __m128d right_k3_2 = _mm_load_pd( &ri[14] );
-
-          right_k0_0 = _mm_mul_pd( x2_0, right_k0_0);
-          right_k0_2 = _mm_mul_pd( x2_2, right_k0_2);
-
-          right_k1_0 = _mm_mul_pd( x2_0, right_k1_0);
-          right_k1_2 = _mm_mul_pd( x2_2, right_k1_2);
-
-          right_k0_0 = _mm_hadd_pd( right_k0_0, right_k0_2);
-          right_k1_0 = _mm_hadd_pd( right_k1_0, right_k1_2);
-          right_k0_0 = _mm_hadd_pd( right_k0_0, right_k1_0);
-
-          right_k2_0 = _mm_mul_pd( x2_0, right_k2_0);
-          right_k2_2 = _mm_mul_pd( x2_2, right_k2_2);
-
-          right_k3_0 = _mm_mul_pd( x2_0, right_k3_0);
-          right_k3_2 = _mm_mul_pd( x2_2, right_k3_2);
-
-          right_k2_0 = _mm_hadd_pd( right_k2_0, right_k2_2);
-          right_k3_0 = _mm_hadd_pd( right_k3_0, right_k3_2);
-          right_k2_0 = _mm_hadd_pd( right_k2_0, right_k3_0);       
-
-          __m128d x1px2_k0 = _mm_mul_pd( left_k0_0, right_k0_0 );
-          __m128d x1px2_k2 = _mm_mul_pd( left_k2_0, right_k2_0 );
-
-          __m128d EV_t_l0_k0 = EVV[0];
-          __m128d EV_t_l0_k2 = EVV[1];
-          __m128d EV_t_l1_k0 = EVV[2];
-          __m128d EV_t_l1_k2 = EVV[3];
-          __m128d EV_t_l2_k0 = EVV[4];
-          __m128d EV_t_l2_k2 = EVV[5];
-          __m128d EV_t_l3_k0 = EVV[6];
-          __m128d EV_t_l3_k2 = EVV[7];
-
-
-          EV_t_l0_k0 = _mm_mul_pd( x1px2_k0, EV_t_l0_k0 );
-          EV_t_l0_k2 = _mm_mul_pd( x1px2_k2, EV_t_l0_k2 );
-          EV_t_l0_k0 = _mm_hadd_pd( EV_t_l0_k0, EV_t_l0_k2 );
-
-          EV_t_l1_k0 = _mm_mul_pd( x1px2_k0, EV_t_l1_k0 );
-          EV_t_l1_k2 = _mm_mul_pd( x1px2_k2, EV_t_l1_k2 );
-
-          EV_t_l1_k0 = _mm_hadd_pd( EV_t_l1_k0, EV_t_l1_k2 );
-          EV_t_l0_k0 = _mm_hadd_pd( EV_t_l0_k0, EV_t_l1_k0 );
-
-          EV_t_l2_k0 = _mm_mul_pd( x1px2_k0, EV_t_l2_k0 );
-          EV_t_l2_k2 = _mm_mul_pd( x1px2_k2, EV_t_l2_k2 );
-          EV_t_l2_k0 = _mm_hadd_pd( EV_t_l2_k0, EV_t_l2_k2 );
-
-          EV_t_l3_k0 = _mm_mul_pd( x1px2_k0, EV_t_l3_k0 );
-          EV_t_l3_k2 = _mm_mul_pd( x1px2_k2, EV_t_l3_k2 );
-          EV_t_l3_k0 = _mm_hadd_pd( EV_t_l3_k0, EV_t_l3_k2 );
-
-          EV_t_l2_k0 = _mm_hadd_pd( EV_t_l2_k0, EV_t_l3_k0 );                                     
-
-          scale = 1;
-
-          __m128d v1 = _mm_and_pd(EV_t_l0_k0, absMask.m);
-          v1 = _mm_cmplt_pd(v1,  minlikelihood_sse);
-          if(_mm_movemask_pd( v1 ) != 3)
-            scale = 0;
-          else
-          {
-            v1 = _mm_and_pd(EV_t_l2_k0, absMask.m);
-            v1 = _mm_cmplt_pd(v1,  minlikelihood_sse);
-            if(_mm_movemask_pd( v1 ) != 3)
-              scale = 0;
-          }
-
-          if(scale)
-          {                   
-            _mm_store_pd(&x3[0], _mm_mul_pd(EV_t_l0_k0, sc));
-            _mm_store_pd(&x3[2], _mm_mul_pd(EV_t_l2_k0, sc));                 
-            
-            if(!fastScaling)
-              ex3[i] += 1;
-            else
-              addScale += wgt[i];         
-          }     
-          else
-          {
-            _mm_store_pd(x3, EV_t_l0_k0);
-            _mm_store_pd(&x3[2], EV_t_l2_k0);
-          }
-
-          x3_ptr += 4;
-        }
-
-      }
-      break;
-    case PLL_INNER_INNER:
-      for (i = 0; i < n; i++)
-      { 
-        if(isGap(x3_gap, i))
-        {
-          if(scaleGap)
-            {
-              if(!fastScaling)
-                ex3[i] += 1;
-              else
-                addScale += wgt[i];
-            }
-        }
-        else
-        {            
-          x3 = x3_ptr;
-
-          if(isGap(x1_gap, i))
-          {
-            x1 = x1_gapColumn;
-            le =  &left[maxCats * 16];
-          }
-          else
-          {
-            le =  &left[cptr[i] * 16];
-            x1 = x1_ptr;
-            x1_ptr += 4;
-          }
-
-          if(isGap(x2_gap, i))  
-          {
-            x2 = x2_gapColumn;
-            ri =  &right[maxCats * 16];     
-          }
-          else
-          {
-            ri =  &right[cptr[i] * 16];
-            x2 = x2_ptr;
-            x2_ptr += 4;
-          }                               
-
-          __m128d x1_0 = _mm_load_pd( &x1[0] );
-          __m128d x1_2 = _mm_load_pd( &x1[2] );
-
-          __m128d left_k0_0 = _mm_load_pd( &le[0] );
-          __m128d left_k0_2 = _mm_load_pd( &le[2] );
-          __m128d left_k1_0 = _mm_load_pd( &le[4] );
-          __m128d left_k1_2 = _mm_load_pd( &le[6] );
-          __m128d left_k2_0 = _mm_load_pd( &le[8] );
-          __m128d left_k2_2 = _mm_load_pd( &le[10] );
-          __m128d left_k3_0 = _mm_load_pd( &le[12] );
-          __m128d left_k3_2 = _mm_load_pd( &le[14] );
-
-          left_k0_0 = _mm_mul_pd(x1_0, left_k0_0);
-          left_k0_2 = _mm_mul_pd(x1_2, left_k0_2);
-
-          left_k1_0 = _mm_mul_pd(x1_0, left_k1_0);
-          left_k1_2 = _mm_mul_pd(x1_2, left_k1_2);
-
-          left_k0_0 = _mm_hadd_pd( left_k0_0, left_k0_2 );
-          left_k1_0 = _mm_hadd_pd( left_k1_0, left_k1_2);
-          left_k0_0 = _mm_hadd_pd( left_k0_0, left_k1_0);
-
-          left_k2_0 = _mm_mul_pd(x1_0, left_k2_0);
-          left_k2_2 = _mm_mul_pd(x1_2, left_k2_2);
-
-          left_k3_0 = _mm_mul_pd(x1_0, left_k3_0);
-          left_k3_2 = _mm_mul_pd(x1_2, left_k3_2);
-
-          left_k2_0 = _mm_hadd_pd( left_k2_0, left_k2_2);
-          left_k3_0 = _mm_hadd_pd( left_k3_0, left_k3_2);
-          left_k2_0 = _mm_hadd_pd( left_k2_0, left_k3_0);
-
-          __m128d x2_0 = _mm_load_pd( &x2[0] );
-          __m128d x2_2 = _mm_load_pd( &x2[2] );
-
-          __m128d right_k0_0 = _mm_load_pd( &ri[0] );
-          __m128d right_k0_2 = _mm_load_pd( &ri[2] );
-          __m128d right_k1_0 = _mm_load_pd( &ri[4] );
-          __m128d right_k1_2 = _mm_load_pd( &ri[6] );
-          __m128d right_k2_0 = _mm_load_pd( &ri[8] );
-          __m128d right_k2_2 = _mm_load_pd( &ri[10] );
-          __m128d right_k3_0 = _mm_load_pd( &ri[12] );
-          __m128d right_k3_2 = _mm_load_pd( &ri[14] );
-
-          right_k0_0 = _mm_mul_pd( x2_0, right_k0_0);
-          right_k0_2 = _mm_mul_pd( x2_2, right_k0_2);
-
-          right_k1_0 = _mm_mul_pd( x2_0, right_k1_0);
-          right_k1_2 = _mm_mul_pd( x2_2, right_k1_2);
-
-          right_k0_0 = _mm_hadd_pd( right_k0_0, right_k0_2);
-          right_k1_0 = _mm_hadd_pd( right_k1_0, right_k1_2);
-          right_k0_0 = _mm_hadd_pd( right_k0_0, right_k1_0);
-
-          right_k2_0 = _mm_mul_pd( x2_0, right_k2_0);
-          right_k2_2 = _mm_mul_pd( x2_2, right_k2_2);
-
-          right_k3_0 = _mm_mul_pd( x2_0, right_k3_0);
-          right_k3_2 = _mm_mul_pd( x2_2, right_k3_2);
-
-          right_k2_0 = _mm_hadd_pd( right_k2_0, right_k2_2);
-          right_k3_0 = _mm_hadd_pd( right_k3_0, right_k3_2);
-          right_k2_0 = _mm_hadd_pd( right_k2_0, right_k3_0);       
-
-          __m128d x1px2_k0 = _mm_mul_pd( left_k0_0, right_k0_0 );
-          __m128d x1px2_k2 = _mm_mul_pd( left_k2_0, right_k2_0 );
-
-          __m128d EV_t_l0_k0 = EVV[0];
-          __m128d EV_t_l0_k2 = EVV[1];
-          __m128d EV_t_l1_k0 = EVV[2];
-          __m128d EV_t_l1_k2 = EVV[3];
-          __m128d EV_t_l2_k0 = EVV[4];
-          __m128d EV_t_l2_k2 = EVV[5];
-          __m128d EV_t_l3_k0 = EVV[6];
-          __m128d EV_t_l3_k2 = EVV[7];
-
-
-          EV_t_l0_k0 = _mm_mul_pd( x1px2_k0, EV_t_l0_k0 );
-          EV_t_l0_k2 = _mm_mul_pd( x1px2_k2, EV_t_l0_k2 );
-          EV_t_l0_k0 = _mm_hadd_pd( EV_t_l0_k0, EV_t_l0_k2 );
-
-          EV_t_l1_k0 = _mm_mul_pd( x1px2_k0, EV_t_l1_k0 );
-          EV_t_l1_k2 = _mm_mul_pd( x1px2_k2, EV_t_l1_k2 );
-
-          EV_t_l1_k0 = _mm_hadd_pd( EV_t_l1_k0, EV_t_l1_k2 );
-          EV_t_l0_k0 = _mm_hadd_pd( EV_t_l0_k0, EV_t_l1_k0 );
-
-          EV_t_l2_k0 = _mm_mul_pd( x1px2_k0, EV_t_l2_k0 );
-          EV_t_l2_k2 = _mm_mul_pd( x1px2_k2, EV_t_l2_k2 );
-          EV_t_l2_k0 = _mm_hadd_pd( EV_t_l2_k0, EV_t_l2_k2 );
-
-          EV_t_l3_k0 = _mm_mul_pd( x1px2_k0, EV_t_l3_k0 );
-          EV_t_l3_k2 = _mm_mul_pd( x1px2_k2, EV_t_l3_k2 );
-          EV_t_l3_k0 = _mm_hadd_pd( EV_t_l3_k0, EV_t_l3_k2 );
-
-          EV_t_l2_k0 = _mm_hadd_pd( EV_t_l2_k0, EV_t_l3_k0 );                                            
-
-          scale = 1;
-
-          __m128d v1 = _mm_and_pd(EV_t_l0_k0, absMask.m);
-          v1 = _mm_cmplt_pd(v1,  minlikelihood_sse);
-          if(_mm_movemask_pd( v1 ) != 3)
-            scale = 0;
-          else
-          {
-            v1 = _mm_and_pd(EV_t_l2_k0, absMask.m);
-            v1 = _mm_cmplt_pd(v1,  minlikelihood_sse);
-            if(_mm_movemask_pd( v1 ) != 3)
-              scale = 0;
-          }
-
-          if(scale)
-          {                   
-            _mm_store_pd(&x3[0], _mm_mul_pd(EV_t_l0_k0, sc));
-            _mm_store_pd(&x3[2], _mm_mul_pd(EV_t_l2_k0, sc));                 
-
-            if(!fastScaling)
-              ex3[i] += 1;
-            else
-              addScale += wgt[i];         
-          }     
-          else
-          {
-            _mm_store_pd(x3, EV_t_l0_k0);
-            _mm_store_pd(&x3[2], EV_t_l2_k0);
-          }
-
-          x3_ptr += 4;
-        }
-      }
-      break;
-    default:
-      assert(0);
-  }
-
-
-  if(fastScaling)
-    *scalerIncrement = addScale;
-}
-
-/** @ingroup group1
- *  @brief Computation of conditional likelihood array for GTR GAMMA with memory saving (Optimized SSE3 version for AA data)
-
-    This is the SSE3 optimized version of ::newviewGAMMA_FLEX for computing the conditional
-    likelihood arrays at some node \a p, given child nodes \a q and \a r using the \b GAMMA
-    model of rate heterogeneity. The memory saving technique is incorporated.
-
-    What the code in this function does, per case:
-    - Every site whose bit is clear in \a x3_gap receives 80 doubles in \a x3
-      (4 blocks of 20 entries); sites whose bit is set receive no storage, so
-      the output pointer only advances by 80 for non-gap sites.
-    - \a x3_gapColumn (80 doubles) is recomputed on every call in all three cases.
-    - In the PLL_TIP_INNER and PLL_INNER_INNER cases a freshly computed vector is
-      multiplied by PLL_TWOTOTHE256 when all 80 of its entries are smaller in
-      magnitude than PLL_MINLIKELIHOOD. The PLL_TIP_TIP case performs no such check.
-    - When \a x3_gapColumn itself was rescaled, every site whose \a x3_gap bit is
-      set is counted as one scaling event as well.
-
-    @param tipCase          One of PLL_TIP_TIP, PLL_TIP_INNER, PLL_INNER_INNER; any other value reaches assert(0)
-    @param x1               Read only for PLL_INNER_INNER: 80 doubles are consumed per site whose \a x3_gap and \a x1_gap bits are both clear
-    @param x2               Read for PLL_TIP_INNER and PLL_INNER_INNER: 80 doubles are consumed per site whose \a x3_gap and \a x2_gap bits are both clear
-    @param x3               Output: 80 doubles are written per site whose \a x3_gap bit is clear
-    @param extEV            400 doubles read as 20 rows of 20; row \c l is extEV[20 * l .. 20 * l + 19]
-    @param tipVector        Rows 0..22 of 20 doubles each are read (tip cases only)
-    @param ex3              Per-site counter, incremented on a scaling event when \a fastScaling is false
-    @param tipX1            Per-site row index into the local 23-row tip table (tip cases); not range-checked here
-    @param tipX2            Same as \a tipX1 for the second child; read only for PLL_TIP_TIP
-    @param n                Number of sites iterated
-    @param left             1600 doubles; indexed as left[k * 20] (k < 80) in the tip cases and as left[k * 400 + l * 20] for PLL_INNER_INNER
-    @param right            1600 doubles; indexed as right[k * 20] (k < 80) for PLL_TIP_TIP and as right[k * 400 + l * 20] otherwise
-    @param wgt              Per-site weight, added to the scaling sum on a scaling event when \a fastScaling is true
-    @param scalerIncrement  Written only when \a fastScaling is true: receives the accumulated sum of \a wgt entries
-    @param fastScaling      Selects between the accumulated sum (true) and the per-site counters in \a ex3 (false)
-    @param x1_gap           Bit vector tested as x1_gap[i / 32] & mask32[i % 32]; read only for PLL_INNER_INNER
-    @param x2_gap           Bit vector tested the same way; read for PLL_TIP_INNER and PLL_INNER_INNER
-    @param x3_gap           Bit vector tested the same way; read in all three cases
-    @param x1_gapColumn     80 doubles used instead of data from \a x1 when the \a x1_gap bit is set (PLL_INNER_INNER)
-    @param x2_gapColumn     80 doubles used instead of data from \a x2 when the \a x2_gap bit is set
-    @param x3_gapColumn     Output: 80 doubles
-
-    @note
-    All vector accesses to \a tipVector, \a left, \a right, \a extEV, \a x1, \a x2, \a x3 and the
-    gap columns use _mm_load_pd / _mm_store_pd, i.e. the aligned SSE2 forms; the buffers are
-    therefore expected to be 16-byte aligned by the caller.
-
-    @note
-    mask32 and absMask are declared outside this function. absMask.m is ANDed with the values
-    before the threshold comparison, presumably to clear the sign bit (confirm at its definition).
-
-    @note
-    For more details and function argument description check the function ::newviewGAMMA_FLEX
-*/
-static void newviewGTRGAMMAPROT_GAPPED_SAVE(int tipCase,
-                                            double *x1, double *x2, double *x3, double *extEV, double *tipVector,
-                                            int *ex3, unsigned char *tipX1, unsigned char *tipX2,
-                                            int n, double *left, double *right, int *wgt, int *scalerIncrement, const pllBoolean fastScaling,
-                                            unsigned int *x1_gap, unsigned int *x2_gap, unsigned int *x3_gap,  
-                                            double *x1_gapColumn, double *x2_gapColumn, double *x3_gapColumn
-                                            )
-{
-  double  *uX1, *uX2, *v;
-  double x1px2;
-  int  i, j, l, k, scale, addScale = 0,   
-       gapScaling = 0;   /* becomes 1 when x3_gapColumn itself had to be rescaled */
-  double 
-    *vl, *vr, *x1v, *x2v,
-    *x1_ptr = x1,   /* running read/write positions; they advance by 80 per non-gap site only */
-    *x2_ptr = x2,
-    *x3_ptr = x3;
-
-
-
-  switch(tipCase)
-  {
-    case PLL_TIP_TIP:   /* both children are addressed through tipX1 / tipX2 */
-      {
-        double umpX1[1840], umpX2[1840];   /* 1840 = 23 tip rows x 80 entries */
-
-        for(i = 0; i < 23; i++)   /* table: tip row i dotted with each of the 80 rows of left / right */
-        {
-          v = &(tipVector[20 * i]);
-
-          for(k = 0; k < 80; k++)
-          {
-            double *ll =  &left[k * 20];
-            double *rr =  &right[k * 20];
-
-            __m128d umpX1v = _mm_setzero_pd();
-            __m128d umpX2v = _mm_setzero_pd();
-
-            for(l = 0; l < 20; l+=2)
-            {
-              __m128d vv = _mm_load_pd(&v[l]);
-              umpX1v = _mm_add_pd(umpX1v, _mm_mul_pd(vv, _mm_load_pd(&ll[l])));
-              umpX2v = _mm_add_pd(umpX2v, _mm_mul_pd(vv, _mm_load_pd(&rr[l])));                                 
-            }
-
-            umpX1v = _mm_hadd_pd(umpX1v, umpX1v);   /* both lanes now hold the full 20-term sum */
-            umpX2v = _mm_hadd_pd(umpX2v, umpX2v);
-
-            _mm_storel_pd(&umpX1[80 * i + k], umpX1v);   /* store the low lane only */
-            _mm_storel_pd(&umpX2[80 * i + k], umpX2v);
-          }
-        }
-
-        {
-          uX1 = &umpX1[1760];   /* 1760 = 22 * 80: table row of tip code 22 is used for the gap column */
-          uX2 = &umpX2[1760];
-
-          for(j = 0; j < 4; j++)   /* build x3_gapColumn, one 20-entry block at a time */
-          {
-            v = &x3_gapColumn[j * 20];
-
-            __m128d zero =  _mm_setzero_pd();
-            for(k = 0; k < 20; k+=2)                                
-              _mm_store_pd(&v[k], zero);
-
-            for(k = 0; k < 20; k++)
-            { 
-              double *eev = &extEV[k * 20];
-              x1px2 = uX1[j * 20 + k] * uX2[j * 20 + k];
-              __m128d x1px2v = _mm_set1_pd(x1px2);
-
-              for(l = 0; l < 20; l+=2)   /* v += x1px2 * extEV row k */
-              {
-                __m128d vv = _mm_load_pd(&v[l]);
-                __m128d ee = _mm_load_pd(&eev[l]);
-
-                vv = _mm_add_pd(vv, _mm_mul_pd(x1px2v,ee));
-
-                _mm_store_pd(&v[l], vv);
-              }
-            }
-          }        
-        }       
-
-        for(i = 0; i < n; i++)
-        {
-          if(!(x3_gap[i / 32] & mask32[i % 32]))   /* sites flagged in x3_gap are skipped entirely */
-          {
-            uX1 = &umpX1[80 * tipX1[i]];
-            uX2 = &umpX2[80 * tipX2[i]];
-
-            for(j = 0; j < 4; j++)
-            {
-              v = &x3_ptr[j * 20];
-
-
-              __m128d zero =  _mm_setzero_pd();
-              for(k = 0; k < 20; k+=2)                              
-                _mm_store_pd(&v[k], zero);
-
-              for(k = 0; k < 20; k++)
-              { 
-                double *eev = &extEV[k * 20];
-                x1px2 = uX1[j * 20 + k] * uX2[j * 20 + k];
-                __m128d x1px2v = _mm_set1_pd(x1px2);
-
-                for(l = 0; l < 20; l+=2)
-                {
-                  __m128d vv = _mm_load_pd(&v[l]);
-                  __m128d ee = _mm_load_pd(&eev[l]);
-
-                  vv = _mm_add_pd(vv, _mm_mul_pd(x1px2v,ee));
-
-                  _mm_store_pd(&v[l], vv);
-                }
-              }
-            }      
-            x3_ptr += 80;   /* output storage exists for non-gap sites only */
-          }
-        }
-      }
-      break;
-    case PLL_TIP_INNER:   /* first child through tipX1, second child through x2 / x2_gapColumn */
-      {
-        double umpX1[1840], ump_x2[20];   /* ump_x2: the 20 right-hand sums of the block being processed */
-
-
-        for(i = 0; i < 23; i++)   /* table: tip row i dotted with each of the 80 rows of left */
-        {
-          v = &(tipVector[20 * i]);
-
-          for(k = 0; k < 80; k++)
-          {
-            double *ll =  &left[k * 20];
-
-            __m128d umpX1v = _mm_setzero_pd();
-
-            for(l = 0; l < 20; l+=2)
-            {
-              __m128d vv = _mm_load_pd(&v[l]);
-              umpX1v = _mm_add_pd(umpX1v, _mm_mul_pd(vv, _mm_load_pd(&ll[l])));                                                 
-            }
-
-            umpX1v = _mm_hadd_pd(umpX1v, umpX1v);                               
-            _mm_storel_pd(&umpX1[80 * i + k], umpX1v);          
-
-          }
-        }
-
-        {
-          uX1 = &umpX1[1760];   /* table row of tip code 22, combined with x2_gapColumn below */
-
-          for(k = 0; k < 4; k++)   /* build x3_gapColumn from tip row 22 and x2_gapColumn */
-          {
-            v = &(x2_gapColumn[k * 20]);
-
-            for(l = 0; l < 20; l++)
-            {              
-              double *r =  &right[k * 400 + l * 20];
-              __m128d ump_x2v = _mm_setzero_pd();           
-
-              for(j = 0; j < 20; j+= 2)
-              {
-                __m128d vv = _mm_load_pd(&v[j]);
-                __m128d rr = _mm_load_pd(&r[j]);
-                ump_x2v = _mm_add_pd(ump_x2v, _mm_mul_pd(vv, rr));
-              }
-
-              ump_x2v = _mm_hadd_pd(ump_x2v, ump_x2v);
-
-              _mm_storel_pd(&ump_x2[l], ump_x2v);                                    
-            }
-
-            v = &(x3_gapColumn[20 * k]);
-
-            __m128d zero =  _mm_setzero_pd();
-            for(l = 0; l < 20; l+=2)                                
-              _mm_store_pd(&v[l], zero);
-
-            for(l = 0; l < 20; l++)
-            {
-              double *eev = &extEV[l * 20];
-              x1px2 = uX1[k * 20 + l]  * ump_x2[l];
-              __m128d x1px2v = _mm_set1_pd(x1px2);
-
-              for(j = 0; j < 20; j+=2)
-              {
-                __m128d vv = _mm_load_pd(&v[j]);
-                __m128d ee = _mm_load_pd(&eev[j]);
-
-                vv = _mm_add_pd(vv, _mm_mul_pd(x1px2v,ee));
-
-                _mm_store_pd(&v[j], vv);
-              }                             
-            }                   
-
-          }
-
-          { 
-            v = x3_gapColumn;
-            __m128d minlikelihood_sse = _mm_set1_pd( PLL_MINLIKELIHOOD );
-
-            scale = 1;
-            for(l = 0; scale && (l < 80); l += 2)   /* stops at the first pair not entirely below the threshold */
-            {
-              __m128d vv = _mm_load_pd(&v[l]);
-              __m128d v1 = _mm_and_pd(vv, absMask.m);
-              v1 = _mm_cmplt_pd(v1,  minlikelihood_sse);
-              if(_mm_movemask_pd( v1 ) != 3)   /* 3 == both lanes compared less-than */
-                scale = 0;
-            }             
-          }
-
-
-          if (scale)
-          {
-            gapScaling = 1;   /* remembered so that gapped sites are counted as scaled in the site loop */
-            __m128d twoto = _mm_set_pd(PLL_TWOTOTHE256, PLL_TWOTOTHE256);
-
-            for(l = 0; l < 80; l+=2)
-            {
-              __m128d ex3v = _mm_load_pd(&v[l]);                  
-              _mm_store_pd(&v[l], _mm_mul_pd(ex3v,twoto));      
-            }                                                          
-          }
-        }
-
-        for (i = 0; i < n; i++)
-        {           
-          if((x3_gap[i / 32] & mask32[i % 32]))   /* gapped site: nothing is stored, only the scaling is recorded */
-          {            
-            if(gapScaling)
-            {   
-              if(!fastScaling)
-                ex3[i] += 1;
-              else
-                addScale += wgt[i];                  
-            }
-          }
-          else
-          {
-            uX1 = &umpX1[80 * tipX1[i]];
-
-            if(x2_gap[i / 32] & mask32[i % 32])   /* gapped child site: use the shared column, consume nothing from x2 */
-              x2v = x2_gapColumn;
-            else
-            {
-              x2v = x2_ptr;
-              x2_ptr += 80;
-            }
-
-            for(k = 0; k < 4; k++)
-            {
-              v = &(x2v[k * 20]);
-
-              for(l = 0; l < 20; l++)
-              {            
-                double *r =  &right[k * 400 + l * 20];
-                __m128d ump_x2v = _mm_setzero_pd();         
-
-                for(j = 0; j < 20; j+= 2)
-                {
-                  __m128d vv = _mm_load_pd(&v[j]);
-                  __m128d rr = _mm_load_pd(&r[j]);
-                  ump_x2v = _mm_add_pd(ump_x2v, _mm_mul_pd(vv, rr));
-                }
-
-                ump_x2v = _mm_hadd_pd(ump_x2v, ump_x2v);
-
-                _mm_storel_pd(&ump_x2[l], ump_x2v);                                  
-              }
-
-              v = &x3_ptr[20 * k];
-
-              __m128d zero =  _mm_setzero_pd();
-              for(l = 0; l < 20; l+=2)                              
-                _mm_store_pd(&v[l], zero);
-
-              for(l = 0; l < 20; l++)
-              {
-                double *eev = &extEV[l * 20];
-                x1px2 = uX1[k * 20 + l]  * ump_x2[l];
-                __m128d x1px2v = _mm_set1_pd(x1px2);
-
-                for(j = 0; j < 20; j+=2)
-                {
-                  __m128d vv = _mm_load_pd(&v[j]);
-                  __m128d ee = _mm_load_pd(&eev[j]);
-
-                  vv = _mm_add_pd(vv, _mm_mul_pd(x1px2v,ee));
-
-                  _mm_store_pd(&v[j], vv);
-                }                                   
-              }                 
-
-            }
-
-
-            { 
-              v = x3_ptr;
-              __m128d minlikelihood_sse = _mm_set1_pd( PLL_MINLIKELIHOOD );
-
-              scale = 1;
-              for(l = 0; scale && (l < 80); l += 2)   /* same all-80-entries threshold test as for the gap column */
-              {
-                __m128d vv = _mm_load_pd(&v[l]);
-                __m128d v1 = _mm_and_pd(vv, absMask.m);
-                v1 = _mm_cmplt_pd(v1,  minlikelihood_sse);
-                if(_mm_movemask_pd( v1 ) != 3)
-                  scale = 0;
-              }           
-            }
-
-
-            if (scale)
-            {
-              __m128d twoto = _mm_set_pd(PLL_TWOTOTHE256, PLL_TWOTOTHE256);
-
-              for(l = 0; l < 80; l+=2)
-              {
-                __m128d ex3v = _mm_load_pd(&v[l]);                
-                _mm_store_pd(&v[l], _mm_mul_pd(ex3v,twoto));    
-              }                           
-              
-              if(!fastScaling)
-                ex3[i] += 1;
-              else
-                addScale += wgt[i];                   
-            }
-
-            x3_ptr += 80;
-          }
-        }
-      }
-      break;
-    case PLL_INNER_INNER:   /* both children through x1 / x2 or their gap columns */
-      {
-        for(k = 0; k < 4; k++)   /* build x3_gapColumn from x1_gapColumn and x2_gapColumn */
-        {
-          vl = &(x1_gapColumn[20 * k]);
-          vr = &(x2_gapColumn[20 * k]);
-          v =  &(x3_gapColumn[20 * k]);
-
-          __m128d zero =  _mm_setzero_pd();
-          for(l = 0; l < 20; l+=2)                                  
-            _mm_store_pd(&v[l], zero);
-
-          for(l = 0; l < 20; l++)
-          {              
-            {
-              __m128d al = _mm_setzero_pd();
-              __m128d ar = _mm_setzero_pd();
-
-              double *ll   = &left[k * 400 + l * 20];
-              double *rr   = &right[k * 400 + l * 20];
-              double *EVEV = &extEV[20 * l];
-
-              for(j = 0; j < 20; j+=2)
-              {
-                __m128d lv  = _mm_load_pd(&ll[j]);
-                __m128d rv  = _mm_load_pd(&rr[j]);
-                __m128d vll = _mm_load_pd(&vl[j]);
-                __m128d vrr = _mm_load_pd(&vr[j]);
-
-                al = _mm_add_pd(al, _mm_mul_pd(vll, lv));
-                ar = _mm_add_pd(ar, _mm_mul_pd(vrr, rv));
-              }                  
-
-              al = _mm_hadd_pd(al, al);   /* both lanes: full left-hand sum */
-              ar = _mm_hadd_pd(ar, ar);   /* both lanes: full right-hand sum */
-
-              al = _mm_mul_pd(al, ar);   /* product of the two sums, replicated in both lanes */
-
-              for(j = 0; j < 20; j+=2)   /* v += product * extEV row l */
-              {
-                __m128d vv  = _mm_load_pd(&v[j]);
-                __m128d EVV = _mm_load_pd(&EVEV[j]);
-
-                vv = _mm_add_pd(vv, _mm_mul_pd(al, EVV));
-
-                _mm_store_pd(&v[j], vv);
-              }                                           
-            }            
-
-          }
-        }
-
-
-        { 
-          v = x3_gapColumn;
-          __m128d minlikelihood_sse = _mm_set1_pd( PLL_MINLIKELIHOOD );
-
-          scale = 1;
-          for(l = 0; scale && (l < 80); l += 2)
-          {
-            __m128d vv = _mm_load_pd(&v[l]);
-            __m128d v1 = _mm_and_pd(vv, absMask.m);
-            v1 = _mm_cmplt_pd(v1,  minlikelihood_sse);
-            if(_mm_movemask_pd( v1 ) != 3)
-              scale = 0;
-          }               
-        }
-
-        if (scale)
-        {
-          gapScaling = 1;
-          __m128d twoto = _mm_set_pd(PLL_TWOTOTHE256, PLL_TWOTOTHE256);
-
-          for(l = 0; l < 80; l+=2)
-          {
-            __m128d ex3v = _mm_load_pd(&v[l]);            
-            _mm_store_pd(&v[l], _mm_mul_pd(ex3v,twoto));        
-          }                               
-
-
-        }
-      }
-
-      for (i = 0; i < n; i++)   /* still part of the PLL_INNER_INNER case: per-site pass */
-      {
-        if(x3_gap[i / 32] & mask32[i % 32])
-        {            
-          if(gapScaling)
-          {     
-            if(!fastScaling)
-              ex3[i] += 1;
-            else
-              addScale += wgt[i];                              
-          }
-        }
-        else
-        {
-          if(x1_gap[i / 32] & mask32[i % 32])   /* gapped child site: shared column, x1_ptr does not advance */
-            x1v = x1_gapColumn;
-          else
-          {
-            x1v = x1_ptr;
-            x1_ptr += 80;
-          }
-
-          if(x2_gap[i / 32] & mask32[i % 32])
-            x2v = x2_gapColumn;
-          else
-          {
-            x2v = x2_ptr;
-            x2_ptr += 80;
-          }
-
-          for(k = 0; k < 4; k++)
-          {
-            vl = &(x1v[20 * k]);
-            vr = &(x2v[20 * k]);
-            v =  &x3_ptr[20 * k];
-
-            __m128d zero =  _mm_setzero_pd();
-            for(l = 0; l < 20; l+=2)                                
-              _mm_store_pd(&v[l], zero);
-
-            for(l = 0; l < 20; l++)
-            {            
-              {
-                __m128d al = _mm_setzero_pd();
-                __m128d ar = _mm_setzero_pd();
-
-                double *ll   = &left[k * 400 + l * 20];
-                double *rr   = &right[k * 400 + l * 20];
-                double *EVEV = &extEV[20 * l];
-
-                for(j = 0; j < 20; j+=2)
-                {
-                  __m128d lv  = _mm_load_pd(&ll[j]);
-                  __m128d rv  = _mm_load_pd(&rr[j]);
-                  __m128d vll = _mm_load_pd(&vl[j]);
-                  __m128d vrr = _mm_load_pd(&vr[j]);
-
-                  al = _mm_add_pd(al, _mm_mul_pd(vll, lv));
-                  ar = _mm_add_pd(ar, _mm_mul_pd(vrr, rv));
-                }                
-
-                al = _mm_hadd_pd(al, al);
-                ar = _mm_hadd_pd(ar, ar);
-
-                al = _mm_mul_pd(al, ar);
-
-                for(j = 0; j < 20; j+=2)
-                {
-                  __m128d vv  = _mm_load_pd(&v[j]);
-                  __m128d EVV = _mm_load_pd(&EVEV[j]);
-
-                  vv = _mm_add_pd(vv, _mm_mul_pd(al, EVV));
-
-                  _mm_store_pd(&v[j], vv);
-                }                                                 
-              }          
-
-            }
-          }
-
-
-
-          { 
-            v = x3_ptr;
-            __m128d minlikelihood_sse = _mm_set1_pd( PLL_MINLIKELIHOOD );
-
-            scale = 1;
-            for(l = 0; scale && (l < 80); l += 2)
-            {
-              __m128d vv = _mm_load_pd(&v[l]);
-              __m128d v1 = _mm_and_pd(vv, absMask.m);
-              v1 = _mm_cmplt_pd(v1,  minlikelihood_sse);
-              if(_mm_movemask_pd( v1 ) != 3)
-                scale = 0;
-            }             
-          }
-
-
-          if (scale)
-          {
-            __m128d twoto = _mm_set_pd(PLL_TWOTOTHE256, PLL_TWOTOTHE256);
-
-            for(l = 0; l < 80; l+=2)
-            {
-              __m128d ex3v = _mm_load_pd(&v[l]);                  
-              _mm_store_pd(&v[l], _mm_mul_pd(ex3v,twoto));      
-            }                             
-
-            if(!fastScaling)
-              ex3[i] += 1;
-            else
-              addScale += wgt[i];                         
-          }
-          x3_ptr += 80;
-        }
-      }
-      break;
-    default:
-      assert(0);   /* unknown tipCase */
-  }
-
-  if(fastScaling)
-    *scalerIncrement = addScale;   /* left untouched when fastScaling is false */
-}
-
-
-
-/** @ingroup group1
- *  @brief Computation of conditional likelihood array for GTR GAMMA (Optimized SSE3 version for AA data)
-
-    This is the SSE3 optimized version of ::newviewGAMMA_FLEX for computing the conditional
-    likelihood arrays at some node \a p, given child nodes \a q and \a r using the \b GAMMA
-    model of rate heterogeneity.
-
-    @note
-    For more details and function argument description check the function ::newviewGAMMA_FLEX
-*/
-static void newviewGTRGAMMAPROT(int tipCase,
-                                double *x1, double *x2, double *x3, double *extEV, double *tipVector,
-                                int *ex3, unsigned char *tipX1, unsigned char *tipX2,
-                                int n, double *left, double *right, int *wgt, int *scalerIncrement, const pllBoolean fastScaling)
-{
-  double  *uX1, *uX2, *v;
-  double x1px2;
-  int  i, j, l, k, scale, addScale = 0;
-  double *vl, *vr;
-
-
-
-  switch(tipCase)
-  {
-    case PLL_TIP_TIP:
-      {
-        double umpX1[1840], umpX2[1840];
-
-        for(i = 0; i < 23; i++)
-        {
-          v = &(tipVector[20 * i]);
-
-          for(k = 0; k < 80; k++)
-          {
-            double *ll =  &left[k * 20];
-            double *rr =  &right[k * 20];
-
-            __m128d umpX1v = _mm_setzero_pd();
-            __m128d umpX2v = _mm_setzero_pd();
-
-            for(l = 0; l < 20; l+=2)
-            {
-              __m128d vv = _mm_load_pd(&v[l]);
-              umpX1v = _mm_add_pd(umpX1v, _mm_mul_pd(vv, _mm_load_pd(&ll[l])));
-              umpX2v = _mm_add_pd(umpX2v, _mm_mul_pd(vv, _mm_load_pd(&rr[l])));                                 
-            }
-
-            umpX1v = _mm_hadd_pd(umpX1v, umpX1v);
-            umpX2v = _mm_hadd_pd(umpX2v, umpX2v);
-
-            _mm_storel_pd(&umpX1[80 * i + k], umpX1v);
-            _mm_storel_pd(&umpX2[80 * i + k], umpX2v);
-
-          }
-        }
-
-        for(i = 0; i < n; i++)
-        {
-          uX1 = &umpX1[80 * tipX1[i]];
-          uX2 = &umpX2[80 * tipX2[i]];
-
-          for(j = 0; j < 4; j++)
-          {
-            v = &x3[i * 80 + j * 20];
-
-
-            __m128d zero =  _mm_setzero_pd();
-            for(k = 0; k < 20; k+=2)                                
-              _mm_store_pd(&v[k], zero);
-
-            for(k = 0; k < 20; k++)
-            { 
-              double *eev = &extEV[k * 20];
-              x1px2 = uX1[j * 20 + k] * uX2[j * 20 + k];
-              __m128d x1px2v = _mm_set1_pd(x1px2);
-
-              for(l = 0; l < 20; l+=2)
-              {
-                __m128d vv = _mm_load_pd(&v[l]);
-                __m128d ee = _mm_load_pd(&eev[l]);
-
-                vv = _mm_add_pd(vv, _mm_mul_pd(x1px2v,ee));
-
-                _mm_store_pd(&v[l], vv);
-              }
-            }
-
-
-          }        
-        }
-      }
-      break;
-    case PLL_TIP_INNER:
-      {
-        double umpX1[1840], ump_x2[20];
-
-
-        for(i = 0; i < 23; i++)
-        {
-          v = &(tipVector[20 * i]);
-
-          for(k = 0; k < 80; k++)
-          {
-            double *ll =  &left[k * 20];
-
-            __m128d umpX1v = _mm_setzero_pd();
-
-            for(l = 0; l < 20; l+=2)
-            {
-              __m128d vv = _mm_load_pd(&v[l]);
-              umpX1v = _mm_add_pd(umpX1v, _mm_mul_pd(vv, _mm_load_pd(&ll[l])));                                                 
-            }
-
-            umpX1v = _mm_hadd_pd(umpX1v, umpX1v);                               
-            _mm_storel_pd(&umpX1[80 * i + k], umpX1v);          
-
-
-          }
-        }
-
-        for (i = 0; i < n; i++)
-        {
-          uX1 = &umpX1[80 * tipX1[i]];
-
-          for(k = 0; k < 4; k++)
-          {
-            v = &(x2[80 * i + k * 20]);
-
-            for(l = 0; l < 20; l++)
-            {              
-              double *r =  &right[k * 400 + l * 20];
-              __m128d ump_x2v = _mm_setzero_pd();           
-
-              for(j = 0; j < 20; j+= 2)
-              {
-                __m128d vv = _mm_load_pd(&v[j]);
-                __m128d rr = _mm_load_pd(&r[j]);
-                ump_x2v = _mm_add_pd(ump_x2v, _mm_mul_pd(vv, rr));
-              }
-
-              ump_x2v = _mm_hadd_pd(ump_x2v, ump_x2v);
-
-              _mm_storel_pd(&ump_x2[l], ump_x2v);                                    
-            }
-
-            v = &(x3[80 * i + 20 * k]);
-
-            __m128d zero =  _mm_setzero_pd();
-            for(l = 0; l < 20; l+=2)                                
-              _mm_store_pd(&v[l], zero);
-
-            for(l = 0; l < 20; l++)
-            {
-              double *eev = &extEV[l * 20];
-              x1px2 = uX1[k * 20 + l]  * ump_x2[l];
-              __m128d x1px2v = _mm_set1_pd(x1px2);
-
-              for(j = 0; j < 20; j+=2)
-              {
-                __m128d vv = _mm_load_pd(&v[j]);
-                __m128d ee = _mm_load_pd(&eev[j]);
-
-                vv = _mm_add_pd(vv, _mm_mul_pd(x1px2v,ee));
-
-                _mm_store_pd(&v[j], vv);
-              }                             
-            }                   
-
-          }
-
-
-          { 
-            v = &(x3[80 * i]);
-            __m128d minlikelihood_sse = _mm_set1_pd( PLL_MINLIKELIHOOD );
-
-            scale = 1;
-            for(l = 0; scale && (l < 80); l += 2)
-            {
-              __m128d vv = _mm_load_pd(&v[l]);
-              __m128d v1 = _mm_and_pd(vv, absMask.m);
-              v1 = _mm_cmplt_pd(v1,  minlikelihood_sse);
-              if(_mm_movemask_pd( v1 ) != 3)
-                scale = 0;
-            }             
-          }
-
-
-          if (scale)
-          {
-
-            __m128d twoto = _mm_set_pd(PLL_TWOTOTHE256, PLL_TWOTOTHE256);
-
-            for(l = 0; l < 80; l+=2)
-            {
-              __m128d ex3v = _mm_load_pd(&v[l]);                  
-              _mm_store_pd(&v[l], _mm_mul_pd(ex3v,twoto));      
-            }                             
-
-
-            if(!fastScaling)
-              ex3[i] += 1;
-            else
-              addScale += wgt[i];
-
-          }
-        }
-      }
-      break;
-    case PLL_INNER_INNER:
-      for (i = 0; i < n; i++)
-      {
-        for(k = 0; k < 4; k++)
-        {
-          vl = &(x1[80 * i + 20 * k]);
-          vr = &(x2[80 * i + 20 * k]);
-          v =  &(x3[80 * i + 20 * k]);
-
-
-          __m128d zero =  _mm_setzero_pd();
-          for(l = 0; l < 20; l+=2)                                  
-            _mm_store_pd(&v[l], zero);
-
-
-          for(l = 0; l < 20; l++)
-          {              
-
-            {
-              __m128d al = _mm_setzero_pd();
-              __m128d ar = _mm_setzero_pd();
-
-              double *ll   = &left[k * 400 + l * 20];
-              double *rr   = &right[k * 400 + l * 20];
-              double *EVEV = &extEV[20 * l];
-
-              for(j = 0; j < 20; j+=2)
-              {
-                __m128d lv  = _mm_load_pd(&ll[j]);
-                __m128d rv  = _mm_load_pd(&rr[j]);
-                __m128d vll = _mm_load_pd(&vl[j]);
-                __m128d vrr = _mm_load_pd(&vr[j]);
-
-                al = _mm_add_pd(al, _mm_mul_pd(vll, lv));
-                ar = _mm_add_pd(ar, _mm_mul_pd(vrr, rv));
-              }                  
-
-              al = _mm_hadd_pd(al, al);
-              ar = _mm_hadd_pd(ar, ar);
-
-              al = _mm_mul_pd(al, ar);
-
-              for(j = 0; j < 20; j+=2)
-              {
-                __m128d vv  = _mm_load_pd(&v[j]);
-                __m128d EVV = _mm_load_pd(&EVEV[j]);
-
-                vv = _mm_add_pd(vv, _mm_mul_pd(al, EVV));
-
-                _mm_store_pd(&v[j], vv);
-              }                                           
-            }            
-
-          }
-        }
-
-
-
-        { 
-          v = &(x3[80 * i]);
-          __m128d minlikelihood_sse = _mm_set1_pd( PLL_MINLIKELIHOOD );
-
-          scale = 1;
-          for(l = 0; scale && (l < 80); l += 2)
-          {
-            __m128d vv = _mm_load_pd(&v[l]);
-            __m128d v1 = _mm_and_pd(vv, absMask.m);
-            v1 = _mm_cmplt_pd(v1,  minlikelihood_sse);
-            if(_mm_movemask_pd( v1 ) != 3)
-              scale = 0;
-          }               
-        }
-
-
-        if (scale)
-        {
-
-          __m128d twoto = _mm_set_pd(PLL_TWOTOTHE256, PLL_TWOTOTHE256);
-
-          for(l = 0; l < 80; l+=2)
-          {
-            __m128d ex3v = _mm_load_pd(&v[l]);            
-            _mm_store_pd(&v[l], _mm_mul_pd(ex3v,twoto));        
-          }                               
-
-
-          if(!fastScaling)
-            ex3[i] += 1;
-          else
-            addScale += wgt[i];
-        }
-      }
-      break;
-    default:
-      assert(0);
-  }
-
-  if(fastScaling)
-    *scalerIncrement = addScale;
-}
-
-
-
-/** @ingroup group1
- *  @brief Computation of conditional likelihood arrray for GTR CAT (Optimized SSE3 version for AA data)
-
-    This is the SSE3 optimized version of ::newviewCAT_FLEX for computing the conditional
-    likelihood arrays at some node \a p, given child nodes \a q and \a r using the \b CAT
-    model of rate heterogeneity.
-
-    @note
-    For more details and function argument description check the function ::newviewCAT_FLEX
-*/
-static void newviewGTRCATPROT(int tipCase, double *extEV,
-                              int *cptr,
-                              double *x1, double *x2, double *x3, double *tipVector,
-                              int *ex3, unsigned char *tipX1, unsigned char *tipX2,
-                              int n, double *left, double *right, int *wgt, int *scalerIncrement, const pllBoolean fastScaling)
-{
-  double
-    *le, *ri, *v, *vl, *vr;
-
-  int i, l, j, scale, addScale = 0;
-
-  switch(tipCase)
-  {
-    case PLL_TIP_TIP:
-      {
-        for (i = 0; i < n; i++)
-        {
-          le = &left[cptr[i] * 400];
-          ri = &right[cptr[i] * 400];
-
-          vl = &(tipVector[20 * tipX1[i]]);
-          vr = &(tipVector[20 * tipX2[i]]);
-          v  = &x3[20 * i];
-
-          for(l = 0; l < 20; l+=2)
-            _mm_store_pd(&v[l], _mm_setzero_pd());                      
-
-
-          for(l = 0; l < 20; l++)
-          {
-            __m128d x1v = _mm_setzero_pd();
-            __m128d x2v = _mm_setzero_pd();      
-            double 
-              *ev = &extEV[l * 20],
-              *lv = &le[l * 20],
-              *rv = &ri[l * 20];
-
-            for(j = 0; j < 20; j+=2)
-            {
-              x1v = _mm_add_pd(x1v, _mm_mul_pd(_mm_load_pd(&vl[j]), _mm_load_pd(&lv[j])));                  
-              x2v = _mm_add_pd(x2v, _mm_mul_pd(_mm_load_pd(&vr[j]), _mm_load_pd(&rv[j])));
-            }
-
-            x1v = _mm_hadd_pd(x1v, x1v);
-            x2v = _mm_hadd_pd(x2v, x2v);
-
-            x1v = _mm_mul_pd(x1v, x2v);
-
-            for(j = 0; j < 20; j+=2)
-            {
-              __m128d vv = _mm_load_pd(&v[j]);
-              vv = _mm_add_pd(vv, _mm_mul_pd(x1v, _mm_load_pd(&ev[j])));
-              _mm_store_pd(&v[j], vv);
-            }               
-
-          }        
-        }
-      }
-      break;
-    case PLL_TIP_INNER:
-      {
-        for (i = 0; i < n; i++)
-        {
-          le = &left[cptr[i] * 400];
-          ri = &right[cptr[i] * 400];
-
-          vl = &(tipVector[20 * tipX1[i]]);
-          vr = &x2[20 * i];
-          v  = &x3[20 * i];
-
-          for(l = 0; l < 20; l+=2)
-            _mm_store_pd(&v[l], _mm_setzero_pd());                      
-
-
-
-          for(l = 0; l < 20; l++)
-          {
-
-            __m128d x1v = _mm_setzero_pd();
-            __m128d x2v = _mm_setzero_pd();     
-            double 
-              *ev = &extEV[l * 20],
-              *lv = &le[l * 20],
-              *rv = &ri[l * 20];
-
-            for(j = 0; j < 20; j+=2)
-            {
-              x1v = _mm_add_pd(x1v, _mm_mul_pd(_mm_load_pd(&vl[j]), _mm_load_pd(&lv[j])));                  
-              x2v = _mm_add_pd(x2v, _mm_mul_pd(_mm_load_pd(&vr[j]), _mm_load_pd(&rv[j])));
-            }
-
-            x1v = _mm_hadd_pd(x1v, x1v);
-            x2v = _mm_hadd_pd(x2v, x2v);
-
-            x1v = _mm_mul_pd(x1v, x2v);
-
-            for(j = 0; j < 20; j+=2)
-            {
-              __m128d vv = _mm_load_pd(&v[j]);
-              vv = _mm_add_pd(vv, _mm_mul_pd(x1v, _mm_load_pd(&ev[j])));
-              _mm_store_pd(&v[j], vv);
-            }               
-
-          }
-
-          {         
-            __m128d minlikelihood_sse = _mm_set1_pd( PLL_MINLIKELIHOOD );
-
-            scale = 1;
-            for(l = 0; scale && (l < 20); l += 2)
-            {
-              __m128d vv = _mm_load_pd(&v[l]);
-              __m128d v1 = _mm_and_pd(vv, absMask.m);
-              v1 = _mm_cmplt_pd(v1,  minlikelihood_sse);
-              if(_mm_movemask_pd( v1 ) != 3)
-                scale = 0;
-            }             
-          }
-
-
-          if(scale)
-          {
-
-            __m128d twoto = _mm_set_pd(PLL_TWOTOTHE256, PLL_TWOTOTHE256);
-
-            for(l = 0; l < 20; l+=2)
-            {
-              __m128d ex3v = _mm_load_pd(&v[l]);
-              _mm_store_pd(&v[l], _mm_mul_pd(ex3v,twoto));                  
-            }
-
-            if(!fastScaling)
-              ex3[i] += 1;
-            else
-              addScale += wgt[i];         
-          }
-        }
-      }
-      break;
-    case PLL_INNER_INNER:
-      for(i = 0; i < n; i++)
-      {
-        le = &left[cptr[i] * 400];
-        ri = &right[cptr[i] * 400];
-
-        vl = &x1[20 * i];
-        vr = &x2[20 * i];
-        v = &x3[20 * i];
-
-
-        for(l = 0; l < 20; l+=2)
-          _mm_store_pd(&v[l], _mm_setzero_pd());                        
-
-
-        for(l = 0; l < 20; l++)
-        {
-
-          __m128d x1v = _mm_setzero_pd();
-          __m128d x2v = _mm_setzero_pd();
-          double 
-            *ev = &extEV[l * 20],
-            *lv = &le[l * 20],
-            *rv = &ri[l * 20];
-
-
-          for(j = 0; j < 20; j+=2)
-          {
-            x1v = _mm_add_pd(x1v, _mm_mul_pd(_mm_load_pd(&vl[j]), _mm_load_pd(&lv[j])));                    
-            x2v = _mm_add_pd(x2v, _mm_mul_pd(_mm_load_pd(&vr[j]), _mm_load_pd(&rv[j])));
-          }
-
-          x1v = _mm_hadd_pd(x1v, x1v);
-          x2v = _mm_hadd_pd(x2v, x2v);
-
-          x1v = _mm_mul_pd(x1v, x2v);
-
-          for(j = 0; j < 20; j+=2)
-          {
-            __m128d vv = _mm_load_pd(&v[j]);
-            vv = _mm_add_pd(vv, _mm_mul_pd(x1v, _mm_load_pd(&ev[j])));
-            _mm_store_pd(&v[j], vv);
-          }                 
-
-        }
-
-        {           
-          __m128d minlikelihood_sse = _mm_set1_pd( PLL_MINLIKELIHOOD );
-
-          scale = 1;
-          for(l = 0; scale && (l < 20); l += 2)
-          {
-            __m128d vv = _mm_load_pd(&v[l]);
-            __m128d v1 = _mm_and_pd(vv, absMask.m);
-            v1 = _mm_cmplt_pd(v1,  minlikelihood_sse);
-            if(_mm_movemask_pd( v1 ) != 3)
-              scale = 0;
-          }               
-        }
-
-
-        if(scale)
-        {
-
-          __m128d twoto = _mm_set_pd(PLL_TWOTOTHE256, PLL_TWOTOTHE256);
-
-          for(l = 0; l < 20; l+=2)
-          {
-            __m128d ex3v = _mm_load_pd(&v[l]);            
-            _mm_store_pd(&v[l], _mm_mul_pd(ex3v,twoto));        
-          }                               
-
-
-          if(!fastScaling)
-            ex3[i] += 1;
-          else
-            addScale += wgt[i];    
-        }
-      }
-      break;
-    default:
-      assert(0);
-  }
-
-  if(fastScaling)
-    *scalerIncrement = addScale;
-}
-
-/** @ingroup group1
- *  @brief Computation of conditional likelihood arrray for GTR CAT with memory saving (Optimized SSE3 version for AA data)
-
-    This is the SSE3 optimized version of ::newviewCAT_FLEX for computing the conditional
-    likelihood arrays at some node \a p, given child nodes \a q and \a r using the \b CAT
-    model of rate heterogeneity.
-
-    @note
-    For more details and function argument description check the function ::newviewCAT_FLEX
-*/
-static void newviewGTRCATPROT_SAVE(int tipCase, double *extEV,
-                                   int *cptr,
-                                   double *x1, double *x2, double *x3, double *tipVector,
-                                   int *ex3, unsigned char *tipX1, unsigned char *tipX2,
-                                   int n, double *left, double *right, int *wgt, int *scalerIncrement, const pllBoolean fastScaling,
-                                   unsigned int *x1_gap, unsigned int *x2_gap, unsigned int *x3_gap,
-                                   double *x1_gapColumn, double *x2_gapColumn, double *x3_gapColumn, const int maxCats)
-{
-  double
-    *le, 
-    *ri, 
-    *v, 
-    *vl, 
-    *vr,
-    *x1_ptr = x1,
-    *x2_ptr = x2, 
-    *x3_ptr = x3;
-
-  int 
-    i, 
-    l, 
-    j, 
-    scale, 
-    scaleGap = 0,
-    addScale = 0;
-
-  {
-    vl = x1_gapColumn;        
-    vr = x2_gapColumn;
-    v = x3_gapColumn;
-
-    le = &left[maxCats * 400];
-    ri = &right[maxCats * 400];   
-
-    for(l = 0; l < 20; l+=2)
-      _mm_store_pd(&v[l], _mm_setzero_pd());                    
-
-    for(l = 0; l < 20; l++)
-    {
-      __m128d x1v = _mm_setzero_pd();
-      __m128d x2v = _mm_setzero_pd();
-      double 
-        *ev = &extEV[l * 20],
-        *lv = &le[l * 20],
-        *rv = &ri[l * 20];
-
-
-      for(j = 0; j < 20; j+=2)
-      {
-        x1v = _mm_add_pd(x1v, _mm_mul_pd(_mm_load_pd(&vl[j]), _mm_load_pd(&lv[j])));                
-        x2v = _mm_add_pd(x2v, _mm_mul_pd(_mm_load_pd(&vr[j]), _mm_load_pd(&rv[j])));
-      }
-
-      x1v = _mm_hadd_pd(x1v, x1v);
-      x2v = _mm_hadd_pd(x2v, x2v);
-
-      x1v = _mm_mul_pd(x1v, x2v);
-
-      for(j = 0; j < 20; j+=2)
-      {
-        __m128d vv = _mm_load_pd(&v[j]);
-        vv = _mm_add_pd(vv, _mm_mul_pd(x1v, _mm_load_pd(&ev[j])));
-        _mm_store_pd(&v[j], vv);
-      }                 
-    }
-
-    if(tipCase != PLL_TIP_TIP)
-    {       
-      __m128d minlikelihood_sse = _mm_set1_pd( PLL_MINLIKELIHOOD );
-
-      scale = 1;
-      for(l = 0; scale && (l < 20); l += 2)
-      {
-        __m128d vv = _mm_load_pd(&v[l]);
-        __m128d v1 = _mm_and_pd(vv, absMask.m);
-        v1 = _mm_cmplt_pd(v1,  minlikelihood_sse);
-        if(_mm_movemask_pd( v1 ) != 3)
-          scale = 0;
-      }                 
-
-      if(scale)
-      {
-        __m128d twoto = _mm_set_pd(PLL_TWOTOTHE256, PLL_TWOTOTHE256);
-
-        for(l = 0; l < 20; l+=2)
-        {
-          __m128d ex3v = _mm_load_pd(&v[l]);              
-          _mm_store_pd(&v[l], _mm_mul_pd(ex3v,twoto));  
-        }                                 
-
-        scaleGap = PLL_TRUE;       
-      }
-    }
-  }
-
-  switch(tipCase)
-  {
-    case PLL_TIP_TIP:
-      {
-        for (i = 0; i < n; i++)
-        {
-          if(noGap(x3_gap, i))
-          {             
-            vl = &(tipVector[20 * tipX1[i]]);
-            vr = &(tipVector[20 * tipX2[i]]);
-            v  = x3_ptr;
-
-            if(isGap(x1_gap, i))
-              le =  &left[maxCats * 400];
-            else                  
-              le =  &left[cptr[i] * 400];         
-
-            if(isGap(x2_gap, i))
-              ri =  &right[maxCats * 400];
-            else                  
-              ri =  &right[cptr[i] * 400];
-
-            for(l = 0; l < 20; l+=2)
-              _mm_store_pd(&v[l], _mm_setzero_pd());                    
-
-            for(l = 0; l < 20; l++)
-            {
-              __m128d x1v = _mm_setzero_pd();
-              __m128d x2v = _mm_setzero_pd();    
-              double 
-                *ev = &extEV[l * 20],
-                *lv = &le[l * 20],
-                *rv = &ri[l * 20];
-
-              for(j = 0; j < 20; j+=2)
-              {
-                x1v = _mm_add_pd(x1v, _mm_mul_pd(_mm_load_pd(&vl[j]), _mm_load_pd(&lv[j])));                
-                x2v = _mm_add_pd(x2v, _mm_mul_pd(_mm_load_pd(&vr[j]), _mm_load_pd(&rv[j])));
-              }
-
-              x1v = _mm_hadd_pd(x1v, x1v);
-              x2v = _mm_hadd_pd(x2v, x2v);
-
-              x1v = _mm_mul_pd(x1v, x2v);
-
-              for(j = 0; j < 20; j+=2)
-              {
-                __m128d vv = _mm_load_pd(&v[j]);
-                vv = _mm_add_pd(vv, _mm_mul_pd(x1v, _mm_load_pd(&ev[j])));
-                _mm_store_pd(&v[j], vv);
-              }            
-            }
-
-            x3_ptr += 20;
-
-          }   
-        }
-      }
-      break;
-    case PLL_TIP_INNER:
-      {
-        for (i = 0; i < n; i++)
-        {
-          if(isGap(x3_gap, i))
-          {
-            if(scaleGap)
-              {
-                if(!fastScaling)
-                  ex3[i] += 1;
-                else
-                  addScale += wgt[i];
-              }
-          }
-          else
-          {      
-            vl = &(tipVector[20 * tipX1[i]]);
-
-            vr = x2_ptr;
-            v = x3_ptr;
-
-            if(isGap(x1_gap, i))
-              le =  &left[maxCats * 400];
-            else
-              le =  &left[cptr[i] * 400];
-
-            if(isGap(x2_gap, i))
-            {            
-              ri =  &right[maxCats * 400];
-              vr = x2_gapColumn;
-            }
-            else
-            {
-              ri =  &right[cptr[i] * 400];
-              vr = x2_ptr;
-              x2_ptr += 20;
-            }                                             
-
-            for(l = 0; l < 20; l+=2)
-              _mm_store_pd(&v[l], _mm_setzero_pd());                               
-
-            for(l = 0; l < 20; l++)
-            {
-              __m128d x1v = _mm_setzero_pd();
-              __m128d x2v = _mm_setzero_pd();   
-              double 
-                *ev = &extEV[l * 20],
-                *lv = &le[l * 20],
-                *rv = &ri[l * 20];
-
-              for(j = 0; j < 20; j+=2)
-              {
-                x1v = _mm_add_pd(x1v, _mm_mul_pd(_mm_load_pd(&vl[j]), _mm_load_pd(&lv[j])));                
-                x2v = _mm_add_pd(x2v, _mm_mul_pd(_mm_load_pd(&vr[j]), _mm_load_pd(&rv[j])));
-              }
-
-              x1v = _mm_hadd_pd(x1v, x1v);
-              x2v = _mm_hadd_pd(x2v, x2v);
-
-              x1v = _mm_mul_pd(x1v, x2v);
-
-              for(j = 0; j < 20; j+=2)
-              {
-                __m128d vv = _mm_load_pd(&v[j]);
-                vv = _mm_add_pd(vv, _mm_mul_pd(x1v, _mm_load_pd(&ev[j])));
-                _mm_store_pd(&v[j], vv);
-              }             
-            }
-
-            {       
-              __m128d minlikelihood_sse = _mm_set1_pd( PLL_MINLIKELIHOOD );
-
-              scale = 1;
-              for(l = 0; scale && (l < 20); l += 2)
-              {
-                __m128d vv = _mm_load_pd(&v[l]);
-                __m128d v1 = _mm_and_pd(vv, absMask.m);
-                v1 = _mm_cmplt_pd(v1,  minlikelihood_sse);
-                if(_mm_movemask_pd( v1 ) != 3)
-                  scale = 0;
-              }           
-            }
-
-
-            if(scale)
-            {
-              __m128d twoto = _mm_set_pd(PLL_TWOTOTHE256, PLL_TWOTOTHE256);
-
-              for(l = 0; l < 20; l+=2)
-              {
-                __m128d ex3v = _mm_load_pd(&v[l]);
-                _mm_store_pd(&v[l], _mm_mul_pd(ex3v,twoto));                
-              }
-              
-              if(!fastScaling)
-                ex3[i] += 1;
-              else
-                addScale += wgt[i];       
-            }
-            x3_ptr += 20;
-          }
-        }
-      }
-      break;
-    case PLL_INNER_INNER:
-      for(i = 0; i < n; i++)
-      { 
-        if(isGap(x3_gap, i))
-        {
-          if(scaleGap)
-            {
-              if(!fastScaling)
-                ex3[i] += 1;
-              else
-                addScale += wgt[i];
-            }
-        }
-        else
-        {                    
-          v = x3_ptr;
-
-          if(isGap(x1_gap, i))
-          {
-            vl = x1_gapColumn;
-            le =  &left[maxCats * 400];
-          }
-          else
-          {
-            le =  &left[cptr[i] * 400];
-            vl = x1_ptr;
-            x1_ptr += 20;
-          }
-
-          if(isGap(x2_gap, i))  
-          {
-            vr = x2_gapColumn;
-            ri =  &right[maxCats * 400];            
-          }
-          else
-          {
-            ri =  &right[cptr[i] * 400];
-            vr = x2_ptr;
-            x2_ptr += 20;
-          }                               
-
-          for(l = 0; l < 20; l+=2)
-            _mm_store_pd(&v[l], _mm_setzero_pd());                      
-
-          for(l = 0; l < 20; l++)
-          {
-            __m128d x1v = _mm_setzero_pd();
-            __m128d x2v = _mm_setzero_pd();
-            double 
-              *ev = &extEV[l * 20],
-              *lv = &le[l * 20],
-              *rv = &ri[l * 20];
-
-            for(j = 0; j < 20; j+=2)
-            {
-              x1v = _mm_add_pd(x1v, _mm_mul_pd(_mm_load_pd(&vl[j]), _mm_load_pd(&lv[j])));                  
-              x2v = _mm_add_pd(x2v, _mm_mul_pd(_mm_load_pd(&vr[j]), _mm_load_pd(&rv[j])));
-            }
-
-            x1v = _mm_hadd_pd(x1v, x1v);
-            x2v = _mm_hadd_pd(x2v, x2v);
-
-            x1v = _mm_mul_pd(x1v, x2v);
-
-            for(j = 0; j < 20; j+=2)
-            {
-              __m128d vv = _mm_load_pd(&v[j]);
-              vv = _mm_add_pd(vv, _mm_mul_pd(x1v, _mm_load_pd(&ev[j])));
-              _mm_store_pd(&v[j], vv);
-            }               
-
-          }
-
-          {         
-            __m128d minlikelihood_sse = _mm_set1_pd( PLL_MINLIKELIHOOD );
-
-            scale = 1;
-            for(l = 0; scale && (l < 20); l += 2)
-            {
-              __m128d vv = _mm_load_pd(&v[l]);
-              __m128d v1 = _mm_and_pd(vv, absMask.m);
-              v1 = _mm_cmplt_pd(v1,  minlikelihood_sse);
-              if(_mm_movemask_pd( v1 ) != 3)
-                scale = 0;
-            }             
-          }
-
-          if(scale)
-          {
-            __m128d twoto = _mm_set_pd(PLL_TWOTOTHE256, PLL_TWOTOTHE256);
-
-            for(l = 0; l < 20; l+=2)
-            {
-              __m128d ex3v = _mm_load_pd(&v[l]);                  
-              _mm_store_pd(&v[l], _mm_mul_pd(ex3v,twoto));      
-            }                             
-
-            if(!fastScaling)
-              ex3[i] += 1;
-            else
-              addScale += wgt[i];          
-          }
-          x3_ptr += 20;
-        }
-      }
-      break;
-    default:
-      assert(0);
-  }
-
-  if(fastScaling)
-    *scalerIncrement = addScale;
-}
-
-
-/** @ingroup group1
- *  @brief Computation of conditional likelihood arrray for the GTR GAMMA and for the LG4 model (Optimized SSE3 version for AA data)
-
-    This is the SSE3 optimized version of ::newviewGAMMA_FLEX for computing the conditional
-    likelihood arrays at some node \a p, given child nodes \a q and \a r using the \b GAMMA
-    model of rate heterogeneity and the LG4 model of evolution. Note that the original unoptimized
-    function does not incorporate the LG4 model.
-
-    @note
-    For more details and function argument description check the function ::newviewGAMMA_FLEX
-*/
-static void newviewGTRGAMMAPROT_LG4(int tipCase,
-                                    double *x1, double *x2, double *x3, double *extEV[4], double *tipVector[4],
-                                    int *ex3, unsigned char *tipX1, unsigned char *tipX2,
-                                    int n, double *left, double *right, int *wgt, int *scalerIncrement, const pllBoolean useFastScaling)
-{
-  double  *uX1, *uX2, *v;
-  double x1px2;
-  int  i, j, l, k, scale, addScale = 0;
-  double *vl, *vr;
-#ifndef __SSE3
-  double al, ar;
-#endif
-
-
-
-  switch(tipCase)
-    {
-    case PLL_TIP_TIP:
-      {
-        double umpX1[1840], umpX2[1840];
-
-        for(i = 0; i < 23; i++)
-          {
-           
-
-            for(k = 0; k < 80; k++)
-              {
-                
-                v = &(tipVector[k / 20][20 * i]);
-#ifdef __SSE3
-                double *ll =  &left[k * 20];
-                double *rr =  &right[k * 20];
-                
-                __m128d umpX1v = _mm_setzero_pd();
-                __m128d umpX2v = _mm_setzero_pd();
-
-                for(l = 0; l < 20; l+=2)
-                  {
-                    __m128d vv = _mm_load_pd(&v[l]);
-                    umpX1v = _mm_add_pd(umpX1v, _mm_mul_pd(vv, _mm_load_pd(&ll[l])));
-                    umpX2v = _mm_add_pd(umpX2v, _mm_mul_pd(vv, _mm_load_pd(&rr[l])));                                   
-                  }
-                
-                umpX1v = _mm_hadd_pd(umpX1v, umpX1v);
-                umpX2v = _mm_hadd_pd(umpX2v, umpX2v);
-                
-                _mm_storel_pd(&umpX1[80 * i + k], umpX1v);
-                _mm_storel_pd(&umpX2[80 * i + k], umpX2v);
-#else
-                umpX1[80 * i + k] = 0.0;
-                umpX2[80 * i + k] = 0.0;
-
-                for(l = 0; l < 20; l++)
-                  {
-                    umpX1[80 * i + k] +=  v[l] *  left[k * 20 + l];
-                    umpX2[80 * i + k] +=  v[l] * right[k * 20 + l];
-                  }
-#endif
-              }
-          }
-
-        for(i = 0; i < n; i++)
-          {
-            uX1 = &umpX1[80 * tipX1[i]];
-            uX2 = &umpX2[80 * tipX2[i]];
-
-            for(j = 0; j < 4; j++)
-              {
-                v = &x3[i * 80 + j * 20];
-
-#ifdef __SSE3
-                __m128d zero =  _mm_setzero_pd();
-                for(k = 0; k < 20; k+=2)                                    
-                  _mm_store_pd(&v[k], zero);
-
-                for(k = 0; k < 20; k++)
-                  { 
-                    double *eev = &extEV[j][k * 20];
-                    x1px2 = uX1[j * 20 + k] * uX2[j * 20 + k];
-                    __m128d x1px2v = _mm_set1_pd(x1px2);
-
-                    for(l = 0; l < 20; l+=2)
-                      {
-                        __m128d vv = _mm_load_pd(&v[l]);
-                        __m128d ee = _mm_load_pd(&eev[l]);
-
-                        vv = _mm_add_pd(vv, _mm_mul_pd(x1px2v,ee));
-                        
-                        _mm_store_pd(&v[l], vv);
-                      }
-                  }
-
-#else
-
-                for(k = 0; k < 20; k++)
-                  v[k] = 0.0;
-
-                for(k = 0; k < 20; k++)
-                  {                
-                    x1px2 = uX1[j * 20 + k] * uX2[j * 20 + k];
-                   
-                    for(l = 0; l < 20; l++)                                                     
-                      v[l] += x1px2 * extEV[j][20 * k + l];                  
-                  }
-#endif
-              }    
-          }
-      }
-      break;
-    case PLL_TIP_INNER:
-      {
-        double umpX1[1840], ump_x2[20];
-
-
-        for(i = 0; i < 23; i++)
-          {
-           
-
-            for(k = 0; k < 80; k++)
-              { 
-                v = &(tipVector[k / 20][20 * i]);
-#ifdef __SSE3
-                double *ll =  &left[k * 20];
-                                
-                __m128d umpX1v = _mm_setzero_pd();
-                
-                for(l = 0; l < 20; l+=2)
-                  {
-                    __m128d vv = _mm_load_pd(&v[l]);
-                    umpX1v = _mm_add_pd(umpX1v, _mm_mul_pd(vv, _mm_load_pd(&ll[l])));                                                   
-                  }
-                
-                umpX1v = _mm_hadd_pd(umpX1v, umpX1v);                           
-                _mm_storel_pd(&umpX1[80 * i + k], umpX1v);              
-#else       
-                umpX1[80 * i + k] = 0.0;
-
-                for(l = 0; l < 20; l++)
-                  umpX1[80 * i + k] +=  v[l] * left[k * 20 + l];
-#endif
-
-              }
-          }
-
-        for (i = 0; i < n; i++)
-          {
-            uX1 = &umpX1[80 * tipX1[i]];
-
-            for(k = 0; k < 4; k++)
-              {
-                v = &(x2[80 * i + k * 20]);
-#ifdef __SSE3              
-                for(l = 0; l < 20; l++)
-                  {                
-                    double *r =  &right[k * 400 + l * 20];
-                    __m128d ump_x2v = _mm_setzero_pd();     
-                    
-                    for(j = 0; j < 20; j+= 2)
-                      {
-                        __m128d vv = _mm_load_pd(&v[j]);
-                        __m128d rr = _mm_load_pd(&r[j]);
-                        ump_x2v = _mm_add_pd(ump_x2v, _mm_mul_pd(vv, rr));
-                      }
-                     
-                    ump_x2v = _mm_hadd_pd(ump_x2v, ump_x2v);
-                    
-                    _mm_storel_pd(&ump_x2[l], ump_x2v);                              
-                  }
-
-                v = &(x3[80 * i + 20 * k]);
-
-                __m128d zero =  _mm_setzero_pd();
-                for(l = 0; l < 20; l+=2)                                    
-                  _mm_store_pd(&v[l], zero);
-                  
-                for(l = 0; l < 20; l++)
-                  {
-                    double *eev = &extEV[k][l * 20];
-                    x1px2 = uX1[k * 20 + l]  * ump_x2[l];
-                    __m128d x1px2v = _mm_set1_pd(x1px2);
-                  
-                    for(j = 0; j < 20; j+=2)
-                      {
-                        __m128d vv = _mm_load_pd(&v[j]);
-                        __m128d ee = _mm_load_pd(&eev[j]);
-                        
-                        vv = _mm_add_pd(vv, _mm_mul_pd(x1px2v,ee));
-                        
-                        _mm_store_pd(&v[j], vv);
-                      }                             
-                  }                     
-#else
-                for(l = 0; l < 20; l++)
-                  {
-                    ump_x2[l] = 0.0;
-
-                    for(j = 0; j < 20; j++)
-                      ump_x2[l] += v[j] * right[k * 400 + l * 20 + j];
-                  }
-
-                v = &(x3[80 * i + 20 * k]);
-
-                for(l = 0; l < 20; l++)
-                  v[l] = 0;
-
-                for(l = 0; l < 20; l++)
-                  {
-                    x1px2 = uX1[k * 20 + l]  * ump_x2[l];
-                    for(j = 0; j < 20; j++)
-                      v[j] += x1px2 * extEV[k][l * 20  + j];
-                  }
-#endif
-              }
-           
-#ifdef __SSE3
-            { 
-              v = &(x3[80 * i]);
-              __m128d minlikelihood_sse = _mm_set1_pd( PLL_MINLIKELIHOOD );
-              
-              scale = 1;
-              for(l = 0; scale && (l < 80); l += 2)
-                {
-                  __m128d vv = _mm_load_pd(&v[l]);
-                  __m128d v1 = _mm_and_pd(vv, absMask.m);
-                  v1 = _mm_cmplt_pd(v1,  minlikelihood_sse);
-                  if(_mm_movemask_pd( v1 ) != 3)
-                    scale = 0;
-                }                 
-            }
-#else
-            v = &x3[80 * i];
-            scale = 1;
-            for(l = 0; scale && (l < 80); l++)
-              scale = (PLL_ABS(v[l]) <  PLL_MINLIKELIHOOD );
-#endif
-
-            if (scale)
-              {
-#ifdef __SSE3
-               __m128d twoto = _mm_set_pd(PLL_TWOTOTHE256, PLL_TWOTOTHE256);
-               
-               for(l = 0; l < 80; l+=2)
-                 {
-                   __m128d ex3v = _mm_load_pd(&v[l]);             
-                   _mm_store_pd(&v[l], _mm_mul_pd(ex3v,twoto)); 
-                 }                                
-#else
-                for(l = 0; l < 80; l++)
-                  v[l] *= PLL_TWOTOTHE256;
-#endif
-
-                if(useFastScaling)
-                  addScale += wgt[i];
-                else
-                  ex3[i]  += 1;        
-              }
-          }
-      }
-      break;
-    case PLL_INNER_INNER:
-      for (i = 0; i < n; i++)
-       {
-         for(k = 0; k < 4; k++)
-           {
-             vl = &(x1[80 * i + 20 * k]);
-             vr = &(x2[80 * i + 20 * k]);
-             v =  &(x3[80 * i + 20 * k]);
-
-#ifdef __SSE3
-             __m128d zero =  _mm_setzero_pd();
-             for(l = 0; l < 20; l+=2)                               
-               _mm_store_pd(&v[l], zero);
-#else
-             for(l = 0; l < 20; l++)
-               v[l] = 0;
-#endif
-
-             for(l = 0; l < 20; l++)
-               {                 
-#ifdef __SSE3
-                 {
-                   __m128d al = _mm_setzero_pd();
-                   __m128d ar = _mm_setzero_pd();
-
-                   double *ll   = &left[k * 400 + l * 20];
-                   double *rr   = &right[k * 400 + l * 20];
-                   double *EVEV = &extEV[k][20 * l];
-                   
-                   for(j = 0; j < 20; j+=2)
-                     {
-                       __m128d lv  = _mm_load_pd(&ll[j]);
-                       __m128d rv  = _mm_load_pd(&rr[j]);
-                       __m128d vll = _mm_load_pd(&vl[j]);
-                       __m128d vrr = _mm_load_pd(&vr[j]);
-                       
-                       al = _mm_add_pd(al, _mm_mul_pd(vll, lv));
-                       ar = _mm_add_pd(ar, _mm_mul_pd(vrr, rv));
-                     }                   
-                       
-                   al = _mm_hadd_pd(al, al);
-                   ar = _mm_hadd_pd(ar, ar);
-                   
-                   al = _mm_mul_pd(al, ar);
-
-                   for(j = 0; j < 20; j+=2)
-                     {
-                       __m128d vv  = _mm_load_pd(&v[j]);
-                       __m128d EVV = _mm_load_pd(&EVEV[j]);
-
-                       vv = _mm_add_pd(vv, _mm_mul_pd(al, EVV));
-
-                       _mm_store_pd(&v[j], vv);
-                     }                                            
-                 }               
-#else
-                 al = 0.0;
-                 ar = 0.0;
-
-                 for(j = 0; j < 20; j++)
-                   {
-                     al += vl[j] * left[k * 400 + l * 20 + j];
-                     ar += vr[j] * right[k * 400 + l * 20 + j];
-                   }
-
-                 x1px2 = al * ar;
-
-                 for(j = 0; j < 20; j++)
-                   v[j] += x1px2 * extEV[k][20 * l + j];
-#endif
-               }
-           }
-         
-
-#ifdef __SSE3
-         { 
-           v = &(x3[80 * i]);
-           __m128d minlikelihood_sse = _mm_set1_pd( PLL_MINLIKELIHOOD );
-           
-           scale = 1;
-           for(l = 0; scale && (l < 80); l += 2)
-             {
-               __m128d vv = _mm_load_pd(&v[l]);
-               __m128d v1 = _mm_and_pd(vv, absMask.m);
-               v1 = _mm_cmplt_pd(v1,  minlikelihood_sse);
-               if(_mm_movemask_pd( v1 ) != 3)
-                 scale = 0;
-             }            
-         }
-#else
-         v = &(x3[80 * i]);
-         scale = 1;
-         for(l = 0; scale && (l < 80); l++)
-           scale = ((PLL_ABS(v[l]) <  PLL_MINLIKELIHOOD ));
-#endif
-
-         if (scale)
-           {
-#ifdef __SSE3
-               __m128d twoto = _mm_set_pd(PLL_TWOTOTHE256, PLL_TWOTOTHE256);
-               
-               for(l = 0; l < 80; l+=2)
-                 {
-                   __m128d ex3v = _mm_load_pd(&v[l]);             
-                   _mm_store_pd(&v[l], _mm_mul_pd(ex3v,twoto)); 
-                 }                                
-#else        
-             for(l = 0; l < 80; l++)
-               v[l] *= PLL_TWOTOTHE256;
-#endif
-
-             if(useFastScaling)
-               addScale += wgt[i];
-             else
-               ex3[i]  += 1;      
-           }
-       }
-      break;
-    default:
-      assert(0);
-    }
-
-  if(useFastScaling)
-    *scalerIncrement = addScale;
-
-}
-#endif
-
-
diff --git a/pllrepo/src/optimizeModel.c b/pllrepo/src/optimizeModel.c
deleted file mode 100644
index dde1b95..0000000
--- a/pllrepo/src/optimizeModel.c
+++ /dev/null
@@ -1,3145 +0,0 @@
-/** 
- * PLL (version 1.0.0) a software library for phylogenetic inference
- * Copyright (C) 2013 Tomas Flouri and Alexandros Stamatakis
- *
- * Derived from 
- * RAxML-HPC, a program for sequential and parallel estimation of phylogenetic
- * trees by Alexandros Stamatakis
- *
- * This program is free software: you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation, either version 3 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- * 
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * For any other enquiries send an Email to Tomas Flouri
- * Tomas.Flouri at h-its.org
- *
- * When publishing work that uses PLL please cite PLL
- * 
- * @file optimizeModel.c
- *
- * @brief Model optimization routines
- */ 
-
-#include "mem_alloc.h"
-
-#ifndef WIN32
-#include <unistd.h>
-#endif
-
-#include <math.h>
-#include <time.h> 
-#include <stdlib.h>
-#include <stdio.h>
-#include <ctype.h>
-#include <string.h>
-#include <assert.h>
-
-#include "pll.h"
-#include "pllInternal.h"
-
-static const double MNBRAK_GOLD =    1.618034;          /**< Golden ratio */
-static const double MNBRAK_TINY =      1.e-20;
-static const double MNBRAK_GLIMIT =     100.0;
-static const double BRENT_ZEPS  =       1.e-5;
-static const double BRENT_CGOLD =   0.3819660;
-
-extern int optimizeRatesInvocations;
-extern int optimizeAlphaInvocations;
-extern int optimizeInvarInvocations;
-extern char ratesFileName[1024];
-extern char lengthFileName[1024];
-extern char lengthFileNameModel[1024];
-extern char *protModels[PLL_NUM_PROT_MODELS];
-
-static void optParamGeneric(pllInstance *tr, partitionList * pr, double modelEpsilon, linkageList *ll, int numberOfModels, int rateNumber, double lim_inf, double lim_sup, int whichParameterType);
-// FLAG for easier debugging of model parameter optimization routines 
-
-//#define _DEBUG_MOD_OPT
-
-
-/*********************FUNCTIONS FOR EXACT MODEL OPTIMIZATION UNDER GTRGAMMA ***************************************/
-
-
-/* the following function is used to set rates in the Q matrix 
-   the data structure called symmetryVector is used to 
-   define the symmetries between rates as they are specified 
-   in some of the secondary structure substitution models that 
-   generally don't use GTR matrices but more restricted forms thereof */
-
-/** @brief Set a specific rate in the substitution matrix
-  *
-  * Assigns \a rate to the \a position-th substitution rate of partition \a model.
-  * When nonGTR is set, every rate in the symmetry class of \a position is updated.
-  *
-  * @param pr
-  *   List of partitions
-  *
-  * @param model
-  *   Index of partition
-  *
-  * @param rate
-  *   New value for the rate; asserted to lie in [PLL_RATE_MIN, PLL_RATE_MAX]
-  *
-  * @param position
-  *   Index of the substitution rate to set
-  */
-static void setRateModel(partitionList *pr, int model, double rate, int position)
-{
-  const int states   = pr->partitionData[model]->states;
-  const int numRates = (states * states - states) / 2;
-  int       k;
-
-  /* DNA partitions accept one rate index fewer than the other data types */
-  if(pr->partitionData[model]->dataType == PLL_DNA_DATA)
-    assert(position >= 0 && position < (numRates - 1));
-  else
-    assert(position >= 0 && position < numRates);
-
-  /* binary partitions must never be passed to this routine */
-  assert(pr->partitionData[model]->dataType != PLL_BINARY_DATA);
-
-  assert(rate >= PLL_RATE_MIN && rate <= PLL_RATE_MAX);
-
-  /* nonGTR not set: only the addressed slot changes */
-  if(!pr->partitionData[model]->nonGTR)
-    {
-      pr->partitionData[model]->substRates[position] = rate;
-      return;
-    }
-
-  /* nonGTR set: write every rate that shares the symmetry class of position */
-  {
-    const int targetClass = pr->partitionData[model]->symmetryVector[position];
-    const int lastClass   = pr->partitionData[model]->symmetryVector[numRates - 1];
-    /* the class that contains the last rate is held at 1.0 instead of rate */
-    const double newRate  =
-      (targetClass == lastClass) ? 1.0 : rate;
-
-    for(k = 0; k < numRates; k++)
-      {
-        if(pr->partitionData[model]->symmetryVector[k] == targetClass)
-          pr->partitionData[model]->substRates[k] = newRate;
-      }
-  }
-}
-
-//LIBRARY: the only thing that we will need to do here is to 
-//replace linkList by a string and also add some error correction 
-//code
-
-/* 
-   The following three functions are used to link/unlink parameters 
-   between partitions. This should work in a generic way; however, 
-   so far it is mainly used for linking/unlinking GTR matrix parameter 
-   estimates across different protein data partitions.
-   Generally, this mechanism can also be used for linking/unlinking alpha parameters 
-   between partitions and the like.
-   However, all alpha parameter estimates for all partitions and GTR estimates for 
-   DNA partitions are unlinked by default. This is actually hard-coded 
-   in here. 
-*/
-
-/* Initialize a parameter linkage list for a certain parameter type (can be whatever).
-   The input is an integer vector that contains NumberOfModels (numberOfPartitions) elements.
-
-   If we want to have all alpha parameters unlinked and have, say, 4 partitions, the input 
-   vector would look like this: {0, 1, 2, 3}; if we want to link partitions 0 and 3, the vector 
-   should look like this: {0, 1, 2, 0}.
-*/
-
-
-
-
-
-
-/* Dedicated helper function to initialize the linkage list, that is, essentially compute 
-   the integer vector int *linkList used above for linking GTR models.
-   
-   Once again, this is hard-coded in RAxML, because users cannot influence the linking.
-
-*/
-   
-
-/* free linkage list data structure */
-
-#define ALPHA_F    0
-#define RATE_F     1
-#define FREQ_F     2
-#define LXRATE_F   3
-#define LXWEIGHT_F 4
-
-/* Store value as LG4X weight exponent `rate`, then set each weight j to exp(e[j]) / sum of exp(e[0..3]). */
-static void updateWeights(partitionList *pr, int model, int rate, double value)
-{
-    double sum = 0.0;
-    int c = 0;
-    assert(rate >= 0 && rate < 4);
-    pr->partitionData[model]->lg4x_weightExponents[rate] = value;
-    while (c < 4)
-        sum += exp(pr->partitionData[model]->lg4x_weightExponents[c++]);
-    for (c = 0; c < 4; c++)
-        pr->partitionData[model]->lg4x_weights[c] = exp(pr->partitionData[model]->lg4x_weightExponents[c]) / sum;
-}
-
-/* Optimize the four LG4X weight exponents in turn; the likelihood is asserted not to decrease. */
-static void optimizeWeights(pllInstance *tr, partitionList *pr, double modelEpsilon, linkageList *ll,
-        int numberOfModels)
-{
-    double initialLH, finalLH;
-    int weightIdx = 0;
-    /* reference likelihood before touching any weight */
-    pllEvaluateLikelihood(tr, pr, tr->start, PLL_FALSE, PLL_FALSE);
-    initialLH = tr->likelihood;
-    while (weightIdx < 4)   /* each exponent is searched within [-1000000, 200] */
-        optParamGeneric(tr, pr, modelEpsilon, ll, numberOfModels, weightIdx++, -1000000.0, 200.0, LXWEIGHT_F);
-#if (defined(_FINE_GRAIN_MPI) || defined(_USE_PTHREADS))
-    pllMasterBarrier(tr, pr, PLL_THREAD_COPY_LG4X_RATES);
-#endif
-    pllEvaluateLikelihood(tr, pr, tr->start, PLL_TRUE, PLL_FALSE);
-    finalLH = tr->likelihood;
-
-    if (finalLH < initialLH)   /* report the regression before the assert fires */
-        printf("Final: %f initial: %f\n", finalLH, initialLH);
-    assert(finalLH >= initialLH);
-}
-
-/** @brief Set one model parameter of a partition to a given value
-  *
-  * Writes \a value into the \a rateNumber-th parameter of the kind selected by
-  * \a whichParameterType. \b RATE_F and \b FREQ_F finish with pllInitReversibleGTR,
-  * \b ALPHA_F calls pllMakeGammaCats on the partition's gammaRates, \b LXRATE_F
-  * stores the rate directly and \b LXWEIGHT_F delegates to updateWeights. Any
-  * other type triggers an assertion failure.
-  *
-  * @param index
-  *   Index of partition
-  *
-  * @param rateNumber
-  *   The index of the model parameter (not read for \b ALPHA_F)
-  *
-  * @param value
-  *   The value to which the parameter must be changed
-  *
-  * @param whichParameterType
-  *   One of \b RATE_F, \b ALPHA_F, \b FREQ_F, \b LXRATE_F or \b LXWEIGHT_F
-  *
-  * @param tr  PLL instance
-  * @param pr  List of partitions
-  */
-static void changeModelParameters(int index, int rateNumber, double value, int whichParameterType, pllInstance *tr, partitionList * pr)
-{
-  if(whichParameterType == RATE_F)
-    {
-      setRateModel(pr, index, value, rateNumber);
-      pllInitReversibleGTR(tr, pr, index);
-      return;
-    }
-
-  if(whichParameterType == ALPHA_F)
-    {
-      pr->partitionData[index]->alpha = value;
-      pllMakeGammaCats(pr->partitionData[index]->alpha, pr->partitionData[index]->gammaRates, 4, tr->useMedian);
-      return;
-    }
-
-  if(whichParameterType == FREQ_F)
-    {
-      const int numStates = pr->partitionData[index]->states;
-      double    expSum    = 0.0;
-      int       s;
-
-      pr->partitionData[index]->freqExponents[rateNumber] = value;
-
-      /* frequencies[s] = exp(freqExponents[s]) / sum of all exp(freqExponents) */
-      for(s = 0; s < numStates; s++)
-        expSum += exp(pr->partitionData[index]->freqExponents[s]);
-      for(s = 0; s < numStates; s++)
-        pr->partitionData[index]->frequencies[s] = exp(pr->partitionData[index]->freqExponents[s]) / expSum;
-
-      pllInitReversibleGTR(tr, pr, index);
-      return;
-    }
-
-  /* LG4X rate: stored as-is; LG4X weight: handled by updateWeights */
-  if(whichParameterType == LXRATE_F)
-    pr->partitionData[index]->gammaRates[rateNumber] = value;
-  else if(whichParameterType == LXWEIGHT_F)
-    updateWeights(pr, index, rateNumber, value);
-  else
-    assert(0);
-}
-
-/** @brief Evaluate the change of a parameter
- *
- *  For every valid, non-converged linkage list entry the \a rateNumber-th parameter of
- *  type \a whichFunction is set to the entry's slot in \a value, the likelihood is
- *  evaluated, and minus the summed partitionLH of each valid entry goes to \a result.
- *  For \b LXRATE_F the LG4X weights are re-optimized (optimizeWeights) first.
- *
- *  @param tr
- *    PLL instance
- *
- *  @param pr
- *    List of partitions
- *
- *  @param rateNumber
- *    Index of the parameter to optimize 
- *
- *  @param value
- *    Candidate parameter values, one slot per valid linkage list entry
- *
- *  @param result
- *    Output, one slot per valid entry: minus the sum of the entry's partitionLH values
- *
- *  @param converged
- *    One flag per valid entry; partitions of flagged entries get executeModel = PLL_FALSE
- *
- *  @param whichFunction
- *    Parameter type: \b ALPHA_F, \b RATE_F, \b FREQ_F, \b LXRATE_F or \b LXWEIGHT_F
- *
- *  @param numberOfModels
- *    Number of valid linkage list entries (asserted)
- *
- *  @param ll
- *    Linkage list
- *
- *  @param modelEpsilon
- *    Epsilon threshold, passed on to optimizeWeights
- */
-static void evaluateChange(pllInstance *tr, partitionList *pr, int rateNumber, double *value, double *result, pllBoolean* converged, int whichFunction, int numberOfModels, linkageList *ll, double modelEpsilon)
-{ 
-  int 
-    i, 
-    k, 
-    pos;
-
-  pllBoolean
-    atLeastOnePartition = PLL_FALSE;
-
-  /* push the candidate values into the model; converged or invalid entries are switched off */
-  for(i = 0, pos = 0; i < ll->entries; i++)
-    {
-      if(ll->ld[i].valid)
-        {
-          if(converged[pos])
-            {
-              for(k = 0; k < ll->ld[i].partitions; k++)
-                pr->partitionData[ll->ld[i].partitionList[k]]->executeModel = PLL_FALSE;
-            }
-          else
-            {
-              atLeastOnePartition = PLL_TRUE;
-              for(k = 0; k < ll->ld[i].partitions; k++)
-                {
-                  int index = ll->ld[i].partitionList[k];
-
-                  changeModelParameters(index, rateNumber, value[pos], whichFunction, tr, pr);
-                }
-            }
-          pos++;
-        }
-      else
-        {
-          for(k = 0; k < ll->ld[i].partitions; k++)
-            pr->partitionData[ll->ld[i].partitionList[k]]->executeModel = PLL_FALSE;
-        }
-    }
-
-  assert(pos == numberOfModels);
-
-    //some error checks for individual model parameters
-    switch (whichFunction)
-    {
-    case RATE_F:
-        assert(rateNumber != -1);
-        break;
-    case ALPHA_F:
-        break;
-    case LXRATE_F:
-        assert(rateNumber != -1);
-        break;
-    case LXWEIGHT_F:
-        assert(rateNumber != -1);
-        break;
-    case FREQ_F:
-        break;
-    default:
-        assert(0);
-    }
-
-    //LG4X weights are evaluated with the fourth argument PLL_FALSE, all other types with PLL_TRUE
-    switch (whichFunction)
-    {
-    case RATE_F:
-    case ALPHA_F:
-    case LXRATE_F:
-    case FREQ_F:
-        pllEvaluateLikelihood(tr, pr, tr->start, PLL_TRUE, PLL_FALSE);
-        break;
-    case LXWEIGHT_F:
-        pllEvaluateLikelihood(tr, pr, tr->start, PLL_FALSE, PLL_FALSE);
-        break;
-    default:
-        assert(0);
-    }
-
-    //nested optimization for LX4 model, now optimize the weights!
-    if (whichFunction == LXRATE_F && atLeastOnePartition)
-    {
-        //executeModel is saved here and restored after the nested optimization;
-        //the buffer comes from rax_malloc/rax_free like the other allocations in this file
-        pllBoolean *buffer = (pllBoolean *)rax_malloc(sizeof(pllBoolean) * pr->numberOfPartitions);
-
-        for (i = 0; i < pr->numberOfPartitions; i++) {
-            buffer[i] = pr->partitionData[i]->executeModel;
-            pr->partitionData[i]->executeModel = PLL_FALSE;
-        }
-
-        //enable only the first partition of each valid entry; invalid entries are not dereferenced
-        for (i = 0; i < ll->entries; i++)
-        {
-            if (ll->ld[i].valid)
-                pr->partitionData[ll->ld[i].partitionList[0]]->executeModel = PLL_TRUE;
-        }
-        optimizeWeights(tr, pr, modelEpsilon, ll, numberOfModels);
-
-        for (i = 0; i < pr->numberOfPartitions; i++) {
-            pr->partitionData[i]->executeModel = buffer[i];
-        }
-
-        rax_free(buffer);
-    }
-
-#if (defined(_FINE_GRAIN_MPI) || defined(_USE_PTHREADS))
-
-   switch (whichFunction)
-    {
-      case RATE_F:
-        pllMasterBarrier(tr, pr, PLL_THREAD_OPT_RATE);
-        break;
-      case ALPHA_F:
-        pllMasterBarrier(tr, pr, PLL_THREAD_OPT_ALPHA);
-        break;
-      case FREQ_F:
-        pllMasterBarrier(tr, pr, PLL_THREAD_OPT_RATE);
-        break;
-      case LXRATE_F:
-        pllMasterBarrier(tr, pr, PLL_THREAD_OPT_LG4X_RATE);
-        break;
-      case LXWEIGHT_F:
-        pllMasterBarrier(tr, pr, PLL_THREAD_OPT_LG4X_RATE);
-        break;
-      default:
-        break;
-    }
-#else
-   //commented out evaluate below in the course of the LG4X integration
-   //pllEvaluateLikelihood (tr, pr, tr->start, PLL_TRUE, PLL_FALSE);
-#endif
-
-  /* collect one result per valid entry and switch every listed partition back on */
-  for(i = 0, pos = 0; i < ll->entries; i++)
-    {
-      if(ll->ld[i].valid)
-        {
-          result[pos] = 0.0;
-          for(k = 0; k < ll->ld[i].partitions; k++)
-            {
-              int index = ll->ld[i].partitionList[k];
-
-              assert(pr->partitionData[index]->partitionLH <= 0.0);
-              result[pos] -= pr->partitionData[index]->partitionLH;
-            }
-          pos++;
-        }
-      for(k = 0; k < ll->ld[i].partitions; k++)
-        {
-          int index = ll->ld[i].partitionList[k];
-          pr->partitionData[index]->executeModel = PLL_TRUE;
-        }
-    }
-
-  assert(pos == numberOfModels);
-}
-
-/* generic implementation of Brent's algorithm for one-dimensional parameter optimization */
-
-/** @brief Brent's algorithm
- *
- *  Generic implementation of Brent's algorithm for one-dimensional parameter optimization
- *  All array arguments are indexed from 0 to \a numberOfModels - 1 and handled element-wise.
- *  @param ax
- *    One end of the initial bracket (the smaller of ax, cx becomes the lower end)
- *  @param bx
- *    Starting point of the search
- *  @param cx
- *    Other end of the initial bracket
- *  @param fb
- *    Objective value at \a bx
- *  @param tol
- *    Tolerance factor (tol1 = tol * |x| + BRENT_ZEPS); also passed to evaluateChange
- *  @param xmin
- *    Output: position x of each model at the moment it is flagged converged
- *  @param result
- *    Output: negated objective value at \a xmin
- *
- *  @param numberOfModels
- *    Number of partitions for which we are optimizing 
- *
- *  @param whichFunction
- *    Type of the model parameter. Possible values are \b ALPHA_F, \b RATE_F and \b FREQ_F
- *
- *  @param rateNumber
- *     Index of the parameter to optimize 
- *   
- *  @param tr
- *    PLL instance
- *
- *  @param pr
- *    List of partitions
- *
- *  @param ll
- *    Linkage list
- *
- *  @param lim_inf
- *    Lower bound for the rate assignment
- *
- *  @param lim_sup
- *    Upper bound for the rate assignment
- *
- *  @todo
- *     Fill the rest of the entries. Also, why not preallocate all memory instead of allocating
- *     at every call? We can save a lot of time which is lost due to function calls, finding free
- *     memory blocks by allocation strategy, and also prevent mem fragmentation.
- */
-static void brentGeneric(double *ax, double *bx, double *cx, double *fb, double tol, double *xmin, double *result, int numberOfModels, 
-                         int whichFunction, int rateNumber, pllInstance *tr, partitionList *pr, linkageList *ll, double lim_inf, double lim_sup)
-{
-  int iter, i;
-  double 
-    *a     = (double *)rax_malloc(sizeof(double) * numberOfModels),
-    *b     = (double *)rax_malloc(sizeof(double) * numberOfModels),
-    *d     = (double *)rax_malloc(sizeof(double) * numberOfModels),
-    *etemp = (double *)rax_malloc(sizeof(double) * numberOfModels),
-    *fu    = (double *)rax_malloc(sizeof(double) * numberOfModels),
-    *fv    = (double *)rax_malloc(sizeof(double) * numberOfModels),
-    *fw    = (double *)rax_malloc(sizeof(double) * numberOfModels),
-    *fx    = (double *)rax_malloc(sizeof(double) * numberOfModels),
-    *p     = (double *)rax_malloc(sizeof(double) * numberOfModels),
-    *q     = (double *)rax_malloc(sizeof(double) * numberOfModels),
-    *r     = (double *)rax_malloc(sizeof(double) * numberOfModels),
-    *tol1  = (double *)rax_malloc(sizeof(double) * numberOfModels),
-    *tol2  = (double *)rax_malloc(sizeof(double) * numberOfModels),
-    *u     = (double *)rax_malloc(sizeof(double) * numberOfModels),
-    *v     = (double *)rax_malloc(sizeof(double) * numberOfModels),
-    *w     = (double *)rax_malloc(sizeof(double) * numberOfModels),
-    *x     = (double *)rax_malloc(sizeof(double) * numberOfModels),
-    *xm    = (double *)rax_malloc(sizeof(double) * numberOfModels),
-    *e     = (double *)rax_malloc(sizeof(double) * numberOfModels);
-  pllBoolean *converged = (pllBoolean *)rax_malloc(sizeof(pllBoolean) * numberOfModels);
-  pllBoolean allConverged;
-  
-  for(i = 0; i < numberOfModels; i++)    
-    converged[i] = PLL_FALSE;
-
-  for(i = 0; i < numberOfModels; i++)
-    {
-      e[i] = 0.0;
-      d[i] = 0.0;
-    }
-
-  for(i = 0; i < numberOfModels; i++)
-    {
-      a[i]=((ax[i] < cx[i]) ? ax[i] : cx[i]);
-      b[i]=((ax[i] > cx[i]) ? ax[i] : cx[i]);
-      x[i] = w[i] = v[i] = bx[i];
-      fw[i] = fv[i] = fx[i] = fb[i];
-    }
-
-  for(i = 0; i < numberOfModels; i++)
-    {      
-      assert(a[i] >= lim_inf && a[i] <= lim_sup);
-      assert(b[i] >= lim_inf && b[i] <= lim_sup);
-      assert(x[i] >= lim_inf && x[i] <= lim_sup);
-      assert(v[i] >= lim_inf && v[i] <= lim_sup);
-      assert(w[i] >= lim_inf && w[i] <= lim_sup);
-    }
-  
-  /* main iteration: every pass proposes one trial point u per model and makes one evaluateChange call */
-
-  for(iter = 1; iter <= PLL_ITMAX; iter++)
-    {
-      allConverged = PLL_TRUE;
-
-      for(i = 0; i < numberOfModels && allConverged; i++)
-        allConverged = allConverged && converged[i];
-
-      if(allConverged)
-        {
-          rax_free(converged);
-          rax_free(a);
-          rax_free(b);
-          rax_free(d);
-          rax_free(etemp);
-          rax_free(fu);
-          rax_free(fv);
-          rax_free(fw);
-          rax_free(fx);
-          rax_free(p);
-          rax_free(q);
-          rax_free(r);
-          rax_free(tol1);
-          rax_free(tol2);
-          rax_free(u);
-          rax_free(v);
-          rax_free(w);
-          rax_free(x);
-          rax_free(xm);
-          rax_free(e);
-          return;
-        }     
-
-      for(i = 0; i < numberOfModels; i++)
-        {
-          if(!converged[i])
-            {                 
-              assert(a[i] >= lim_inf && a[i] <= lim_sup);
-              assert(b[i] >= lim_inf && b[i] <= lim_sup);
-              assert(x[i] >= lim_inf && x[i] <= lim_sup);
-              assert(v[i] >= lim_inf && v[i] <= lim_sup);
-              assert(w[i] >= lim_inf && w[i] <= lim_sup);
-  
-              xm[i] = 0.5 * (a[i] + b[i]);
-              tol2[i] = 2.0 * (tol1[i] = tol * fabs(x[i]) + BRENT_ZEPS);
-              /* stopping test: x is close enough to the bracket midpoint xm relative to tol2 */
-              if(fabs(x[i] - xm[i]) <= (tol2[i] - 0.5 * (b[i] - a[i])))
-                {                
-                  result[i] =  -fx[i];
-                  xmin[i]   = x[i];
-                  converged[i] = PLL_TRUE;                
-                }
-              else
-                {
-                  if(fabs(e[i]) > tol1[i]) /* previous step large enough: try a parabolic fit through x, v, w */
-                    {                
-                      r[i] = (x[i] - w[i]) * (fx[i] - fv[i]);
-                      q[i] = (x[i] - v[i]) * (fx[i] - fw[i]);
-                      p[i] = (x[i] - v[i]) * q[i] - (x[i] - w[i]) * r[i];
-                      q[i] = 2.0 * (q[i] - r[i]);
-                      if(q[i] > 0.0)
-                        p[i] = -p[i];
-                      q[i] = fabs(q[i]);
-                      etemp[i] = e[i];
-                      e[i] = d[i];
-                      if((fabs(p[i]) >= fabs(0.5 * q[i] * etemp[i])) || (p[i] <= q[i] * (a[i]-x[i])) || (p[i] >= q[i] * (b[i] - x[i])))
-                        d[i] = BRENT_CGOLD * (e[i] = (x[i] >= xm[i] ? a[i] - x[i] : b[i] - x[i])); /* fit rejected: BRENT_CGOLD step instead */
-                      else
-                        {
-                          d[i] = p[i] / q[i];
-                          u[i] = x[i] + d[i];
-                          if( u[i] - a[i] < tol2[i] || b[i] - u[i] < tol2[i])
-                            d[i] = PLL_SIGN(tol1[i], xm[i] - x[i]);
-                        }
-                    }
-                  else
-                    {                
-                      d[i] = BRENT_CGOLD * (e[i] = (x[i] >= xm[i] ? a[i] - x[i]: b[i] - x[i]));
-                    }
-                  u[i] = ((fabs(d[i]) >= tol1[i]) ? (x[i] + d[i]) : (x[i] + PLL_SIGN(tol1[i], d[i]))); /* step by at least tol1 */
-                }
-
-              if(!converged[i])
-                assert(u[i] >= lim_inf && u[i] <= lim_sup);
-            }
-        }
-      /* evaluate the trial points u of all models in one call; the objective values come back in fu */
-      evaluateChange(tr, pr, rateNumber, u, fu, converged, whichFunction, numberOfModels, ll, tol);
-
-      for(i = 0; i < numberOfModels; i++)
-        {
-          if(!converged[i])
-            {
-              if(fu[i] <= fx[i]) /* u is at least as good as x: x becomes a bracket end and u the new x */
-                {
-                  if(u[i] >= x[i])
-                    a[i] = x[i];
-                  else
-                    b[i] = x[i];
-                  
-                  PLL_SHFT(v[i],w[i],x[i],u[i]);
-                  PLL_SHFT(fv[i],fw[i],fx[i],fu[i]);
-                }
-              else /* u is worse than x: u becomes a bracket end and may replace w or v */
-                {
-                  if(u[i] < x[i])
-                    a[i] = u[i];
-                  else
-                    b[i] = u[i];
-                  
-                  if(fu[i] <= fw[i] || w[i] == x[i])
-                    {
-                      v[i] = w[i];
-                      w[i] = u[i];
-                      fv[i] = fw[i];
-                      fw[i] = fu[i];
-                    }
-                  else
-                    {
-                      if(fu[i] <= fv[i] || v[i] == x[i] || v[i] == w[i])
-                        {
-                          v[i] = u[i];
-                          fv[i] = fu[i];
-                        }
-                    }       
-                }
-              
-              assert(a[i] >= lim_inf && a[i] <= lim_sup);
-              assert(b[i] >= lim_inf && b[i] <= lim_sup);
-              assert(x[i] >= lim_inf && x[i] <= lim_sup);
-              assert(v[i] >= lim_inf && v[i] <= lim_sup);
-              assert(w[i] >= lim_inf && w[i] <= lim_sup);
-              assert(u[i] >= lim_inf && u[i] <= lim_sup);
-            }
-        }
-    }
-  /* only reached when PLL_ITMAX passes did not converge every model: clean up, then fail the assertion */
-  rax_free(converged);
-  rax_free(a);
-  rax_free(b);
-  rax_free(d);
-  rax_free(etemp);
-  rax_free(fu);
-  rax_free(fv);
-  rax_free(fw);
-  rax_free(fx);
-  rax_free(p);
-  rax_free(q);
-  rax_free(r);
-  rax_free(tol1);
-  rax_free(tol2);
-  rax_free(u);
-  rax_free(v);
-  rax_free(w);
-  rax_free(x);
-  rax_free(xm);
-  rax_free(e);
-
-  printf("\n. Too many iterations in BRENT !");
-  assert(0);
-}
-
-/* generic bracketing function required for Brent's algorithm. For details please see the corresponding chapter in the book Numerical Recipes in C */
-
-/** @brief Bracketing function
- *
- *  Generic bracketing function required for Brent's algorithm.
- *  
- *  @param param
- *
- *  @param ax
- *
- *  @param bx
- *
- *  @param cx
- *
- *  @param fa
- *
- *  @param fb
- *
- *  @param fc
- *
- *  @param lim_inf
- *    Lower bound for the rate assignment
- *
- *  @param lim_sup
- *    Upper bound for the rate assignment
- *
- *  @param numberOfModels
- *    Number of partitions for which we are optimizing 
- *
- *  @param rateNumber
- *     Index of the parameter to optimize 
- *
- *  @param whichFunction
- *    Type of the model parameter. Possible values are \b ALPHA_F, \b RATE_F and \b FREQ_F
- *
- *  @param tr
- *    PLL instance
- *
- *  @param pr
- *    List of partitions
- *
- *  @param ll
- *    Linkage list
- *
- *  @param modelEpsilon
- *
- *  @return
- *    Fill this
- *
- *  @todo
- *    Fill remaining details
- */
-static int brakGeneric(double *param, double *ax, double *bx, double *cx, double *fa, double *fb, 
-                       double *fc, double lim_inf, double lim_sup, 
-                       int numberOfModels, int rateNumber, int whichFunction, pllInstance *tr, partitionList *pr,
-                       linkageList *ll, double modelEpsilon)
-{ /* Runs the bracketing search for numberOfModels parameters side by side: slot i of every
-     array belongs to model i. The triplet ax/bx/cx is updated in place and every trial point
-     is clamped to [lim_inf, lim_sup] before evaluateChange() is called on it.
-     evaluateChange(), PLL_SHFT and the MNBRAK_* constants are defined outside this function;
-     the comments below assume PLL_SHFT(a,b,c,d) performs a=b, b=c, c=d -- TODO confirm. */
-  double 
-    *ulim = (double *)rax_malloc(sizeof(double) * numberOfModels), /* limit for the trial point u */
-    *u    = (double *)rax_malloc(sizeof(double) * numberOfModels), /* current trial point */
-    *r    = (double *)rax_malloc(sizeof(double) * numberOfModels), /* term of the interpolation formula for u */
-    *q    = (double *)rax_malloc(sizeof(double) * numberOfModels), /* term of the interpolation formula for u */
-    *fu   = (double *)rax_malloc(sizeof(double) * numberOfModels), /* value taken from temp for the trial point */
-    *dum  = (double *)rax_malloc(sizeof(double) * numberOfModels), /* scratch slot used when exchanging a and b */
-    *temp = (double *)rax_malloc(sizeof(double) * numberOfModels); /* output array handed to evaluateChange() in the main loop */
-  /* state[i] selects the branch taken before the evaluation, endState[i] the branch taken after it */
-  int 
-    i,
-    *state    = (int *)rax_malloc(sizeof(int) * numberOfModels),
-    *endState = (int *)rax_malloc(sizeof(int) * numberOfModels);
-
-  pllBoolean *converged = (pllBoolean *)rax_malloc(sizeof(pllBoolean) * numberOfModels);
-  pllBoolean allConverged;
-
-  for(i = 0; i < numberOfModels; i++)
-    converged[i] = PLL_FALSE;
-
-  for(i = 0; i < numberOfModels; i++)
-    {
-      state[i] = 0;
-      endState[i] = 0;
-
-      u[i] = 0.0;
-
-      param[i] = ax[i];
-      /* clamp the first point into [lim_inf, lim_sup] */
-      if(param[i] > lim_sup)    
-        param[i] = ax[i] = lim_sup;
-      
-      if(param[i] < lim_inf) 
-        param[i] = ax[i] = lim_inf;
-
-      assert(param[i] >= lim_inf && param[i] <= lim_sup);
-    }
-   
-  /* evaluate at ax; presumably the results are stored in fa -- TODO confirm against evaluateChange() */
-  evaluateChange(tr, pr, rateNumber, param, fa, converged, whichFunction, numberOfModels, ll, modelEpsilon);
-
-
-  for(i = 0; i < numberOfModels; i++)
-    {
-      param[i] = bx[i]; /* second point, clamped the same way */
-      if(param[i] > lim_sup) 
-        param[i] = bx[i] = lim_sup;
-      if(param[i] < lim_inf) 
-        param[i] = bx[i] = lim_inf;
-
-      assert(param[i] >= lim_inf && param[i] <= lim_sup);
-    }
-  /* evaluate at bx; presumably the results are stored in fb */
-  evaluateChange(tr, pr, rateNumber, param, fb, converged, whichFunction, numberOfModels, ll, modelEpsilon);
-
-  for(i = 0; i < numberOfModels; i++)  
-    {
-      if (fb[i] > fa[i]) 
-        {         
-          PLL_SHFT(dum[i],ax[i],bx[i],dum[i]); /* presumably exchanges ax and bx through dum */
-          PLL_SHFT(dum[i],fa[i],fb[i],dum[i]); /* and likewise fa and fb */
-        }
-      /* third point: extend past bx by MNBRAK_GOLD times the a-b distance, then clamp */
-      cx[i] = bx[i] + MNBRAK_GOLD * (bx[i] - ax[i]);
-      
-      param[i] = cx[i];
-      
-      if(param[i] > lim_sup) 
-        param[i] = cx[i] = lim_sup;
-      if(param[i] < lim_inf) 
-        param[i] = cx[i] = lim_inf;
-
-      assert(param[i] >= lim_inf && param[i] <= lim_sup);
-    }
-  
- 
-  evaluateChange(tr, pr, rateNumber, param, fc, converged, whichFunction, numberOfModels,  ll, modelEpsilon);
-  /* main loop: the only exit is the return taken once every model is flagged as converged */
-   while(1) 
-     {       
-       allConverged = PLL_TRUE;
-
-       for(i = 0; i < numberOfModels && allConverged; i++)
-         allConverged = allConverged && converged[i];
-
-       if(allConverged)
-         {
-           for(i = 0; i < numberOfModels; i++)
-             {         
-               if(ax[i] > lim_sup) /* final clamp of the whole triplet before returning */
-                 ax[i] = lim_sup;
-               if(ax[i] < lim_inf) 
-                 ax[i] = lim_inf;
-
-               if(bx[i] > lim_sup) 
-                 bx[i] = lim_sup;
-               if(bx[i] < lim_inf) 
-                 bx[i] = lim_inf;
-               
-               if(cx[i] > lim_sup) 
-                 cx[i] = lim_sup;
-               if(cx[i] < lim_inf) 
-                 cx[i] = lim_inf;
-             }
-
-           rax_free(converged);
-           rax_free(ulim);
-           rax_free(u);
-           rax_free(r);
-           rax_free(q);
-           rax_free(fu);
-           rax_free(dum); 
-           rax_free(temp);
-           rax_free(state);   
-           rax_free(endState);
-           return 0;
-           
-         }
-       /* phase 1: pick the next trial point param[i] for every model that is still open */
-       for(i = 0; i < numberOfModels; i++)
-         {
-           if(!converged[i])
-             {
-               switch(state[i])
-                 {
-                 case 0:
-                   endState[i] = 0;
-                   if(!(fb[i] > fc[i])) /* fb is not above fc: this model is done */
-                     converged[i] = PLL_TRUE;                                
-                   else
-                     {
-                   
-                       if(ax[i] > lim_sup) 
-                         ax[i] = lim_sup;
-                       if(ax[i] < lim_inf) 
-                         ax[i] = lim_inf;
-                       if(bx[i] > lim_sup) 
-                         bx[i] = lim_sup;
-                       if(bx[i] < lim_inf) 
-                         bx[i] = lim_inf;
-                       if(cx[i] > lim_sup) 
-                         cx[i] = lim_sup;
-                       if(cx[i] < lim_inf) 
-                         cx[i] = lim_inf;
-                       /* trial point u computed from the three points; the denominator magnitude is kept at or above MNBRAK_TINY */
-                       r[i]=(bx[i]-ax[i])*(fb[i]-fc[i]);
-                       q[i]=(bx[i]-cx[i])*(fb[i]-fa[i]);
-                       u[i]=(bx[i])-((bx[i]-cx[i])*q[i]-(bx[i]-ax[i])*r[i])/
-                         (2.0 * PLL_SIGN(PLL_MAX(fabs(q[i]-r[i]),MNBRAK_TINY),q[i]-r[i]));
-                       
-                       ulim[i]=(bx[i])+MNBRAK_GLIMIT*(cx[i]-bx[i]);
-                       
-                       if(u[i] > lim_sup) 
-                         u[i] = lim_sup;
-                       if(u[i] < lim_inf) 
-                         u[i] = lim_inf;
-                       if(ulim[i] > lim_sup) 
-                         ulim[i] = lim_sup;
-                       if(ulim[i] < lim_inf) 
-                         ulim[i] = lim_inf;
-                       
-                       if ((bx[i]-u[i])*(u[i]-cx[i]) > 0.0) /* u lies strictly between bx and cx */
-                         {
-                           param[i] = u[i];
-                           if(param[i] > lim_sup)                            
-                             param[i] = u[i] = lim_sup;
-                           if(param[i] < lim_inf)
-                             param[i] = u[i] = lim_inf;
-                           endState[i] = 1;
-                         }
-                       else 
-                         {
-                           if ((cx[i]-u[i])*(u[i]-ulim[i]) > 0.0) /* u lies strictly between cx and ulim */
-                             {
-                               param[i] = u[i];
-                               if(param[i] > lim_sup) 
-                                 param[i] = u[i] = lim_sup;
-                               if(param[i] < lim_inf) 
-                                 param[i] = u[i] = lim_inf;
-                               endState[i] = 2;
-                             }                         
-                           else
-                             {
-                               if ((u[i]-ulim[i])*(ulim[i]-cx[i]) >= 0.0) /* u is at or beyond ulim: use ulim itself */
-                                 {
-                                   u[i] = ulim[i];
-                                   param[i] = u[i];     
-                                   if(param[i] > lim_sup) 
-                                     param[i] = u[i] = ulim[i] = lim_sup;
-                                   if(param[i] < lim_inf) 
-                                     param[i] = u[i] = ulim[i] = lim_inf;
-                                   endState[i] = 0;
-                                 }                              
-                               else 
-                                 {                
-                                   u[i]=(cx[i])+MNBRAK_GOLD*(cx[i]-bx[i]); /* otherwise step past cx by MNBRAK_GOLD times the b-c distance */
-                                   param[i] = u[i];
-                                   endState[i] = 0;
-                                   if(param[i] > lim_sup) 
-                                     param[i] = u[i] = lim_sup;
-                                   if(param[i] < lim_inf) 
-                                     param[i] = u[i] = lim_inf;
-                                 }
-                             }    
-                         }
-                     }
-                   break;
-                 case 1: /* param[i] was already chosen by phase 2 (endState 1) of the previous round */
-                   endState[i] = 0;
-                   break;
-                 case 2: /* continuation of endState 2 from the previous round */
-                   endState[i] = 3;
-                   break;
-                 default:
-                   assert(0);
-                 }
-               assert(param[i] >= lim_inf && param[i] <= lim_sup);
-             }
-         }
-       /* evaluate all trial points at once; the results are read from temp in phase 2 */
-       evaluateChange(tr, pr, rateNumber, param, temp, converged, whichFunction, numberOfModels, ll, modelEpsilon);
-       /* phase 2: fold the evaluated trial point into the triplet and choose the next state */
-       for(i = 0; i < numberOfModels; i++)
-         {
-           if(!converged[i])
-             {         
-               switch(endState[i])
-                 {
-                 case 0: /* advance the triplet by one point and restart from state 0 */
-                   fu[i] = temp[i];
-                   PLL_SHFT(ax[i],bx[i],cx[i],u[i]);
-                   PLL_SHFT(fa[i],fb[i],fc[i],fu[i]);
-                   state[i] = 0;
-                   break;
-                 case 1:
-                   fu[i] = temp[i];
-                   if (fu[i] < fc[i]) /* (bx, u) replaces (ax, bx); cx is kept and the model is done */
-                     {
-                       ax[i]=(bx[i]);
-                       bx[i]=u[i];
-                       fa[i]=(fb[i]);
-                       fb[i]=fu[i]; 
-                       converged[i] = PLL_TRUE;               
-                     } 
-                   else 
-                     {
-                       if (fu[i] > fb[i]) /* u becomes the new cx and the model is done */
-                         {
-                           assert(u[i] >= lim_inf && u[i] <= lim_sup);
-                           cx[i]=u[i];
-                           fc[i]=fu[i];
-                           converged[i] = PLL_TRUE;                       
-                         }
-                       else
-                         {                 
-                           u[i]=(cx[i])+MNBRAK_GOLD*(cx[i]-bx[i]); /* neither case: new step past cx, evaluated next round via state 1 */
-                           param[i] = u[i];
-                           if(param[i] > lim_sup) {param[i] = u[i] = lim_sup;}
-                           if(param[i] < lim_inf) {param[i] = u[i] = lim_inf;}    
-                           state[i] = 1;                 
-                         }                
-                     }
-                   break;
-                 case 2: 
-                   fu[i] = temp[i];
-                   if (fu[i] < fc[i]) 
-                     {               
-                       PLL_SHFT(bx[i],cx[i],u[i], cx[i]+MNBRAK_GOLD*(cx[i]-bx[i])); /* NOTE(review): param[i] is not updated to the new u[i] on this path before the next evaluateChange() -- confirm intended */
-                       state[i] = 2;
-                     }     
-                   else
-                     {
-                       state[i] = 0;
-                       PLL_SHFT(ax[i],bx[i],cx[i],u[i]);
-                       PLL_SHFT(fa[i],fb[i],fc[i],fu[i]);
-                     }
-                   break;          
-                 case 3: /* second half of the endState 2 path: shift the values, then the whole triplet */
-                   PLL_SHFT(fb[i],fc[i],fu[i], temp[i]);
-                   PLL_SHFT(ax[i],bx[i],cx[i],u[i]);
-                   PLL_SHFT(fa[i],fb[i],fc[i],fu[i]);
-                   state[i] = 0;
-                   break;
-                 default:
-                   assert(0);
-                 }
-             }
-         }
-    }
-   
-
-   assert(0); /* not reached: the loop above has no break and leaves only through its return */
-   rax_free(converged);
-   rax_free(ulim);
-   rax_free(u);
-   rax_free(r);
-   rax_free(q);
-   rax_free(fu);
-   rax_free(dum); 
-   rax_free(temp);
-   rax_free(state);   
-   rax_free(endState);
-
-  
-
-   return(0);
-}
-
-/*******************************************************************************************************/
-/******** LG4X ***************************************************************************************/
-
-/** @brief Optimize the four LG4X rates and rescale the fracchange values
- *
- *  Calls optParamGeneric() once for each of the four rate indices with parameter type
- *  \b LXRATE_F and the bounds \b PLL_LG4X_RATE_MIN / \b PLL_LG4X_RATE_MAX. Afterwards the
- *  mean of the four \a gammaRates of every valid linkage-list entry is used as a scaler:
- *  with more than one partition each partition's \a fracchange becomes
- *  \a rawFracchange divided by its scaler, and \a tr->fracchange becomes
- *  \a tr->rawFracchange divided by the partitionWeight-weighted mean of all scalers.
- *  Partitions that belong to no valid entry keep a scaler of 1.0.
- *
- *  @param tr
- *    PLL instance
- *
- *  @param pr
- *    List of partitions
- *
- *  @param modelEpsilon
- *    Passed unchanged to optParamGeneric()
- *
- *  @param ll
- *    Linkage list; every valid entry must hold exactly one partition (asserted)
- *
- *  @param numberOfModels
- *    Number of valid linkage-list entries, passed unchanged to optParamGeneric()
- */
-void pllOptLG4X(pllInstance *tr, partitionList * pr, double modelEpsilon,
-        linkageList *ll, int numberOfModels)
-{
-    int i;
-    double lg4xScaler = 0.0, wgtsum = 0.0;
-    double *lg4xScalers = (double *) calloc(pr->numberOfPartitions,
-            sizeof(double));
-
-    /* the allocation can fail; stop before any model state has been touched
-       instead of writing through a NULL pointer further down */
-    if (lg4xScalers == NULL && pr->numberOfPartitions > 0)
-    {
-        assert(0);
-        return;
-    }
-
-    for (i = 0; i < 4; i++)
-        optParamGeneric(tr, pr, modelEpsilon, ll, numberOfModels, i, PLL_LG4X_RATE_MIN,
-                PLL_LG4X_RATE_MAX, LXRATE_F);
-
-    /* neutral scaler for partitions that are not covered by a valid entry */
-    for (i = 0; i < pr->numberOfPartitions; i++)
-        lg4xScalers[i] = 1.0;
-
-    for (i = 0; i < ll->entries; i++)
-    {
-        if (ll->ld[i].valid)
-        {
-            int j, index = ll->ld[i].partitionList[0];
-            double averageRate = 0.0;
-            assert(ll->ld[i].partitions == 1);
-            for (j = 0; j < 4; j++)
-                averageRate += pr->partitionData[index]->gammaRates[j];
-            averageRate /= 4.0;
-            lg4xScalers[index] = averageRate;
-        }
-    }
-
-    if (pr->numberOfPartitions > 1)
-    {
-        for (i = 0; i < pr->numberOfPartitions; i++)
-            pr->partitionData[i]->fracchange = pr->partitionData[i]->rawFracchange * (1.0 / lg4xScalers[i]);
-    }
-
-    /* overall scaler: mean of the per-partition scalers weighted by partitionWeight */
-    for (i = 0; i < pr->numberOfPartitions; i++)
-        wgtsum += (double) pr->partitionData[i]->partitionWeight;
-    for (i = 0; i < pr->numberOfPartitions; i++)
-    {
-        double fraction = (double) pr->partitionData[i]->partitionWeight / wgtsum;
-        lg4xScaler += (fraction * lg4xScalers[i]);
-    }
-    tr->fracchange = tr->rawFracchange * (1.0 / lg4xScaler);
-    free(lg4xScalers);
-}
-
-/**********************************************************************************************************/
-/* ALPHA PARAM ********************************************************************************************/
-
-
-//this function is required for implementing the LG4X model later-on 
-
-/** @brief Optimize alpha rates
-  *
-  * Generic routine for alpha rates optimization. The linkage list is processed in two
-  * passes that use the \a valid flag to select entries: first all entries that are not
-  * protein partitions under \b PLL_LG4X are handed to optParamGeneric() with \b ALPHA_F,
-  * then the \b PLL_LG4X protein entries are handed to pllOptLG4X(). Only entries whose
-  * first partition has \a optimizeAlphaParameter set take part in either pass. On return
-  * every \a valid flag is set back to \b PLL_TRUE.
-  *
-  * The data type and flags of an entry are read from its first partition only.
-  *
-  * @param tr
-  *   PLL instance
-  *
-  * @param pr
-  *   List of partitions
-  *
-  * @param modelEpsilon
-  *   Passed unchanged to optParamGeneric() and pllOptLG4X()
-  *
-  * @param ll
-  *   Linkage list; a NULL list makes the function a no-op
-  */
-void pllOptAlphasGeneric(pllInstance *tr, partitionList * pr, double modelEpsilon, linkageList *ll)
-{
-  int 
-    i,
-    non_LG4X_Partitions = 0,
-    LG4X_Partitions  = 0;
-
-  /* assumes homogeneous super-partitions, that either contain DNA or AA partitions !*/
-  /* does not check whether AA are all linked */
-
-  /* first do non-LG4X partitions */
-
-  for(i = 0; ll && i < ll->entries; i++)
-    {
-      int
-        index = ll->ld[i].partitionList[0];
-
-      switch(pr->partitionData[index]->dataType)
-        {
-        case PLL_DNA_DATA:
-        case PLL_BINARY_DATA:
-        case PLL_SECONDARY_DATA:
-        case PLL_SECONDARY_DATA_6:
-        case PLL_SECONDARY_DATA_7:
-        case PLL_GENERIC_32:
-        case PLL_GENERIC_64:
-          if(pr->partitionData[index]->optimizeAlphaParameter)
-            {
-              ll->ld[i].valid = PLL_TRUE;
-              non_LG4X_Partitions++;
-            }
-          else
-            ll->ld[i].valid = PLL_FALSE;
-          break;
-        case PLL_AA_DATA:
-          ll->ld[i].valid = PLL_FALSE;
-          if(pr->partitionData[index]->optimizeAlphaParameter)
-            {
-              /* LG4X entries are only counted here; they are optimized in the second pass */
-              if(pr->partitionData[index]->protModels == PLL_LG4X)
-                LG4X_Partitions++;
-              else
-                {
-                  ll->ld[i].valid = PLL_TRUE;
-                  non_LG4X_Partitions++;
-                }
-            }
-          break;
-        default:
-          assert(0);
-        }
-    }
-
-  if(non_LG4X_Partitions > 0)
-    optParamGeneric(tr, pr, modelEpsilon, ll, non_LG4X_Partitions, -1, PLL_ALPHA_MIN, PLL_ALPHA_MAX, ALPHA_F);
-
-  /* then LG4x partitions */
-
-  for(i = 0; ll && i < ll->entries; i++)
-    {
-      int
-        index = ll->ld[i].partitionList[0];
-
-      switch(pr->partitionData[index]->dataType)
-        {
-        case PLL_DNA_DATA:
-        case PLL_BINARY_DATA:
-        case PLL_SECONDARY_DATA:
-        case PLL_SECONDARY_DATA_6:
-        case PLL_SECONDARY_DATA_7:
-        case PLL_GENERIC_32:
-        case PLL_GENERIC_64:
-          ll->ld[i].valid = PLL_FALSE;
-          break;
-        case PLL_AA_DATA:
-          /* must select exactly the entries counted in LG4X_Partitions above: the callee
-             sizes its work arrays from that count, so an LG4X entry with
-             optimizeAlphaParameter unset must not be marked valid */
-          if(pr->partitionData[index]->optimizeAlphaParameter &&
-             pr->partitionData[index]->protModels == PLL_LG4X)
-            ll->ld[i].valid = PLL_TRUE;
-          else
-            ll->ld[i].valid = PLL_FALSE;
-          break;
-        default:
-          assert(0);
-        }
-    }
-
-  if(LG4X_Partitions > 0)
-    pllOptLG4X(tr, pr, modelEpsilon, ll, LG4X_Partitions);
-
-  /* leave the linkage list with every entry enabled again */
-  for(i = 0; ll && i < ll->entries; i++)
-    ll->ld[i].valid = PLL_TRUE;
-}
-
-/** @brief Optimize model parameters
-  *
-  * Function for optimizing the \a rateNumber-th model parameter of type \a whichParameterType,
-  * i.e. alpha rate, substitution rate, base frequency exponent, LG4X rate or LG4X weight
-  * exponent, in all linkage-list entries with the \a valid flag set to \b PLL_TRUE.
-  *
-  * For every such entry the current value is recorded, a start interval of +/- 0.1 around it
-  * (clamped to the bounds) is handed to brakGeneric() and then brentGeneric(), and the result
-  * is written back with changeModelParameters(). If the likelihood summed over the entry's
-  * partitions before the search is larger than the one reported by brentGeneric(), the
-  * recorded start value is restored instead.
-  *
-  * @param tr
-  *   PLL instance
-  *
-  * @param pr
-  *   List of partitions
-  *   
-  * @param modelEpsilon
-  *    Passed unchanged to brakGeneric() and brentGeneric()
-  *
-  * @param ll
-  *   Linkage list
-  * 
-  * @param numberOfModels
-  *   Number of linkage-list entries with the valid flag set; all work arrays are sized
-  *   from it and it is asserted to match the number of valid entries found
-  *
-  * @param rateNumber
-  *  Index of the parameter to optimize; asserted to be in [0,4) for \b LXRATE_F and
-  *  \b LXWEIGHT_F, not used for selecting the start value of \b ALPHA_F
-  *
-  * @param lim_inf
-  *  Lower bound for the rate assignment
-  *
-  * @param lim_sup
-  *  Upper bound for the rate assignment
-  *
-  * @param whichParameterType
-  *  Type of the model parameter. Possible values are \b ALPHA_F, \b RATE_F, \b FREQ_F,
-  *  \b LXRATE_F and \b LXWEIGHT_F; any other value trips an assertion
-  *
-  * @todo
-  *    Describe the modelEpsilon parameter in detail
-  */
-static void optParamGeneric(pllInstance *tr, partitionList * pr, double modelEpsilon, linkageList *ll, int numberOfModels, int rateNumber, double lim_inf, double lim_sup, int whichParameterType)
-{
-  int
-    l,
-    k, 
-    j, 
-    pos;
-  /* start* arrays hold four values per model and are only filled and read for LXRATE_F */
-  double
-    *startRates     = (double *)rax_malloc(sizeof(double) * numberOfModels * 4),
-    *startWeights   = (double *)rax_malloc(sizeof(double) * numberOfModels * 4),
-    *startExponents = (double *)rax_malloc(sizeof(double) * numberOfModels * 4),
-    *startValues = (double *)rax_malloc(sizeof(double) * numberOfModels), /* parameter value before the search */
-    *startLH     = (double *)rax_malloc(sizeof(double) * numberOfModels), /* summed partitionLH before the search */
-    *endLH       = (double *)rax_malloc(sizeof(double) * numberOfModels), /* handed to brentGeneric(), compared with startLH afterwards */
-    *_a          = (double *)rax_malloc(sizeof(double) * numberOfModels),
-    *_b          = (double *)rax_malloc(sizeof(double) * numberOfModels),
-    *_c          = (double *)rax_malloc(sizeof(double) * numberOfModels),
-    *_fa         = (double *)rax_malloc(sizeof(double) * numberOfModels),
-    *_fb         = (double *)rax_malloc(sizeof(double) * numberOfModels),
-    *_fc         = (double *)rax_malloc(sizeof(double) * numberOfModels),
-    *_param      = (double *)rax_malloc(sizeof(double) * numberOfModels),
-    *_x          = (double *)rax_malloc(sizeof(double) * numberOfModels); /* handed to brentGeneric(), used as the optimized value afterwards */
-   
-  pllEvaluateLikelihood (tr, pr, tr->start, PLL_TRUE, PLL_FALSE); /* NOTE(review): the else branch below repeats this exact call -- confirm whether both are needed */
-    if (whichParameterType == LXWEIGHT_F)
-        pllEvaluateLikelihood (tr, pr, tr->start, PLL_FALSE, PLL_FALSE);
-    else
-    {
-        pllEvaluateLikelihood (tr, pr, tr->start, PLL_TRUE, PLL_FALSE);
-        if (whichParameterType == LXRATE_F)
-        {
-            int j; /* shadows the function-level j */
-            for (j = 0; j < pr->numberOfPartitions; j++)
-                pr->partitionData[j]->lg4x_weightLikelihood = pr->partitionData[j]->partitionLH;
-        }
-    }
-  
-#ifdef  _DEBUG_MOD_OPT
-  double
-    initialLH = tr->likelihood;
-#endif
-
-  /* 
-     at this point here every worker has the traversal data it needs for the 
-     search 
-  */
-
-  /* store in startValues the values of the old parameters */
-  for(l = 0, pos = 0; ll && l < ll->entries; l++)
-    {
-      if(ll->ld[l].valid)
-        {
-          endLH[pos] = PLL_UNLIKELY;
-          startLH[pos] = 0.0;
-
-          for(j = 0; j < ll->ld[l].partitions; j++)
-            {
-              int 
-                index = ll->ld[l].partitionList[j];
-              /* the likelihood is summed over the entry; the start value ends up being that of its last partition */
-              startLH[pos] += pr->partitionData[index]->partitionLH;
-              
-              switch(whichParameterType)
-                {
-                case ALPHA_F:
-                  startValues[pos] = pr->partitionData[index]->alpha;
-                  break;
-                case RATE_F:
-                  startValues[pos] = pr->partitionData[index]->substRates[rateNumber];      
-                  break;
-                case FREQ_F:
-                  startValues[pos] = pr->partitionData[index]->freqExponents[rateNumber];
-                  break;
-                case LXRATE_F: /* also snapshot all four rates, weight exponents and weights for a possible roll-back */
-                    assert(rateNumber >= 0 && rateNumber < 4);
-                    startValues[pos] =
-                            pr->partitionData[index]->gammaRates[rateNumber];
-                    memcpy(&startRates[pos * 4],
-                            pr->partitionData[index]->gammaRates,
-                            4 * sizeof(double));
-                    memcpy(&startExponents[pos * 4],
-                            pr->partitionData[index]->lg4x_weightExponents,
-                            4 * sizeof(double));
-                    memcpy(&startWeights[pos * 4],
-                            pr->partitionData[index]->lg4x_weights,
-                            4 * sizeof(double));
-                    break;
-                case LXWEIGHT_F:
-                    assert(rateNumber >= 0 && rateNumber < 4);
-                    startValues[pos] =
-                            pr->partitionData[index]->lg4x_weightExponents[rateNumber];
-                    break;
-                default:
-                  assert(0);
-                }
-            }
-          pos++;
-        }
-    }  
-
-  assert(pos == numberOfModels);
-  /* initial interval: start value +/- 0.1, clamped to [lim_inf, lim_sup] */
-  for(k = 0, pos = 0; ll && k < ll->entries; k++)
-    {
-      if(ll->ld[k].valid)
-        {
-          _a[pos] = startValues[pos] + 0.1;
-          _b[pos] = startValues[pos] - 0.1;
-
-          if(_a[pos] < lim_inf) 
-            _a[pos] = lim_inf;
-          
-          if(_a[pos] > lim_sup) 
-            _a[pos] = lim_sup;
-              
-          if(_b[pos] < lim_inf) 
-            _b[pos] = lim_inf;
-          
-          if(_b[pos] > lim_sup) 
-            _b[pos] = lim_sup;    
-
-          pos++;
-        }
-    }                                
-
-  assert(pos == numberOfModels);
-
-  brakGeneric(_param, _a, _b, _c, _fa, _fb, _fc, lim_inf, lim_sup, numberOfModels, rateNumber, whichParameterType, tr, pr, ll, modelEpsilon);
-      
-  for(k = 0; k < numberOfModels; k++)
-    {
-      assert(_a[k] >= lim_inf && _a[k] <= lim_sup);
-      assert(_b[k] >= lim_inf && _b[k] <= lim_sup);       
-      assert(_c[k] >= lim_inf && _c[k] <= lim_sup);         
-    }      
-
-  brentGeneric(_a, _b, _c, _fb, modelEpsilon, _x, endLH, numberOfModels, whichParameterType, rateNumber, tr,  pr, ll, lim_inf, lim_sup);
-        
-  for(k = 0, pos = 0; ll && k < ll->entries; k++)
-    {
-      if(ll->ld[k].valid)
-        { 
-          if(startLH[pos] > endLH[pos])
-            {
-              //if the initial likelihood was better than the likelihood after optimization, we set the values back 
-              //to their original values 
-
-              for(j = 0; j < ll->ld[k].partitions; j++)
-                {
-                  int 
-                    index = ll->ld[k].partitionList[j];
-                  
-                  if (whichParameterType == LXRATE_F)
-                    {
-                        memcpy(pr->partitionData[index]->lg4x_weights,
-                                &startWeights[pos * 4], sizeof(double) * 4);
-                        memcpy(pr->partitionData[index]->gammaRates,
-                                &startRates[pos * 4], sizeof(double) * 4);
-                        memcpy(pr->partitionData[index]->lg4x_weightExponents,
-                                &startExponents[pos * 4], 4 * sizeof(double));
-                    }
-
-                    changeModelParameters(index, rateNumber, startValues[pos], whichParameterType, tr, pr); 
-                }
-            }
-          else
-            {
-              //otherwise we set the value to the optimized value 
-              //this used to be a bug in standard RAxML, before I fixed it 
-              //I was not using _x[pos] as value that needs to be set 
-
-              for(j = 0; j < ll->ld[k].partitions; j++)
-                {
-                  int 
-                    index = ll->ld[k].partitionList[j];
-                  
-                  changeModelParameters(index, rateNumber, _x[pos], whichParameterType, tr, pr);
-
-                  if (whichParameterType == LXWEIGHT_F)
-                    {
-                        if (endLH[pos]
-                                > pr->partitionData[index]->lg4x_weightLikelihood) /* best weights so far: copy them into the buffers */
-                        {
-                            memcpy(pr->partitionData[index]->lg4x_weightsBuffer,
-                                    pr->partitionData[index]->lg4x_weights,
-                                    sizeof(double) * 4);
-                            memcpy(
-                                    pr->partitionData[index]->lg4x_weightExponentsBuffer,
-                                    pr->partitionData[index]->lg4x_weightExponents,
-                                    sizeof(double) * 4);
-                            pr->partitionData[index]->lg4x_weightLikelihood =
-                                    endLH[pos];
-                        }
-                    }
-                    if (whichParameterType == LXRATE_F) /* after a rate update the weights are reloaded from the buffers */
-                    {
-                        memcpy(pr->partitionData[index]->lg4x_weights,
-                                pr->partitionData[index]->lg4x_weightsBuffer,
-                                sizeof(double) * 4);
-                        memcpy(pr->partitionData[index]->lg4x_weightExponents,
-                                pr->partitionData[index]->lg4x_weightExponentsBuffer,
-                                sizeof(double) * 4);
-                    }
-                }
-            }
-          pos++;
-        }
-    }
-
-  #if (defined(_FINE_GRAIN_MPI) || defined(_USE_PTHREADS))
-      if (whichParameterType == LXRATE_F || whichParameterType == LXWEIGHT_F) {
-        pllMasterBarrier(tr, pr, PLL_THREAD_COPY_LG4X_RATES);
-      } else {
-        pllMasterBarrier(tr, pr, PLL_THREAD_COPY_RATES);
-      }
-
-//    switch(whichParameterType)
-//      {
-//      case FREQ_F:
-//      case RATE_F:
-//          pllMasterBarrier(tr, pr, PLL_THREAD_COPY_RATES);
-//        break;
-//      case ALPHA_F:
-//          pllMasterBarrier(tr, pr, PLL_THREAD_COPY_ALPHA);
-//        break;
-//      case LXRATE_F:
-//      case LXWEIGHT_F:
-//          pllMasterBarrier(tr, pr, PLL_THREAD_COPY_LG4X_RATES);
-//        break;
-//      default:
-//        assert(0);
-//      }
-
-  #endif    
-
-    
-  assert(pos == numberOfModels);
-
-  rax_free(startLH);
-  rax_free(endLH);
-  rax_free(_a);
-  rax_free(_b);
-  rax_free(_c);
-  rax_free(_fa);
-  rax_free(_fb);
-  rax_free(_fc);
-  rax_free(_param);
-  rax_free(_x);
-  rax_free(startValues);
-  rax_free(startRates);
-  rax_free(startWeights);
-  rax_free(startExponents);
-
-#ifdef _DEBUG_MOD_OPT
-  pllEvaluateLikelihood (tr, pr, tr->start, PLL_TRUE, PLL_FALSE);
-
-  if(tr->likelihood < initialLH)
-    printf("%f %f\n", tr->likelihood, initialLH);
-  assert(tr->likelihood >= initialLH);
-#endif
-}
-
-//******************** rate optimization functions ***************************************************/
-
-/** @brief Optimize every base frequency parameter of the selected partitions
-  *
-  * Runs optParamGeneric() with parameter type \b FREQ_F once for each index from 0 up to
-  * \a states - 1, always with the same fixed search bounds.
-  *
-  * @param tr
-  *   PLL instance
-  *
-  * @param pr
-  *   List of partitions
-  *
-  * @param modelEpsilon
-  *   Forwarded unchanged to optParamGeneric()
-  *
-  * @param ll
-  *   Linkage list
-  *
-  * @param numberOfModels
-  *   Number of partitions that we are optimizing, forwarded unchanged to optParamGeneric()
-  *
-  * @param states
-  *   Number of states, i.e. how many frequency parameters are optimized
-  */
-static void optFreqs(pllInstance *tr, partitionList * pr, double modelEpsilon, linkageList *ll, int numberOfModels, int states)
-{
-  /* search interval used for each frequency parameter */
-  double
-    lowerBound = -1000000.0,
-    upperBound = 200.0;
-
-  int
-    freqIndex = 0;
-
-  while(freqIndex < states)
-    {
-      optParamGeneric(tr, pr, modelEpsilon, ll, numberOfModels, freqIndex, lowerBound, upperBound, FREQ_F);
-      freqIndex++;
-    }
-}
-
-/** @brief Select the linkage-list entries of one data type for frequency optimization
- *
- *  Sets the \a valid flag of every linkage-list entry: \b PLL_TRUE when the entry's first
- *  partition has data type \a dataType and \a optimizeBaseFrequencies set, \b PLL_FALSE
- *  otherwise. An unknown data type trips an assertion.
- *
- *  @param pr
- *    List of partitions
- *
- *  @param ll
- *    Linkage list; may be NULL, in which case nothing is selected
- *
- *  @param dataType
- *    Data type to select
- *
- *  @param states
- *    Receives the \a states value of the last entry of type \a dataType that was seen;
- *    left untouched when there is none
- *
- *  @return
- *    Number of entries whose \a valid flag was set to \b PLL_TRUE
- */
-static int markBaseFreqPartitions(partitionList *pr, linkageList *ll, int dataType, int *states)
-{
-  int
-    i,
-    selected = 0;
-
-  for(i = 0; ll && i < ll->entries; i++)
-    {
-      int
-        index = ll->ld[i].partitionList[0];
-
-      ll->ld[i].valid = PLL_FALSE;
-
-      switch(pr->partitionData[index]->dataType)
-        {
-        case PLL_DNA_DATA:
-        case PLL_AA_DATA:
-        case PLL_BINARY_DATA:
-        case PLL_SECONDARY_DATA:
-        case PLL_SECONDARY_DATA_6:
-        case PLL_SECONDARY_DATA_7:
-        case PLL_GENERIC_32:
-        case PLL_GENERIC_64:
-          if(pr->partitionData[index]->dataType == dataType)
-            {
-              *states = pr->partitionData[index]->states;
-              if(pr->partitionData[index]->optimizeBaseFrequencies)
-                {
-                  ll->ld[i].valid = PLL_TRUE;
-                  selected++;
-                }
-            }
-          break;
-        default:
-          assert(0);
-        }
-    }
-
-  return selected;
-}
-
-/** @brief Optimize base frequencies 
- *  
- *  Wrapper function for optimizing base frequencies. DNA, protein and binary entries are
- *  handled in three consecutive passes; in each pass only the entries of that data type
- *  whose first partition has \a optimizeBaseFrequencies set are marked valid and handed to
- *  optFreqs(). Entries of any other known data type are never selected. On return every
- *  \a valid flag is set back to \b PLL_TRUE.
- *
- *  @param tr
- *    PLL instance
- *
- *  @param pr
- *    List of partitions
- *
- *  @param modelEpsilon
- *    Forwarded unchanged to optFreqs()
- *
- *  @param ll
- *    Linkage list; a NULL list makes the function a no-op
- *
- */
-void pllOptBaseFreqs(pllInstance *tr, partitionList * pr, double modelEpsilon, linkageList *ll)
-{
-  /* order of the passes: first DNA, then AA, then binary */
-  const int
-    passTypes[3] = { PLL_DNA_DATA, PLL_AA_DATA, PLL_BINARY_DATA };
-
-  int 
-    i,
-    pass,
-    states = 0;
-
-  for(pass = 0; pass < 3; pass++)
-    {
-      int
-        selected = markBaseFreqPartitions(pr, ll, passTypes[pass], &states);
-
-      /* states was written by the helper whenever at least one entry was selected */
-      if(selected > 0)
-        optFreqs(tr, pr, modelEpsilon, ll, selected, states);
-    }
-
-  /* done */
-
-  for(i = 0; ll && i < ll->entries; i++)
-    ll->ld[i].valid = PLL_TRUE;
-}
-
-
-
-/* new version for optimizing rates, an external loop that iterates over the rates */
-/** @brief Optimize every free substitution rate of the selected partitions
-  *
-  * Derives the number of rates to optimize from \a states (one fewer than the number of
-  * unordered state pairs) and runs optParamGeneric() with parameter type \b RATE_F and the
-  * bounds \b PLL_RATE_MIN / \b PLL_RATE_MAX once for each of them.
-  *
-  * @param tr
-  *   PLL instance
-  *
-  * @param pr
-  *   List of partitions
-  *
-  * @param modelEpsilon
-  *   Forwarded unchanged to optParamGeneric()
-  *
-  * @param ll
-  *   Linkage list
-  *
-  * @param numberOfModels
-  *   Number of partitions that we are optimizing, forwarded unchanged to optParamGeneric()
-  *
-  * @param states
-  *   Number of states
-  */
-static void optRates(pllInstance *tr, partitionList * pr, double modelEpsilon, linkageList *ll, int numberOfModels, int states)
-{
-  int
-    statePairs = (states * states - states) / 2,
-    freeRates  = statePairs - 1,
-    rateIndex  = 0;
-
-  while(rateIndex < freeRates)
-    {
-      optParamGeneric(tr, pr, modelEpsilon, ll, numberOfModels, rateIndex, PLL_RATE_MIN, PLL_RATE_MAX, RATE_F);
-      rateIndex++;
-    }
-}
-
-
-/* figure out if all AA models have been assigned a joint GTR matrix */
-
-/** @brief Tell whether the protein partitions all use the GTR model
-  *
-  * Scans all partitions and looks at those of data type \b PLL_AA_DATA. The scan stops at
-  * the first protein partition whose \a protModels is not \b PLL_GTR.
-  *
-  * @param pr
-  *   List of partitions
-  *
-  * @return
-  *   \b PLL_TRUE when at least one protein partition exists and every protein partition
-  *   uses \b PLL_GTR, otherwise \b PLL_FALSE
-  */
-static pllBoolean AAisGTR(partitionList *pr)
-{
-  int
-    p,
-    proteinPartitions = 0;
-
-  for(p = 0; p < pr->numberOfPartitions; p++)
-    {
-      /* only protein partitions matter here */
-      if(pr->partitionData[p]->dataType != PLL_AA_DATA)
-        continue;
-
-      if(pr->partitionData[p]->protModels != PLL_GTR)
-        return PLL_FALSE;
-
-      proteinPartitions++;
-    }
-
-  return (proteinPartitions > 0) ? PLL_TRUE : PLL_FALSE;
-}
-
-
-/* generic substitution matrix (Q matrix) optimization */
-
-/** @brief Optimize substitution rates
-  *
-  * Generic routine for substitution matrix (Q matrix) optimization. The linkage
-  * list is walked twice. In the first pass only entries whose first partition
-  * holds DNA data and has \a optimizeSubstitutionRates set are marked valid, and
-  * \a optRates is run on them. In the second pass the same is done for protein
-  * (AA) entries. Finally every entry is marked valid again.
-  *
-  * The data type and state count of a linkage entry are taken from the first
-  * partition of that entry only.
-  *
-  * @param tr
-  *   PLL instance
-  *
-  * @param pr
-  *   List of partitions
-  *
-  * @param modelEpsilon
-  *   Passed through unchanged to \a optRates
-  *
-  * @param ll
-  *   Linkage list. If it is NULL the function does nothing.
-  */
-void pllOptRatesGeneric(pllInstance *tr, partitionList *pr, double modelEpsilon, linkageList *ll)
-{
-  int
-    i,
-    dnaPartitions = 0,
-    aaPartitions  = 0,
-    states = -1;
-
-  /* assumes homogeneous super-partitions, that either contain DNA or AA partitions !*/
-  /* does not check whether AA are all linked */
-
-  /*
-     first optimize all rates in DNA data partition matrices. That's where we use the valid field in the
-     linkage list data structure.
-   */
-
-  for(i = 0; ll && i < ll->entries; i++)
-    {
-      switch(pr->partitionData[ll->ld[i].partitionList[0]]->dataType)
-        {
-          case PLL_DNA_DATA:
-            states = pr->partitionData[ll->ld[i].partitionList[0]]->states;
-            if(pr->partitionData[ll->ld[i].partitionList[0]]->optimizeSubstitutionRates)
-              {
-                ll->ld[i].valid = PLL_TRUE;
-                ++ dnaPartitions;
-              }
-            else
-              ll->ld[i].valid = PLL_FALSE;
-            break;
-          case PLL_BINARY_DATA:
-          case PLL_AA_DATA:
-          case PLL_SECONDARY_DATA:
-          case PLL_SECONDARY_DATA_6:
-          case PLL_SECONDARY_DATA_7:
-          case PLL_GENERIC_32:
-          case PLL_GENERIC_64:
-            ll->ld[i].valid = PLL_FALSE;
-            break;
-          default:
-            assert(0);
-        }
-    }
-
-  /* if we have dna partitions in our dataset, let's optimize all rates in their substitution matrices */
-
-  if(dnaPartitions > 0)
-    optRates(tr, pr, modelEpsilon, ll, dnaPartitions, states);
-
-  /* AA partitions evolving under a GTR model do not need to be linked any more, this responsibility now remains
-     with the library user !
-   */
-
-  for(i = 0; ll && i < ll->entries; i++)
-    {
-      switch(pr->partitionData[ll->ld[i].partitionList[0]]->dataType)
-        {
-          case PLL_AA_DATA:
-            states = pr->partitionData[ll->ld[i].partitionList[0]]->states;
-            if(pr->partitionData[ll->ld[i].partitionList[0]]->optimizeSubstitutionRates)
-              {
-                ll->ld[i].valid = PLL_TRUE;
-                aaPartitions++;
-              }
-            else
-              ll->ld[i].valid = PLL_FALSE;
-            break;
-          case PLL_DNA_DATA:
-          case PLL_BINARY_DATA:
-          case PLL_SECONDARY_DATA:
-          case PLL_SECONDARY_DATA_6:
-          case PLL_SECONDARY_DATA_7:
-          /* the two generic types are accepted by the DNA pass above, so they must be
-             accepted here as well instead of running into the assert below */
-          case PLL_GENERIC_32:
-          case PLL_GENERIC_64:
-            ll->ld[i].valid = PLL_FALSE;
-            break;
-          default:
-            assert(0);
-        }
-    }
-
-  if(aaPartitions > 0)
-    optRates(tr, pr, modelEpsilon, ll, aaPartitions, states);
-
-  /* done with all partitions, so we can set all entries in the linkage list to valid again :-) */
-
-  for(i = 0; ll && i < ll->entries; i++)
-    ll->ld[i].valid = PLL_TRUE;
-}
-
-
-
-
-
-/*********************FUNCTIONS FOR PSR/CAT model of rate heterogeneity ***************************************/
-
-
-
-
-
-
-/* qsort-style comparator: orders rateCategorize records by ascending
-   accumulatedSiteLikelihood; records that compare neither greater nor
-   smaller yield 0 */
-static int catCompare(const void *p1, const void *p2)
-{
-  const double first  = ((const rateCategorize *)p1)->accumulatedSiteLikelihood;
-  const double second = ((const rateCategorize *)p2)->accumulatedSiteLikelihood;
-
-  return (first > second) - (first < second);
-}
-
-
-/** @brief Assign each site of a partition to its nearest rate category
-  *
-  * For every site i in [lower, upper) the per-site rate \a tr->patrat[i] is
-  * compared with the first \a numberOfCategories entries of \a rc. The site is
-  * given the first category whose rate is equal or closer than 0.001; if there
-  * is none, it is given the category with the smallest absolute difference
-  * (the lowest index wins ties). Afterwards the category rates are copied
-  * into the partition's \a perSiteRates array.
-  *
-  * @param tr
-  *   PLL instance; \a patrat is read and \a rateCategory is written
-  *
-  * @param pr
-  *   List of partitions
-  *
-  * @param rc
-  *   Candidate categories; at least \a numberOfCategories entries are read
-  *
-  * @param model
-  *   Partition index
-  *
-  * @param lower
-  *   First site index (inclusive)
-  *
-  * @param upper
-  *   Last site index (exclusive)
-  */
-static void categorizePartition(pllInstance *tr, partitionList *pr, rateCategorize *rc, int model, int lower, int upper)
-{
-  int
-    i,
-    k;
-
-  double
-    diff,
-    min;
-
-  for (i = lower; i < upper; i++)
-      {
-        double
-          temp = tr->patrat[i];
-
-        int
-          found = 0;
-
-        /* first try: a category that matches within the 0.001 tolerance */
-        for(k = 0; k < pr->partitionData[model]->numberOfCategories; k++)
-          {
-            if(temp == rc[k].rate || (fabs(temp - rc[k].rate) < 0.001))
-              {
-                found = 1;
-                tr->rateCategory[i] = k;
-                break;
-              }
-          }
-
-        /* fallback: the closest category, starting from category 0 */
-        if(!found)
-          {
-            min = fabs(temp - rc[0].rate);
-            tr->rateCategory[i] = 0;
-
-            for(k = 1; k < pr->partitionData[model]->numberOfCategories; k++)
-              {
-                diff = fabs(temp - rc[k].rate);
-
-                if(diff < min)
-                  {
-                    min = diff;
-                    tr->rateCategory[i] = k;
-                  }
-              }
-          }
-      }
-
-  for(k = 0; k < pr->partitionData[model]->numberOfCategories; k++)
-    pr->partitionData[model]->perSiteRates[k] = rc[k].rate;
-}
-
-
-#if (defined(_FINE_GRAIN_MPI) || defined(_USE_PTHREADS))
-
-void optRateCatPthreads(pllInstance *tr, partitionList *pr, double lower_spacing, double upper_spacing, double *lhs, int n, int tid)
-{ /* Per-worker part of the per-site rate search; compiled only when
-   * _FINE_GRAIN_MPI or _USE_PTHREADS is defined.
-   *
-   * For every site this worker handles, the stored rate is reloaded, the rate
-   * is stepped downwards in multiples of lower_spacing and upwards in
-   * multiples of upper_spacing for as long as the value returned by
-   * evaluatePartialGeneric keeps growing by more than epsilon, and the better
-   * of the two end points (or the unchanged start) is written to
-   * tr->patrat[i], tr->patratStored[i] and lhs[i].
-   *
-   * tr, pr         PLL instance and list of partitions
-   * lower_spacing  step width of the downward search
-   * upper_spacing  step width of the upward search
-   * lhs            output array, indexed by the global site index i
-   * n, tid         a site is handled when tr->manyPartitions is set or
-   *                i % n == tid (presumably worker count and worker id;
-   *                TODO confirm against the caller)
-   */
-  int 
-    model, 
-    i;
-
-  for(model = 0; model < pr->numberOfPartitions; model++)
-    {      
-      int 
-        localIndex = 0; /* counts the sites of this partition handled by this worker */
-
-      pllBoolean 
-        execute = ((tr->manyPartitions && isThisMyPartition(pr, tid, model)) || (!tr->manyPartitions));
-
-      if(execute)
-        for(i = pr->partitionData[model]->lower;  i < pr->partitionData[model]->upper; i++)
-          {
-            if(tr->manyPartitions || (i % n == tid))
-              {
-              
-                double initialRate, initialLikelihood, 
-                  leftLH, rightLH, leftRate, rightRate, v;
-                const double epsilon = 0.00001; /* minimum improvement needed to keep stepping */
-                int k;        
-                
-                tr->patrat[i] = tr->patratStored[i];     
-                initialRate = tr->patrat[i];
-                
-                initialLikelihood = evaluatePartialGeneric(tr, pr, localIndex, initialRate, model); /* i is real i ??? (original open question; note the site argument is localIndex, not i) */
-                
-                
-                leftLH = rightLH = initialLikelihood;
-                leftRate = rightRate = initialRate;
-                
-                k = 1;
-                
-                /* downward search; the first test keeps the candidate rate above 0.0001 and,
-                 * by short-circuit, also decides whether evaluatePartialGeneric is called */
-                while((initialRate - k * lower_spacing > 0.0001) && 
-                      ((v = evaluatePartialGeneric(tr, pr, localIndex, initialRate - k * lower_spacing, model))
-                       > leftLH) && 
-                      (fabs(leftLH - v) > epsilon))  
-                  {       
-#ifndef WIN32
-                    if(isnan(v)) /* NOTE(review): a NaN v already fails the '>' test above, so this looks unreachable */
-                      assert(0);
-#endif
-                    
-                    leftLH = v;
-                    leftRate = initialRate - k * lower_spacing;
-                    k++;          
-                  }      
-                
-                k = 1;
-                
-                /* upward search; unlike the downward one it has no bound on the rate itself */
-                while(((v = evaluatePartialGeneric(tr, pr, localIndex, initialRate + k * upper_spacing, model)) > rightLH) &&
-                      (fabs(rightLH - v) > epsilon))            
-                  {
-#ifndef WIN32
-                    if(isnan(v)) /* NOTE(review): same remark as in the downward search */
-                      assert(0);
-#endif     
-                    rightLH = v;
-                    rightRate = initialRate + k * upper_spacing;         
-                    k++;
-                  }           
-                
-                if(rightLH > initialLikelihood || leftLH > initialLikelihood)
-                  {
-                    if(rightLH > leftLH) /* on a tie the left (smaller) rate is taken */
-                      {      
-                        tr->patrat[i] = rightRate;
-                        lhs[i] = rightLH;
-                      }
-                    else
-                      {       
-                        tr->patrat[i] = leftRate;
-                        lhs[i] = leftLH;
-                      }
-                  }
-                else
-                  lhs[i] = initialLikelihood;
-                
-                tr->patratStored[i] = tr->patrat[i];
-                localIndex++;
-              }
-          }
-      assert(localIndex == pr->partitionData[model]->width); /* also checked when execute is false, i.e. with localIndex == 0; presumably width is the worker-local site count — TODO confirm */
-    }
-}
-
-
-
-#else
-
-/** @brief Optimize rates for CAT model
- *
- *  Sequential per-site rate search for one partition (used when neither
- *  _FINE_GRAIN_MPI nor _USE_PTHREADS is defined). For every site i in
- *  [lower, upper) of the partition the stored rate is reloaded, the rate is
- *  stepped downwards in multiples of \a lower_spacing and upwards in multiples
- *  of \a upper_spacing for as long as the value returned by
- *  \a evaluatePartialGeneric keeps growing by more than 0.00001, and the better
- *  of the two end points (or the unchanged start) is stored in
- *  \a tr->patrat[i], \a tr->patratStored[i] and \a lhs[i].
- *
- *  @param tr
- *    PLL instance
- *
- *  @param pr
- *    List of partitions
- *
- *  @param model
- *    Partition index
- *
- *  @param lower_spacing
- *    Step width of the downward search
- *
- *  @param upper_spacing
- *    Step width of the upward search
- *
- *  @param lhs
- *    Output array indexed by site; receives the best value found for each site
- */
-static void optRateCatModel(pllInstance *tr, partitionList *pr, int model, double lower_spacing, double upper_spacing, double *lhs)
-{
-  int lower = pr->partitionData[model]->lower;
-  int upper = pr->partitionData[model]->upper;
-  int i;
-  for(i = lower; i < upper; i++)
-    {
-      double initialRate, initialLikelihood, 
-        leftLH, rightLH, leftRate, rightRate, v;
-      const double epsilon = 0.00001; /* minimum improvement needed to keep stepping */
-      int k;
-      
-      tr->patrat[i] = tr->patratStored[i];     
-      initialRate = tr->patrat[i];
-      
-      initialLikelihood = evaluatePartialGeneric(tr, pr, i, initialRate, model);
-      
-      
-      leftLH = rightLH = initialLikelihood;
-      leftRate = rightRate = initialRate;
-      
-      k = 1;
-      
-      /* downward search; the first test keeps the candidate rate above 0.0001 and,
-       * by short-circuit, also decides whether evaluatePartialGeneric is called */
-      while((initialRate - k * lower_spacing > 0.0001) && 
-            ((v = evaluatePartialGeneric(tr, pr, i, initialRate - k * lower_spacing, model))
-             > leftLH) && 
-            (fabs(leftLH - v) > epsilon))  
-        {         
-#ifndef WIN32
-          if(isnan(v)) /* NOTE(review): a NaN v already fails the '>' test above, so this looks unreachable */
-            assert(0);
-#endif
-          
-          leftLH = v;
-          leftRate = initialRate - k * lower_spacing;
-          k++;    
-        }      
-      
-      k = 1;
-      
-      /* upward search; unlike the downward one it has no bound on the rate itself */
-      while(((v = evaluatePartialGeneric(tr, pr, i, initialRate + k * upper_spacing, model)) > rightLH) &&
-            (fabs(rightLH - v) > epsilon))      
-        {
-#ifndef WIN32
-          if(isnan(v)) /* NOTE(review): same remark as in the downward search */
-            assert(0);
-#endif     
-          rightLH = v;
-          rightRate = initialRate + k * upper_spacing;   
-          k++;
-        }           
-  
-      if(rightLH > initialLikelihood || leftLH > initialLikelihood)
-        {
-          if(rightLH > leftLH) /* on a tie the left (smaller) rate is taken */
-            {        
-              tr->patrat[i] = rightRate;
-              lhs[i] = rightLH;
-            }
-          else
-            {         
-              tr->patrat[i] = leftRate;
-              lhs[i] = leftLH;
-            }
-        }
-      else
-        lhs[i] = initialLikelihood;
-      
-      tr->patratStored[i] = tr->patrat[i];
-    }
-
-}
-
-
-#endif
-
-
-
-/* 
-   Check, and optionally rescale, the per-category rates of the CAT model.
-
-   The weighted mean rate is the sum of aliaswgt[i] * perSiteRates[rateCategory[i]]
-   over the sites, divided by the sum of aliaswgt[i]. It is computed per
-   partition when pr->perGeneBranchLengths is set and there is more than one
-   partition, otherwise once over all partitions together.
-
-   scaleRates == PLL_TRUE:  every perSiteRates entry is multiplied by
-                            1 / mean, then the mean is recomputed and asserted
-                            to be within 1.0E-5 of 1.0.
-   scaleRates == PLL_FALSE: nothing is rescaled; the current mean is only
-                            asserted to be within 1.0E-5 of 1.0.
-
-   Original note: set scaleRates to PLL_FALSE everywhere such that 
-   per-site rates are not scaled to obtain an overall mean rate 
-   of 1.0
-
-   In builds without _FINE_GRAIN_MPI / _USE_PTHREADS the global rate categories
-   are copied into each partition's own rateCategory array; otherwise
-   pllMasterBarrier is called with PLL_THREAD_COPY_RATE_CATS.
-
-   NOTE(review): accWgt is used as a divisor without a zero check; this assumes
-   every partition has at least one site with a non-zero weight - confirm.
-*/
-
-void updatePerSiteRates(pllInstance *tr, partitionList *pr, pllBoolean scaleRates)
-{
-  int 
-    i,
-    model;
-
-  if(pr->perGeneBranchLengths && pr->numberOfPartitions > 1)
-    {            
-      for(model = 0; model < pr->numberOfPartitions; model++) /* mean rate handled separately for each partition */
-        {
-          int          
-            lower = pr->partitionData[model]->lower,
-            upper = pr->partitionData[model]->upper;
-          
-          if(scaleRates)
-            {
-              double 
-                scaler = 0.0,       
-                accRat = 0.0; 
-
-              int 
-                accWgt     = 0;
-              
-              for(i = lower; i < upper; i++)
-                {
-                  int 
-                    w = tr->aliaswgt[i];
-                  
-                  double
-                    rate = pr->partitionData[model]->perSiteRates[tr->rateCategory[i]];
-                  
-                  assert(0 <= tr->rateCategory[i] && tr->rateCategory[i] < tr->maxCategories); /* checked only after the index was already used above */
-                  
-                  accWgt += w;
-                  
-                  accRat += (w * rate);
-                }          
-          
-              accRat /= ((double)accWgt);
-          
-              scaler = 1.0 / ((double)accRat);
-                  
-              for(i = 0; i < pr->partitionData[model]->numberOfCategories; i++)
-                pr->partitionData[model]->perSiteRates[i] *= scaler;
-
-              accRat = 0.0; /* recompute the mean with the scaled rates; accWgt is reused */
-              
-              for(i = lower; i < upper; i++)
-                {
-                  int 
-                    w = tr->aliaswgt[i];
-                  
-                  double
-                    rate = pr->partitionData[model]->perSiteRates[tr->rateCategory[i]];
-                  
-                  assert(0 <= tr->rateCategory[i] && tr->rateCategory[i] < tr->maxCategories);        
-                  
-                  accRat += (w * rate);
-                }                
-
-              accRat /= ((double)accWgt);         
-
-              assert(PLL_ABS(1.0 - accRat) < 1.0E-5);
-            }
-          else
-            {
-              double               
-                accRat = 0.0; 
-
-              int 
-                accWgt     = 0;
-              
-              for(i = lower; i < upper; i++)
-                {
-                  int 
-                    w = tr->aliaswgt[i];
-                  
-                  double
-                    rate = pr->partitionData[model]->perSiteRates[tr->rateCategory[i]];
-                  
-                  assert(0 <= tr->rateCategory[i] && tr->rateCategory[i] < tr->maxCategories);
-                  
-                  accWgt += w;
-                  
-                  accRat += (w * rate);
-                }          
-          
-              accRat /= ((double)accWgt);
-              
-              assert(PLL_ABS(1.0 - accRat) < 1.0E-5); /* verification only, nothing is modified in this branch */
-            }
-
-          
-#if NOT (defined(_FINE_GRAIN_MPI) || defined(_USE_PTHREADS))
-          { /* NOT is presumably a project macro for logical negation - TODO confirm */
-            int 
-              localCount = 0;
-            
-            for(i = lower, localCount = 0; i < upper; i++, localCount++)
-              {               
-                pr->partitionData[model]->rateCategory[localCount] = tr->rateCategory[i];
-              }
-          }
-#endif
-        }
-    }
-  else
-    { /* mean rate taken over all partitions together */
-      int
-        accWgt = 0;
-
-      double 
-        scaler = 0.0,       
-        accRat = 0.0; 
-
-      if(scaleRates)
-        {
-          for(model = 0, accRat = 0.0, accWgt = 0; model < pr->numberOfPartitions; model++)
-            {
-              int 
-                localCount = 0, /* incremented by the loop below but never read */
-                lower = pr->partitionData[model]->lower,
-                upper = pr->partitionData[model]->upper;
-              
-              for(i = lower, localCount = 0; i < upper; i++, localCount++)
-                {
-                  int 
-                    w = tr->aliaswgt[i];
-                  
-                  double
-                    rate = pr->partitionData[model]->perSiteRates[tr->rateCategory[i]];
-                  
-                  assert(0 <= tr->rateCategory[i] && tr->rateCategory[i] < tr->maxCategories);
-                  
-                  accWgt += w;
-                  
-                  accRat += (w * rate);
-                }
-            }
-          
-          accRat /= ((double)accWgt);
-          
-          scaler = 1.0 / ((double)accRat);
-          
-          for(model = 0; model < pr->numberOfPartitions; model++) /* one common scaler is applied to every partition */
-            {
-              for(i = 0; i < pr->partitionData[model]->numberOfCategories; i++)
-                pr->partitionData[model]->perSiteRates[i] *= scaler;
-            }
-
-          for(model = 0, accRat = 0.0; model < pr->numberOfPartitions; model++)
-            {
-              int 
-                localCount = 0, /* incremented by the loop below but never read */
-                lower = pr->partitionData[model]->lower,
-                upper = pr->partitionData[model]->upper;
-              
-              for(i = lower, localCount = 0; i < upper; i++, localCount++)
-                {
-                  int 
-                    w = tr->aliaswgt[i];
-                  
-                  double
-                    rate = pr->partitionData[model]->perSiteRates[tr->rateCategory[i]];
-                  
-                  assert(0 <= tr->rateCategory[i] && tr->rateCategory[i] < tr->maxCategories);        
-                  
-                  accRat += (w * rate);
-                }
-            }           
-
-          accRat /= ((double)accWgt);     
-
-          assert(PLL_ABS(1.0 - accRat) < 1.0E-5);
-        }
-      else
-        {
-          for(model = 0, accRat = 0.0, accWgt = 0; model < pr->numberOfPartitions; model++)
-            {
-              int 
-                localCount = 0, /* incremented by the loop below but never read */
-                lower = pr->partitionData[model]->lower,
-                upper = pr->partitionData[model]->upper;
-              
-              for(i = lower, localCount = 0; i < upper; i++, localCount++)
-                {
-                  int 
-                    w = tr->aliaswgt[i];
-                  
-                  double
-                    rate = pr->partitionData[model]->perSiteRates[tr->rateCategory[i]];
-                  
-                  assert(0 <= tr->rateCategory[i] && tr->rateCategory[i] < tr->maxCategories);
-                  
-                  accWgt += w;
-                  
-                  accRat += (w * rate);
-                }
-            }
-          
-          accRat /=  (double)accWgt;
-
-          assert(PLL_ABS(1.0 - accRat) < 1.0E-5); /* verification only, nothing is modified in this branch */
-        }
-         
-         /* disabled code kept from the original:
-       for(model = 0; model < pr->numberOfPartitions; model++)
-        {
-          int 
-            localCount = 0,
-            lower = pr->partitionData[model]->lower,
-            upper = pr->partitionData[model]->upper;
-
-        }  */       
-#if NOT (defined(_FINE_GRAIN_MPI) || defined(_USE_PTHREADS))
-      for(model = 0; model < pr->numberOfPartitions; model++) /* copy the global categories into each partition's local array */
-        {                        
-          int 
-            localCount,
-            lower = pr->partitionData[model]->lower,
-            upper = pr->partitionData[model]->upper;
-          
-          for(i = lower, localCount = 0; i < upper; i++, localCount++)
-              pr->partitionData[model]->rateCategory[localCount] = tr->rateCategory[i];
-        }
-#endif
-    }
-  
-#if (defined(_FINE_GRAIN_MPI) || defined(_USE_PTHREADS))
-  pllMasterBarrier(tr, pr, PLL_THREAD_COPY_RATE_CATS);
-#endif               
-}
-
-/** @brief Optimize rate categories for CAT model
- *
- *  Does nothing unless \a _maxCategories is greater than 1. Otherwise:
- *   -# the current category assignment, stored per-site rates, category counts
- *      and per-category rates are backed up;
- *   -# the per-site rates are re-optimized (through \a pllMasterBarrier with
- *      \b PLL_THREAD_RATE_CATS in threaded/MPI builds, through
- *      \a optRateCatModel otherwise), with step widths that depend on
- *      \a tr->optimizeRateCategoryInvocations and are never below 0.001;
- *   -# per partition, the distinct per-site rates (tolerance 0.001) are
- *      collected, sorted with \a catCompare, and at most \a _maxCategories of
- *      them are used by \a categorizePartition;
- *   -# the rates are rescaled with \a updatePerSiteRates and the likelihood is
- *      evaluated again; if it is lower than before, the backup is restored.
- *
- *  @param tr
- *    PLL instance
- *
- *  @param pr
- *    List of partitions
- *
- *  @param _maxCategories
- *    Upper limit for the number of rate categories per partition; must be > 0
- */
-static void optimizeRateCategories(pllInstance *tr, partitionList *pr, int _maxCategories)
-{
-  assert(_maxCategories > 0);
-
-  if(_maxCategories > 1)
-    {
-      double  
-        temp,  
-        lower_spacing, 
-        upper_spacing,
-        initialLH = tr->likelihood, /* NOTE(review): read before the evaluation a few lines below and not refreshed afterwards; assumes tr->likelihood is already up to date - confirm */
-        *ratStored = (double *)rax_malloc(sizeof(double) * tr->originalCrunchedLength),
-        /**lhs =       (double *)malloc(sizeof(double) * tr->originalCrunchedLength),*/
-        **oldCategorizedRates = (double **)rax_malloc(sizeof(double *) * pr->numberOfPartitions);
-
-      int  
-        i,
-        k,
-        maxCategories = _maxCategories,
-        *oldCategory =  (int *)rax_malloc(sizeof(int) * tr->originalCrunchedLength),
-        model,
-        *oldNumbers = (int *)rax_malloc(sizeof(int) * pr->numberOfPartitions);
-  
-      assert(isTip(tr->start->number, tr->mxtips));         
-      
-      pllEvaluateLikelihood (tr, pr, tr->start, PLL_TRUE, PLL_FALSE);
-
-      if(tr->optimizeRateCategoryInvocations == 1) /* coarse steps on the first invocation, finer ones afterwards */
-        {
-          lower_spacing = 0.5 / ((double)(tr->optimizeRateCategoryInvocations));
-          upper_spacing = 1.0 / ((double)(tr->optimizeRateCategoryInvocations));
-        }
-      else
-        {
-          lower_spacing = 0.05 / ((double)(tr->optimizeRateCategoryInvocations)); /* presumably the counter is never 0 here - TODO confirm */
-          upper_spacing = 0.1 / ((double)(tr->optimizeRateCategoryInvocations));
-        }
-      
-      if(lower_spacing < 0.001)
-        lower_spacing = 0.001;
-      
-      if(upper_spacing < 0.001)
-        upper_spacing = 0.001;
-      
-      tr->optimizeRateCategoryInvocations = tr->optimizeRateCategoryInvocations + 1;
-
-      memcpy(oldCategory, tr->rateCategory, sizeof(int) * tr->originalCrunchedLength); /* backup used by the revert path below */
-      memcpy(ratStored,   tr->patratStored, sizeof(double) * tr->originalCrunchedLength);
-
-      for(model = 0; model < pr->numberOfPartitions; model++)
-        {
-          oldNumbers[model]          = pr->partitionData[model]->numberOfCategories;
-
-          oldCategorizedRates[model] = (double *)rax_malloc(sizeof(double) * tr->maxCategories);
-          
-          memcpy(oldCategorizedRates[model], pr->partitionData[model]->perSiteRates, tr->maxCategories * sizeof(double));
-        }      
-      
-#if (defined(_FINE_GRAIN_MPI) || defined(_USE_PTHREADS))
-      /*tr->lhs = lhs;*/
-      tr->lower_spacing = lower_spacing;
-      tr->upper_spacing = upper_spacing;
-      pllMasterBarrier(tr, pr, PLL_THREAD_RATE_CATS);
-#else      
-      for(model = 0; model < pr->numberOfPartitions; model++)
-        optRateCatModel(tr, pr, model, lower_spacing, upper_spacing, tr->lhs);
-#endif     
-
-      for(model = 0; model < pr->numberOfPartitions; model++)
-        {     
-          int 
-            where = 1, /* number of distinct rates collected so far; slot 0 is seeded below */
-            found = 0,
-            width = pr->partitionData[model]->upper -  pr->partitionData[model]->lower,
-            upper = pr->partitionData[model]->upper,
-            lower = pr->partitionData[model]->lower;
-            
-          rateCategorize 
-            *rc = (rateCategorize *)rax_malloc(sizeof(rateCategorize) * width);          
-        
-          for (i = 0; i < width; i++)
-            {
-              rc[i].accumulatedSiteLikelihood = 0.0;
-              rc[i].rate = 0.0;
-            }  
-        
-          rc[0].accumulatedSiteLikelihood = tr->lhs[lower]; /* NOTE(review): written even if width were 0; assumes non-empty partitions - confirm */
-          rc[0].rate = tr->patrat[lower];
-        
-          tr->rateCategory[lower] = 0;
-        
-          for (i = lower + 1; i < upper; i++) 
-            {
-              temp = tr->patrat[i];
-              found = 0;
-            
-              for(k = 0; k < where; k++)
-                {
-                  if(temp == rc[k].rate || (fabs(temp - rc[k].rate) < 0.001))
-                    {
-                      found = 1;                                                
-                      rc[k].accumulatedSiteLikelihood += tr->lhs[i];    
-                      break;
-                    }
-                }
-            
-              if(!found) /* open a new slot; its accumulator was zeroed above */
-                {           
-                  rc[where].rate = temp;            
-                  rc[where].accumulatedSiteLikelihood += tr->lhs[i];        
-                  where++;
-                }
-            }
-        
-          qsort(rc, where, sizeof(rateCategorize), catCompare);
-        
-          if(where < maxCategories) /* both branches differ only in the category count: min(where, maxCategories) */
-            {
-              pr->partitionData[model]->numberOfCategories = where;
-              categorizePartition(tr, pr, rc, model, lower, upper);
-            }
-          else
-            {
-              pr->partitionData[model]->numberOfCategories = maxCategories;
-              categorizePartition(tr, pr, rc, model, lower, upper);
-            }
-        
-          rax_free(rc);
-        }
-                
-      updatePerSiteRates(tr, pr, PLL_TRUE);
-
-      pllEvaluateLikelihood (tr, pr, tr->start, PLL_TRUE, PLL_FALSE);
-      
-      if(tr->likelihood < initialLH) /* new categorization is worse: restore the backup */
-        {                         
-          for(model = 0; model < pr->numberOfPartitions; model++)
-            {
-              pr->partitionData[model]->numberOfCategories = oldNumbers[model];
-              memcpy(pr->partitionData[model]->perSiteRates, oldCategorizedRates[model], tr->maxCategories * sizeof(double));
-            }         
-          
-          memcpy(tr->patratStored, ratStored, sizeof(double) * tr->originalCrunchedLength); /* tr->patrat itself is not restored here */
-          memcpy(tr->rateCategory, oldCategory, sizeof(int) * tr->originalCrunchedLength);           
-          
-          updatePerSiteRates(tr, pr, PLL_FALSE);
-          
-          pllEvaluateLikelihood (tr, pr, tr->start, PLL_TRUE, PLL_FALSE);
-
-          /* printf("REVERT: %1.40f %1.40f\n", initialLH, tr->likelihood); */
-
-          assert(initialLH == tr->likelihood); /* exact floating-point equality is required after the revert */
-        }
-          
-      for(model = 0; model < pr->numberOfPartitions; model++)
-        rax_free(oldCategorizedRates[model]);
-                   
-      rax_free(oldCategorizedRates);
-      rax_free(oldCategory);
-      rax_free(ratStored);       
-      /*     rax_free(lhs); */
-      rax_free(oldNumbers);
-    }
-}
-  
-
-/************************* end of functions for CAT model of rate heterogeneity */
-
-
-
-
-/*****************************************************************************************************/
-
-/* reset all branch lengths in tree to default values */
-
-/** @brief Reset all branch lengths to default values
-
-    Starting at \a tr->nodep[1], visits \a mxtips + 3 * (\a mxtips - 2)
-    consecutive node records. For each record, every entry of the \a z array
-    (\b PLL_NUM_BRANCHES entries) is set to \b PLL_DEFAULTZ, and the same is done
-    for all records reached by following \a next until the starting record is
-    reached again.
-
-    @param tr
-      PLL instance
-  */
-void resetBranches(pllInstance *tr)
-{
-  int remaining = tr->mxtips  +  3 * (tr->mxtips - 2);
-  nodeptr cur = tr->nodep[1];
-
-  for(; remaining > 0; remaining--, cur++)
-    {
-      nodeptr ring = cur;
-      int b;
-
-      /* handle the record itself first, then walk its ring of next pointers */
-      do
-        {
-          for(b = 0; b < PLL_NUM_BRANCHES; b++)
-            ring->z[b] = PLL_DEFAULTZ;
-
-          ring = ring->next;
-        }
-      while(ring != cur);
-    }
-}
-
-/**
- * @brief Raise frequencies that lie below PLL_FREQ_MIN and shrink the others.
- *
- * As long as at least one entry is smaller than PLL_FREQ_MIN, every such entry
- * is set to PLL_FREQ_MIN and every other entry f is replaced by
- * f - f * correction * factor, where correction and factor are derived from
- * the shortfall of the small entries. The loop is expected to finish within
- * 100 rounds (enforced by an assert).
- *
- * @param frequencies          array that is modified in place
- * @param numberOfFrequencies  number of entries in \a frequencies
- */
-static void smoothFrequencies(double *frequencies, int numberOfFrequencies) {
-	int idx, tooSmall = 0, rounds = 0;
-
-	for (idx = 0; idx < numberOfFrequencies; idx++) {
-		if (frequencies[idx] < PLL_FREQ_MIN)
-			tooSmall++;
-	}
-
-	for (; tooSmall > 0; rounds++) {
-		double correction = 0.0, factor = 1.0;
-
-		/* accumulate the total shortfall of the entries below the minimum */
-		for (idx = 0; idx < numberOfFrequencies; idx++) {
-			if (frequencies[idx] == 0.0) {
-				correction += PLL_FREQ_MIN;
-			} else if (frequencies[idx] < PLL_FREQ_MIN) {
-				correction += (PLL_FREQ_MIN - frequencies[idx]);
-				factor -= (PLL_FREQ_MIN - frequencies[idx]);
-			}
-		}
-
-		/* apply the adjustment and recount the entries still below the minimum */
-		tooSmall = 0;
-		for (idx = 0; idx < numberOfFrequencies; idx++) {
-			if (frequencies[idx] >= PLL_FREQ_MIN)
-				frequencies[idx] = frequencies[idx] - (frequencies[idx] * correction * factor);
-			else
-				frequencies[idx] = PLL_FREQ_MIN;
-
-			if (frequencies[idx] < PLL_FREQ_MIN)
-				tooSmall++;
-		}
-
-		assert(rounds < 100);
-	}
-}
-
-/**
- * @brief Evaluate all possible protein models
- *
- * Tries the model indices 0 .. PLL_AUTO - 1 on every partition whose
- * \a protModels is PLL_AUTO. For each index the reversible model is
- * re-initialised, the branch lengths are reset and re-optimized, and the
- * resulting \a partitionLH of each such partition is compared with the best
- * value seen so far.
- *
- * @param tr              PLL instance
- * @param pr              List of partitions
- * @param bestIndex       output, one entry per partition: index of the best
- *                        model, or -1 if none was recorded
- * @param bestScores      output, one entry per partition: best \a partitionLH,
- *                        or PLL_UNLIKELY if none was recorded
- * @param empiricalFreqs  stored in \a protUseEmpiricalFreqs of every PLL_AUTO
- *                        partition; if set, smoothed base frequencies are first
- *                        copied into \a empiricalFrequencies of every partition
- */
-static void optimizeProteinModels(pllInstance *tr, partitionList * pr, int *bestIndex, double *bestScores, pllBoolean empiricalFreqs)
-{
-	const int candidateCount = PLL_AUTO;
-	int candidate, part;
-
-	for (part = 0; part < pr->numberOfPartitions; part++) {
-		bestIndex[part] = -1;
-		bestScores[part] = PLL_UNLIKELY;
-	}
-
-	if (empiricalFreqs) {
-		double ** observed = pllBaseFrequenciesInstance(tr, pr);
-
-		/* NOTE(review): PLL_NUM_AA_STATES entries are copied for every partition,
-		   whatever its data type - confirm that all arrays are large enough */
-		for (part = 0; part < pr->numberOfPartitions; part++) {
-			smoothFrequencies(observed[part], PLL_NUM_AA_STATES);
-			memcpy(pr->partitionData[part]->empiricalFrequencies, observed[part], PLL_NUM_AA_STATES*sizeof(double));
-		}
-
-		/* NOTE(review): only the outer array is released here; if the rows are
-		   separate allocations they are leaked - confirm against the allocator */
-		free(observed);
-	}
-
-	for (candidate = 0; candidate < candidateCount; candidate++) {
-		/* install the candidate model on every auto-selected partition */
-		for (part = 0; part < pr->numberOfPartitions; part++) {
-			if (pr->partitionData[part]->protModels != PLL_AUTO)
-				continue;
-
-			pr->partitionData[part]->autoProtModels = candidate;
-			pr->partitionData[part]->protUseEmpiricalFreqs = empiricalFreqs;
-
-			assert(!pr->partitionData[part]->optimizeBaseFrequencies);
-
-			pllInitReversibleGTR(tr, pr, part);
-		}
-
-#if (defined(_FINE_GRAIN_MPI) || defined(_USE_PTHREADS))
-		pllMasterBarrier (tr, pr, PLL_THREAD_COPY_RATES);
-#endif
-
-		/* optimize branch lengths */
-		resetBranches(tr);
-		pllEvaluateLikelihood(tr, pr, tr->start, PLL_TRUE, PLL_FALSE);
-		pllOptimizeBranchLengths(tr, pr, 16);
-
-		/* keep the candidate if it improved the partition's best score */
-		for (part = 0; part < pr->numberOfPartitions; part++) {
-			if (pr->partitionData[part]->protModels == PLL_AUTO
-					&& pr->partitionData[part]->partitionLH > bestScores[part]) {
-				bestScores[part] = pr->partitionData[part]->partitionLH;
-				bestIndex[part] = candidate;
-			}
-		}
-	}
-}
-
-/* 
-   automatically compute the best protein substitution model for the dataset at hand.
- */
-
-/** @brief Compute the best protein substitution model
-  *
-  * Automatically compute the best protein substitution model for the dataset
-  * at hand
-  *
-  * @param tr
-  *   The PLL instance
-  *
-  * @param pr
-  *   List of partitions
-  *
-  */
-static void autoProtein(pllInstance *tr, partitionList *pr)
-{
-	int countAutos = 0, partitionIndex;
-
-	/* count the number of partitions with model set to PLL_AUTO */
-	for (partitionIndex = 0; partitionIndex < pr->numberOfPartitions; partitionIndex++)
-		if (pr->partitionData[partitionIndex]->protModels == PLL_AUTO)
-			countAutos++;
-
-	/* if there are partitions with model set to PLL_AUTO compute the best model */
-	if (countAutos > 0) {
-		int *bestIndex = (int*) rax_malloc(
-				sizeof(int) * pr->numberOfPartitions),
-		    *bestIndexEmpFreqs = (int*) rax_malloc(
-				sizeof(int) * pr->numberOfPartitions),
-		    *oldIndex =
-				(int*) rax_malloc(sizeof(int) * pr->numberOfPartitions);
-
-		pllBoolean *oldFreqs = (pllBoolean*) malloc(
-				sizeof(pllBoolean) * pr->numberOfPartitions);
-
-		double startLH,
-		      *bestScores = (double*) rax_malloc(
-				sizeof(double) * pr->numberOfPartitions),
-			  *bestScoresEmpFreqs = (double*) rax_malloc(
-				sizeof(double) * pr->numberOfPartitions);
-
-		topolRELL_LIST *rl = (topolRELL_LIST *) rax_malloc(
-				sizeof(topolRELL_LIST));
-
-		initTL(rl, tr, 1);
-		saveTL(rl, tr, 0);
-
-		pllEvaluateLikelihood(tr, pr, tr->start, PLL_TRUE, PLL_FALSE);
-
-		/* store the initial likelihood of the tree with the currently assigned protein models */
-		startLH = tr->likelihood;
-
-		/* save the currently assigned protein model for each PLL_AUTO partition */
-		for (partitionIndex = 0; partitionIndex < pr->numberOfPartitions; partitionIndex++) {
-			oldIndex[partitionIndex] = pr->partitionData[partitionIndex]->autoProtModels;
-			oldFreqs[partitionIndex] = pr->partitionData[partitionIndex]->protUseEmpiricalFreqs;
-			bestIndex[partitionIndex] = -1;
-			bestScores[partitionIndex] = PLL_UNLIKELY;
-		}
-
-		/* evaluate all models with fixed base frequencies */
-		optimizeProteinModels(tr, pr, bestIndex, bestScores, PLL_FALSE);
-		/* evaluate all models with fixed empirical frequencies */
-		optimizeProteinModels(tr, pr, bestIndexEmpFreqs, bestScoresEmpFreqs, PLL_TRUE);
-
-		/* model selection */
-		for (partitionIndex = 0; partitionIndex < pr->numberOfPartitions; partitionIndex++) {
-			if (pr->partitionData[partitionIndex]->protModels == PLL_AUTO) {
-				int bestIndexFixed = bestIndex[partitionIndex],
-				    bestIndexEmp = bestIndexEmpFreqs[partitionIndex];
-
-				double bestLhFixed = bestScores[partitionIndex],
-					   bestLhEmp = bestScoresEmpFreqs[partitionIndex],
-					   samples = 0.0,
-					   freeParamsFixed = 0.0,
-					   freeParamsEmp = 0.0;
-
-				samples = pr->partitionData[partitionIndex]->partitionWeight;
-				assert(samples > 0.0 && samples >= pr->partitionData[partitionIndex]->width);
-
-				assert(tr->ntips == tr->mxtips);
-				freeParamsFixed = freeParamsEmp = (2 * tr->ntips - 3);
-				freeParamsEmp += 19.0;
-
-				switch (tr->rateHetModel) {
-				case PLL_CAT:
-					freeParamsFixed +=
-							(double) pr->partitionData[partitionIndex]->numberOfCategories;
-					freeParamsEmp +=
-							(double) pr->partitionData[partitionIndex]->numberOfCategories;
-					break;
-				case PLL_GAMMA:
-					freeParamsFixed += 1.0;
-					freeParamsEmp += 1.0;
-					break;
-				default:
-					assert(0);
-				}
-
-				switch (tr->autoProteinSelectionType) {
-				case PLL_AUTO_ML:
-					if (bestLhFixed > bestLhEmp) {
-						pr->partitionData[partitionIndex]->autoProtModels =
-								bestIndexFixed;
-						pr->partitionData[partitionIndex]->protUseEmpiricalFreqs = 0;
-					} else {
-						pr->partitionData[partitionIndex]->autoProtModels = bestIndexEmp;
-						pr->partitionData[partitionIndex]->protUseEmpiricalFreqs = 1;
-					}
-					break;
-				case PLL_AUTO_BIC: {
-					//BIC: -2 * lnL + k * ln(n)
-					double bicFixed = -2.0 * bestLhFixed
-							+ freeParamsFixed * log(samples),
-						   bicEmp = -2.0
-							* bestLhEmp + freeParamsEmp * log(samples);
-
-					if (bicFixed < bicEmp) {
-						pr->partitionData[partitionIndex]->autoProtModels =
-								bestIndexFixed;
-						pr->partitionData[partitionIndex]->protUseEmpiricalFreqs = 0;
-					} else {
-						pr->partitionData[partitionIndex]->autoProtModels = bestIndexEmp;
-						pr->partitionData[partitionIndex]->protUseEmpiricalFreqs = 1;
-					}
-				}
-					break;
-				case PLL_AUTO_AIC: {
-					//AIC: 2 * (k - lnL)
-					double aicFixed = 2.0 * (freeParamsFixed - bestLhFixed),
-							aicEmp = 2.0 * (freeParamsEmp - bestLhEmp);
-
-					if (aicFixed < aicEmp) {
-						pr->partitionData[partitionIndex]->autoProtModels =
-								bestIndexFixed;
-						pr->partitionData[partitionIndex]->protUseEmpiricalFreqs = 0;
-					} else {
-						pr->partitionData[partitionIndex]->autoProtModels = bestIndexEmp;
-						pr->partitionData[partitionIndex]->protUseEmpiricalFreqs = 1;
-					}
-				}
-					break;
-				case PLL_AUTO_AICC: {
-					//AICc: AIC + (2 * k * (k + 1))/(n - k - 1)
-					double aiccFixed, aiccEmp;
-
-					/*
-					 * Even though samples and freeParamsFixed are fp variables, they are actually integers.
-					 * That's why we are comparing with a 0.5 threshold.
-					 */
-
-					if (fabs(samples - freeParamsFixed - 1.0) < 0.5)
-						aiccFixed = 0.0;
-					else
-						aiccFixed = (2.0 * (freeParamsFixed - bestLhFixed))
-								+ ((2.0 * freeParamsFixed
-										* (freeParamsFixed + 1.0))
-										/ (samples - freeParamsFixed - 1.0));
-
-					if (fabs(samples - freeParamsEmp - 1.0) < 0.5)
-						aiccEmp = 0.0;
-					else
-						aiccEmp = (2.0 * (freeParamsEmp - bestLhEmp))
-								+ ((2.0 * freeParamsEmp * (freeParamsEmp + 1.0))
-										/ (samples - freeParamsEmp - 1.0));
-
-					if (aiccFixed < aiccEmp) {
-						pr->partitionData[partitionIndex]->autoProtModels =
-								bestIndexFixed;
-						pr->partitionData[partitionIndex]->protUseEmpiricalFreqs = 0;
-					} else {
-						pr->partitionData[partitionIndex]->autoProtModels = bestIndexEmp;
-						pr->partitionData[partitionIndex]->protUseEmpiricalFreqs = 1;
-					}
-				}
-					break;
-				default:
-					assert(0);
-				}
-
-				pllInitReversibleGTR(tr, pr, partitionIndex);
-			}
-		}
-
-		resetBranches(tr);
-		pllEvaluateLikelihood(tr, pr, tr->start, PLL_TRUE, PLL_FALSE);
-		pllOptimizeBranchLengths(tr, pr, 64);
-
-		/* set the protein model of PLL_AUTO partitions to the best computed and reset model parameters */
-		for (partitionIndex = 0; partitionIndex < pr->numberOfPartitions; partitionIndex++) {
-			if (pr->partitionData[partitionIndex]->protModels == PLL_AUTO) {
-				pr->partitionData[partitionIndex]->autoProtModels = bestIndex[partitionIndex];
-				pllInitReversibleGTR(tr, pr, partitionIndex);
-			}
-		}
-
-#if (defined(_FINE_GRAIN_MPI) || defined(_USE_PTHREADS))
-		pllMasterBarrier(tr, pr, PLL_THREAD_COPY_RATES);
-#endif
-
-		/* compute again the likelihood of the tree */
-		resetBranches(tr);
-		pllEvaluateLikelihood(tr, pr, tr->start, PLL_TRUE, PLL_FALSE);
-		pllOptimizeBranchLengths(tr, pr, 64);
-
-		/* check if the likelihood of the tree with the new protein models assigned to PLL_AUTO partitions is better than the with the old protein models */
-		if (tr->likelihood < startLH) {
-			for (partitionIndex = 0; partitionIndex < pr->numberOfPartitions; partitionIndex++) {
-				if (pr->partitionData[partitionIndex]->protModels == PLL_AUTO) {
-					pr->partitionData[partitionIndex]->autoProtModels = oldIndex[partitionIndex];
-					pllInitReversibleGTR(tr, pr, partitionIndex);
-				}
-			}
-
-			//this barrier needs to be called in the library
-			//#ifdef _USE_PTHREADS
-			//pllMasterBarrier(tr, pr, PLL_THREAD_COPY_RATES);
-			//#endif
-
-			/* Restore the topology. rl holds the topology before the optimization. However,
-			 since the topology doesn't change - only the branch lengths do - maybe we
-			 could write a new routine that will store only the branch lengths and restore them */
-			restoreTL(rl, tr, 0,
-					pr->perGeneBranchLengths ? pr->numberOfPartitions : 1);
-			pllEvaluateLikelihood(tr, pr, tr->start, PLL_TRUE, PLL_FALSE);
-		}
-
-		assert(tr->likelihood >= startLH);
-
-		freeTL(rl);
-		rax_free(rl);
-
-		rax_free(oldIndex);
-		rax_free(bestIndex);
-		rax_free(bestIndexEmpFreqs);
-		rax_free(bestScores);
-		rax_free(bestScoresEmpFreqs);
-	}
-}
-
-
-/* iterative procedure for optimizing all model parameters */
-
-/** @brief Optimize all model parameters
- *
- * Iterative procedure for optimizing all model parameters
- *
- * @param tr
- *   PLL instance
- *
- * @param pr
- *   List of partitions
- *
- * @param likelihoodEpsilon
- *   Rounds are repeated while the absolute change of tr->likelihood over one
- *   round is greater than \a likelihoodEpsilon
- *
- * @todo
- *   Understand the TODO marked blocks.
- */
-void modOpt(pllInstance *tr, partitionList *pr, double likelihoodEpsilon)
-{ 
-  int catOpt = 0; 
-  double 
-    inputLikelihood,
-    currentLikelihood,
-    modelEpsilon = 0.0001;
-
-  /* linkage lists for alpha and the GTR subst matrices; p-invar has actually
-     been omitted in this version of the code */
-
-  linkageList
-    *alphaList = pr->alphaList,
-    *rateList  = pr->rateList,
-    *freqList  = pr->freqList;
-
-  modelEpsilon = 0.0001;
-
-  // test code for library (disabled: both branches below contain only comments)
-  if (0)
-   {
-     
-      //assuming that we have three partitions for testing here 
-
-      //alphaList = initLinkageListString("0,1,2", pr);
-      //rateList  = initLinkageListString("0,1,1", pr);
-    
-      //init_Q_MatrixSymmetries("0,1,2,3,4,5", pr, 0);
-      //init_Q_MatrixSymmetries("0,1,2,3,4,4", pr, 1);
-      //init_Q_MatrixSymmetries("0,1,1,2,3,4", pr, 2);
-      
-      //function that checks that partitions that have linked Q matrices as in our example above
-      //will not have different configurations of the Q matrix as set by the init_Q_MatrixSymmetries() function
-      //e.g., one would have HKY and one would have GTR, while the user claims that they are linked
-      //in our example, the Q matrices of partitions 1 and 2 are linked 
-      //but we set different matrix symmetries via 
-      // init_Q_MatrixSymmetries("0,1,2,3,4,4", tr, 1);
-      // and
-      // init_Q_MatrixSymmetries("0,1,1,2,3,4", tr, 2);
-      //
-      //the function just lets assertions fail for the time being .....
-
-      //checkMatrixSymnmetriesAndLinkage(pr, rateList);
-
-  /* alpha parameters and p-invar parameters are unlinked.
-     this is the point where I actually hard-coded this in RAxML */
-
-  /* call the dedicated function for linking the GTR matrix across all AA data partitions 
-     If we have only DNA data all GTR matrix estimates will be unlinked.
-     */
-   }
-  else
-   {
-     //alphaList = initLinkageList(unlinked, pr);
-     //freqList  = initLinkageList(unlinked, pr);
-     //rateList  = initLinkageListGTR(pr);
-   }
-
-  tr->start = tr->nodep[1];
-
-  /* This check is here to make sure that the likelihood 
-     computed prior to entering modOpt() is consistent 
-     with the likelihood when entering modOpt().
-     This allows us to ensure that we didn't forget to update anything prior 
-     to entering this function.
-   */
-  inputLikelihood = tr->likelihood;
-  pllEvaluateLikelihood (tr, pr, tr->start, PLL_TRUE, PLL_FALSE);
-  assert (inputLikelihood == tr->likelihood);
-
-  do
-  {           
-    //printBothOpen("cur LH: %f\n", tr->likelihood);
-    /* remember the likelihood at the start of this round; it is the reference
-       for both the improvement assertion and the loop condition below */
-    currentLikelihood = tr->likelihood;     
-
-#ifdef _DEBUG_MOD_OPT
-      printf ("start: %f\n", currentLikelihood);
-#endif
-
-    pllOptRatesGeneric(tr, pr, modelEpsilon, rateList);
-
-    pllEvaluateLikelihood (tr, pr, tr->start, PLL_TRUE, PLL_FALSE);
-
-#ifdef _DEBUG_MOD_OPT
-    printf ("after rates %f\n", tr->likelihood);
-#endif
-
-    autoProtein(tr, pr);
-
-    pllOptimizeBranchLengths(tr, pr, 2); // 0.0625 * 32 = 2.0
-
-#ifdef _DEBUG_MOD_OPT
-    pllEvaluateLikelihood (tr, pr, tr->start, PLL_TRUE, PLL_FALSE);
-    printf("after br-len 1 %f\n", tr->likelihood); 
-#endif
-
-    pllEvaluateLikelihood (tr, pr, tr->start, PLL_TRUE, PLL_FALSE);
-
-    pllOptBaseFreqs(tr, pr, modelEpsilon, freqList);
-    
-    pllEvaluateLikelihood (tr, pr, tr->start, PLL_TRUE, PLL_FALSE);
-    
-    pllOptimizeBranchLengths(tr, pr, 2); // 0.0625 * 32 = 2.0
-
-#ifdef _DEBUG_MOD_OPT
-    pllEvaluateLikelihood (tr, pr, tr->start, PLL_TRUE, PLL_FALSE); 
-    printf("after pllOptBaseFreqs 1 %f\n", tr->likelihood);
-#endif 
-
-    /* rate-heterogeneity step: alpha optimization under PLL_GAMMA, rate
-       category optimization under PLL_CAT; any other model is rejected */
-    switch(tr->rateHetModel)
-    {
-      case PLL_GAMMA:      
-        pllOptAlphasGeneric (tr, pr, modelEpsilon, alphaList);
-        pllEvaluateLikelihood (tr, pr, tr->start, PLL_TRUE, PLL_FALSE);
-
-#ifdef _DEBUG_MOD_OPT
-          printf("after alphas %f\n", tr->likelihood); 
-#endif
-
-        pllOptimizeBranchLengths(tr, pr, 3); // 0.1 * 32 = 3.2
-
-#ifdef _DEBUG_MOD_OPT
-          pllEvaluateLikelihood (tr, pr, tr->start, PLL_TRUE, PLL_FALSE);  
-          printf("after br-len 2 %f\n", tr->likelihood); 
-#endif
-        break;
-      case PLL_CAT:
-        /* the rate categories are re-optimized in the first three rounds only */
-        if(catOpt < 3)
-        {                            
-          pllEvaluateLikelihood (tr, pr, tr->start, PLL_TRUE, PLL_FALSE);  
-          optimizeRateCategories(tr, pr, tr->categories);
-#ifdef _DEBUG_MOD_OPT
-            pllEvaluateLikelihood (tr, pr, tr->start, PLL_TRUE, PLL_FALSE);  
-            printf("after cat-opt %f\n", tr->likelihood); 
-#endif
-          catOpt++;
-        }
-        break;    
-      default:
-        assert(0);
-    }                   
-
-    /* print both values before the assertion below fires on a worse likelihood */
-    if(tr->likelihood < currentLikelihood)
-     {
-      printf("%.20f %.20f\n", tr->likelihood, currentLikelihood);
-      printf("Difference: %.20f\n",tr->likelihood - currentLikelihood);
-    }
-    /* every round must improve the likelihood by more than 1e-15 */
-    assert (tr->likelihood - currentLikelihood > 0.000000000000001);
-    //assert(tr->likelihood > currentLikelihood);
-
-  }
-  while(fabs(currentLikelihood - tr->likelihood) > likelihoodEpsilon);  
-  /* TODO: Why do we compare the computed likelihood with currentLikelihood, which is the likelihood before THIS optimization round? Why don't we
-     rather compare it with the initial likelihood (the one before calling modOpt)? Isn't it possible to have a deadlock? */
-
-  
-}
-
diff --git a/pllrepo/src/parsePartition.c b/pllrepo/src/parsePartition.c
deleted file mode 100644
index 1ae92af..0000000
--- a/pllrepo/src/parsePartition.c
+++ /dev/null
@@ -1,388 +0,0 @@
-/** 
- * PLL (version 1.0.0) a software library for phylogenetic inference
- * Copyright (C) 2013 Tomas Flouri and Alexandros Stamatakis
- *
- * Derived from 
- * RAxML-HPC, a program for sequential and parallel estimation of phylogenetic
- * trees by Alexandros Stamatakis
- *
- * This program is free software: you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation, either version 3 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- * 
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * For any other enquiries send an Email to Tomas Flouri
- * Tomas.Flouri at h-its.org
- *
- * When publishing work that uses PLL please cite PLL
- * 
- * @file parsePartition.c
- * @brief Collection of routines for parsing and processing a partition (model) file
- *
- * @defgroup parsePartitionFileGroup Reading and parsing partition (model) files
- * This set of functions handles the reading and parsing of partition files, i.e.
- * files that contain alignment partition definitions and corresponding models.
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <assert.h>
-#include <math.h>
-#include <ctype.h>
-
-#include "pll.h"
-#include "pllInternal.h"
-
-extern const char *protModels[PLL_NUM_PROT_MODELS];
-
-/* Release the protein-model lookup table; pllHashDestroy is given rax_free
-   as the callback for the stored items. */
-static void destroy_model_names(pllHashTable * hashTable)
-{
-  pllHashTable * table = hashTable;
-
-  pllHashDestroy (&table, rax_free);
-}
-
-/* Build a hash table that maps every name in protModels[] to a
-   heap-allocated int holding the position of that name in the array. */
-static pllHashTable * init_model_names (void)
-{
-  pllHashTable * table = pllHashInit (PLL_NUM_PROT_MODELS);
-  int idx = 0;
-
-  while (idx < PLL_NUM_PROT_MODELS)
-   {
-     int * slot = (int *) rax_malloc (sizeof (int));
-
-     *slot = idx;
-     pllHashAdd (table, pllHashString(protModels[idx], table->size), protModels[idx], (void *) slot);
-     ++ idx;
-   }
-
-  return table;
-}
-
-/** @ingroup parsePartitionFileGroup
-    @brief Destroy queue structure that contains parsed information from a partition file
-
-    Destroys the structure, and therefore frees allocated memory, that holds parsed information
-    from a partition (model) file. Every queued ::pllPartitionInfo is released together with its
-    region list, its name and its model string. On return \a *partitions is set to NULL so the
-    caller is not left with a dangling pointer. Passing NULL, or the address of a NULL pointer,
-    is a no-op.
-
-    @param partitions
-      Address of the queue structure with parsed info
-*/
-void pllQueuePartitionsDestroy (pllQueue ** partitions)
-{
-  pllPartitionInfo * pi;
-  pllPartitionRegion * region;
-
-  /* nothing to release */
-  if (!partitions || !*partitions) return;
-
-  while (pllQueueRemove (*partitions, (void **)&pi))
-   {
-     while (pllQueueRemove (pi->regionList, (void **) &region))
-      {
-        rax_free (region);
-      }
-     rax_free (pi->regionList);
-     rax_free (pi->partitionName);
-     rax_free (pi->partitionModel);
-     rax_free (pi);
-   }
-  rax_free (*partitions);
-  *partitions = NULL;
-}
-
-/* Parse the token stream of a partition (model) description.
-
-   Each definition has the form
-
-       MODEL, NAME = START[-END[/STRIDE]] [, START[-END[/STRIDE]] ...]
-
-   MODEL is upper-cased and may carry an "ASC_" prefix (sets ascBias). It is
-   BIN/BINX, DNA/DNAX, or a protein model name found in proteinModelsHash,
-   optionally followed by 'X' (optimize base frequencies) or 'F' (empirical
-   frequencies).
-
-   inp points to the first lexer symbol; proteinModelsHash maps protein model
-   names to int indices.
-
-   Returns the queue of pllPartitionInfo records, or 0 after destroying
-   everything parsed so far when the input is malformed or the model is
-   unknown. */
-static pllQueue * parse_partition (int * inp, pllHashTable * proteinModelsHash)
-{
-  int input, i;
-  pllLexToken token;
-  int lines = 0;
-  pllQueue * partitions;
-  pllPartitionInfo * pi;
-  pllPartitionRegion * region;
-  int * protIndexPtr;
-  char * modelptr;
-  size_t modelLen;
-
-  input  = *inp;
-
-  NEXT_TOKEN
-
-  pllQueueInit (&partitions);
-  while (token.tokenType != PLL_TOKEN_EOF)
-  {
-    ++ lines;
-    pi = (pllPartitionInfo *) rax_calloc (1, sizeof (pllPartitionInfo));
-    pllQueueInit (&(pi->regionList));
-    /* pi is owned by the queue from here on, so every error path below can
-       simply destroy the queue */
-    pllQueueAppend (partitions, (void *)pi);
-    CONSUME (PLL_TOKEN_WHITESPACE | PLL_TOKEN_NEWLINE)
-
-
-    /* read partition type */
-    if (token.tokenType != PLL_TOKEN_STRING) 
-     {
-       pllQueuePartitionsDestroy (&partitions);
-       return (0);
-     }
-    pi->partitionModel = my_strndup (token.lexeme, token.len);
-    /* toupper() requires a value representable as unsigned char */
-    for (i = 0; i < token.len; ++i)
-      pi->partitionModel[i] = (char) toupper((unsigned char) pi->partitionModel[i]);
-
-    // check partition model
-    pi->protModels              = -1;
-    pi->protUseEmpiricalFreqs   = PLL_FALSE;
-    pi->ascBias                 = PLL_FALSE;
-    pi->optimizeBaseFrequencies = PLL_FALSE;
-
-    /* check if the model contains Asc bias */
-    if (!strncmp(pi->partitionModel, "ASC_", 4))
-      {
-        pi->ascBias = PLL_TRUE;
-        modelptr    = pi->partitionModel + 4;
-      }
-     else
-        modelptr    = pi->partitionModel;
-
-    /* length of the model name itself; token.len would also count a skipped
-       "ASC_" prefix and index past the end of the string */
-    modelLen = strlen (modelptr);
-
-    /* check first for BINARY */
-    if (!strcmp(modelptr, "BIN") || !strcmp(modelptr, "BINX"))
-     {
-       pi->dataType = PLL_BINARY_DATA;
-
-       if (!strcmp(modelptr, "BINX"))
-         pi->optimizeBaseFrequencies = PLL_TRUE;
-     }  /* now for DNA */
-    else if (!strcmp(modelptr, "DNA") || !strcmp(modelptr, "DNAX"))
-     {
-       pi->dataType   = PLL_DNA_DATA;
-
-       if (!strcmp(modelptr, "DNAX")) 
-         pi->optimizeBaseFrequencies = PLL_TRUE; 
-     }
-    else
-     {                  /* and  protein data */
-       pi->dataType  = PLL_AA_DATA;
-
-       if (pllHashSearch (proteinModelsHash, modelptr, (void **) &protIndexPtr))
-        {
-          pi->protModels              = *protIndexPtr;
-          pi->protUseEmpiricalFreqs   = PLL_FALSE;
-          pi->optimizeBaseFrequencies = PLL_FALSE;
-        }
-       else
-        {
-          char suffix = modelLen ? modelptr[modelLen - 1] : '\0';
-
-          if (suffix == 'X' || suffix == 'F')
-           {
-             int found;
-
-             /* look the name up without its suffix, then restore the suffix
-                so partitionModel keeps the text the user supplied */
-             modelptr[modelLen - 1] = '\0';
-             found = pllHashSearch (proteinModelsHash, modelptr, (void **) &protIndexPtr);
-             modelptr[modelLen - 1] = suffix;
-
-             /* an unknown model is an error with or without a suffix */
-             if (!found)
-              {
-                pllQueuePartitionsDestroy (&partitions);
-                return (0);
-              }
-
-             pi->protModels = *protIndexPtr;
-             if (suffix == 'X')
-               pi->optimizeBaseFrequencies = PLL_TRUE;
-             else
-               pi->protUseEmpiricalFreqs   = PLL_TRUE;
-           }
-          else
-           {
-             pllQueuePartitionsDestroy (&partitions);
-             return (0);
-           }
-        }
-     }
-
-    NEXT_TOKEN
-    CONSUME(PLL_TOKEN_WHITESPACE)
-
-    if (token.tokenType != PLL_TOKEN_COMMA) 
-     {
-       pllQueuePartitionsDestroy (&partitions);
-       return (0);
-     }
-    NEXT_TOKEN
-    CONSUME(PLL_TOKEN_WHITESPACE)
-
-    /* read partition name */
-    if (token.tokenType != PLL_TOKEN_STRING) 
-     {
-       pllQueuePartitionsDestroy (&partitions);
-       return (0);
-     }
-    pi->partitionName = my_strndup (token.lexeme, token.len);
-
-    NEXT_TOKEN
-    CONSUME(PLL_TOKEN_WHITESPACE)
-
-    /* read equal sign */
-    if (token.tokenType != PLL_TOKEN_EQUAL)
-     {
-       pllQueuePartitionsDestroy (&partitions);
-       return (0);
-     }
-    NEXT_TOKEN
-    CONSUME(PLL_TOKEN_WHITESPACE)
-
-    /* read rhs */
-    while (1)
-    {
-      if (token.tokenType != PLL_TOKEN_NUMBER) 
-       {
-         pllQueuePartitionsDestroy (&partitions);
-         return (0);
-       }
-      region = (pllPartitionRegion *) rax_malloc (sizeof (pllPartitionRegion));
-      region->start  = region->end = atoi (token.lexeme);  
-      region->stride = 1;
-      /* hand the region to the queue right away so that the error paths
-         below release it through pllQueuePartitionsDestroy() instead of
-         leaking it; end and stride are completed through the pointer */
-      pllQueueAppend (pi->regionList, (void *)region);
-      NEXT_TOKEN
-      CONSUME(PLL_TOKEN_WHITESPACE)
-      
-      if  (token.tokenType == PLL_TOKEN_DASH)
-       {
-         NEXT_TOKEN
-         CONSUME(PLL_TOKEN_WHITESPACE)
-         if (token.tokenType != PLL_TOKEN_NUMBER) 
-          {
-            pllQueuePartitionsDestroy (&partitions);
-            return (0);
-          }
-         region->end = atoi (token.lexeme);
-         if (region->end < region->start)
-          {
-            pllQueuePartitionsDestroy (&partitions);
-            return (0);
-          }
-         NEXT_TOKEN
-         CONSUME(PLL_TOKEN_WHITESPACE)
-         if (token.tokenType == PLL_TOKEN_SLASH)
-          {
-            NEXT_TOKEN
-            CONSUME(PLL_TOKEN_WHITESPACE)
-            if (token.tokenType != PLL_TOKEN_NUMBER) 
-             {
-               pllQueuePartitionsDestroy (&partitions);
-               return (0);
-             }
-            region->stride = atoi (token.lexeme);
-            NEXT_TOKEN
-          }
-         CONSUME(PLL_TOKEN_WHITESPACE)
-       }
-      
-      if (token.tokenType != PLL_TOKEN_COMMA) break;
-      NEXT_TOKEN
-      CONSUME(PLL_TOKEN_WHITESPACE)
-    }
-   CONSUME(PLL_TOKEN_WHITESPACE | PLL_TOKEN_NEWLINE)
-  }
- 
- return (partitions);
-} 
-
-/** @ingroup parsePartitionFileGroup
-    @brief Print a parsed partition queue to standard output
-
-    Writes one line per partition in the form
-    "MODEL, NAME = START[-END[/STRIDE]], ...". The end of a region is printed
-    only when it differs from the start, and the stride only when it is not 1.
-
-    @param partitions Queue structure containing parsed information
-*/
-void pllPartitionDump (pllQueue * partitions)
-{
-   struct pllQueueItem * partItem;
-
-   for (partItem = partitions->head; partItem; partItem = partItem->next)
-    {
-      pllPartitionInfo * info = (pllPartitionInfo *) partItem->item;
-      struct pllQueueItem * regItem;
-
-      printf ("%s, %s = ", info->partitionModel, info->partitionName);
-
-      for (regItem = info->regionList->head; regItem; regItem = regItem->next)
-       {
-         pllPartitionRegion * reg = (pllPartitionRegion *) regItem->item;
-
-         printf ("%d", reg->start);
-         if (reg->start != reg->end)
-          {
-            printf ("-%d", reg->end);
-            if (reg->stride != 1) printf ("/%d", reg->stride);
-          }
-
-         /* separator between regions, none after the last one */
-         if (regItem->next) printf (", ");
-       }
-
-      printf ("\n");
-    }
-}
-
-/** @ingroup parsePartitionFileGroup
-    @brief Parse a partition (model) file
-
-    Reads \a filename with pllReadFile(), runs the lexer and parse_partition()
-    over its contents and returns the result in a queue structure ::pllQueue.
-    An error message is written to stderr when the file cannot be read.
-
-    @param filename Name of the partition file
-    @return Queue structure with parsed information, or 0 when the file could
-            not be read or parse_partition() rejected its contents
-*/
-pllQueue * pllPartitionParse (const char * filename)
-{
-  long       length;
-  int        symbol;
-  char     * contents;
-  pllQueue * result;
-  pllHashTable * modelTable;
-
-  contents = pllReadFile (filename, &length);
-  if (contents == NULL)
-   {
-     fprintf (stderr, "Error while opening/reading file %s\n", filename);
-     return (0);
-   }
-
-  /* the lexer is handed the string length, not the size pllReadFile reported */
-  length = strlen (contents);
-  init_lexan (contents, length);
-  symbol = get_next_symbol();
-
-  modelTable = init_model_names();
-  result     = parse_partition (&symbol, modelTable);
-  destroy_model_names(modelTable);
-
-  rax_free (contents);
-  return (result);
-}
-
-/** @ingroup parsePartitionFileGroup
-    @brief Parse partition (model) definitions held in a string
-
-    Runs the lexer and parse_partition() over the partition information
-    stored in string \a p and returns the result in a queue structure
-    ::pllQueue
-
-    @param p Partition information string
-    @return  Queue structure with parsed information, or 0 when
-             parse_partition() rejected the string
-*/
-pllQueue * pllPartitionParseString (const char * p)
-{
-  long       length = strlen(p);
-  int        symbol;
-  pllQueue * result;
-  pllHashTable * modelTable;
-
-  init_lexan (p, length);
-  symbol = get_next_symbol();
-
-  modelTable = init_model_names();
-  result     = parse_partition (&symbol, modelTable);
-  destroy_model_names(modelTable);
-
-  return (result);
-}
diff --git a/pllrepo/src/parsePartition.h b/pllrepo/src/parsePartition.h
deleted file mode 100644
index 47799d9..0000000
--- a/pllrepo/src/parsePartition.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/** 
- * PLL (version 1.0.0) a software library for phylogenetic inference
- * Copyright (C) 2013 Tomas Flouri and Alexandros Stamatakis
- *
- * Derived from 
- * RAxML-HPC, a program for sequential and parallel estimation of phylogenetic
- * trees by Alexandros Stamatakis
- *
- * This program is free software: you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation, either version 3 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- * 
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * For any other enquiries send an Email to Tomas Flouri
- * Tomas.Flouri at h-its.org
- *
- * When publishing work that uses PLL please cite PLL
- * 
- * @file parsePartition.h
- */
-#ifndef __pll_PART__
-#define __pll_PART__
-#include "queue.h"
-
-/* One "START[-END[/STRIDE]]" entry of a partition definition */
-typedef struct
-{
-  int start;   /* first number of the entry */
-  int end;     /* number after the dash; equals start when no dash is given */
-  int stride;  /* number after the slash; 1 when no slash is given */
-} pllPartitionRegion;
-
-/* One parsed partition definition ("MODEL, NAME = regions") */
-typedef struct 
-{
-  char * partitionName;          /* heap copy of the partition name token */
-  char * partitionModel;         /* heap copy of the model token, upper-cased by the parser */
-  int protModels;                /* index of the protein model, -1 when none was matched */
-  int protUseEmpiricalFreqs;     /* set for protein models given with an 'F' suffix */
-  int dataType;                  /* PLL_BINARY_DATA, PLL_DNA_DATA or PLL_AA_DATA */
-  int ascBias;                   /* set when the model carries the "ASC_" prefix */
-  int optimizeBaseFrequencies;   /* set for BINX, DNAX and protein models with an 'X' suffix */
-  pllQueue * regionList;         /* queue of pllPartitionRegion entries */
-} pllPartitionInfo;
-#endif
diff --git a/pllrepo/src/parsimony.c b/pllrepo/src/parsimony.c
deleted file mode 100644
index 1fae471..0000000
--- a/pllrepo/src/parsimony.c
+++ /dev/null
@@ -1,865 +0,0 @@
-/** 
- * PLL (version 1.0.0) a software library for phylogenetic inference
- * Copyright (C) 2013 Tomas Flouri and Alexandros Stamatakis
- *
- * Derived from 
- * RAxML-HPC, a program for sequential and parallel estimation of phylogenetic
- * trees by Alexandros Stamatakis
- *
- * This program is free software: you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation, either version 3 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- * 
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * For any other enquiries send an Email to Tomas Flouri
- * Tomas.Flouri at h-its.org
- *
- * When publishing work that uses PLL please cite PLL
- * 
- * @file parsimony.c
- */
-#include "mem_alloc.h"
-
-#ifndef WIN32
-#include <sys/times.h>
-#include <sys/types.h>
-#include <sys/time.h>
-#include <unistd.h>  
-#endif
-
-#include <limits.h>
-#include <math.h>
-#include <time.h> 
-#include <stdlib.h>
-#include <stdio.h>
-#include <ctype.h>
-#include <string.h>
-#include <stdint.h>
-#include <assert.h>
-
-#if defined(__MIC_NATIVE)
-
-#include <immintrin.h>
-
-#define INTS_PER_VECTOR 16
-#define LONG_INTS_PER_VECTOR 8
-#define INT_TYPE __m512i
-#define CAST double*
-#define SET_ALL_BITS_ONE _mm512_set1_epi32(0xFFFFFFFF)
-#define SET_ALL_BITS_ZERO _mm512_setzero_epi32()
-#define VECTOR_LOAD _mm512_load_epi32
-#define VECTOR_STORE  _mm512_store_epi32
-#define VECTOR_BIT_AND _mm512_and_epi32
-#define VECTOR_BIT_OR  _mm512_or_epi32
-#define VECTOR_AND_NOT _mm512_andnot_epi32
-
-#elif defined(__AVX)
-
-#include <xmmintrin.h>
-#include <immintrin.h>
-#include <pmmintrin.h>
-
-#define ULINT_SIZE 64
-#define INTS_PER_VECTOR 8
-#define LONG_INTS_PER_VECTOR 4
-#define INT_TYPE __m256d
-#define CAST double*
-#define SET_ALL_BITS_ONE (__m256d)_mm256_set_epi32(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF)
-#define SET_ALL_BITS_ZERO (__m256d)_mm256_set_epi32(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000)
-#define VECTOR_LOAD _mm256_load_pd
-#define VECTOR_BIT_AND _mm256_and_pd
-#define VECTOR_BIT_OR  _mm256_or_pd
-#define VECTOR_STORE  _mm256_store_pd
-#define VECTOR_AND_NOT _mm256_andnot_pd
-
-#elif (defined(__SSE3))
-
-#include <xmmintrin.h>
-#include <pmmintrin.h>
-  
-#define INTS_PER_VECTOR 4
-#ifdef __i386__
-#define ULINT_SIZE 32
-#define LONG_INTS_PER_VECTOR 4
-#else
-#define ULINT_SIZE 64
-#define LONG_INTS_PER_VECTOR 2
-#endif
-#define INT_TYPE __m128i
-#define CAST __m128i*
-#define SET_ALL_BITS_ONE _mm_set_epi32(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF)
-#define SET_ALL_BITS_ZERO _mm_set_epi32(0x00000000, 0x00000000, 0x00000000, 0x00000000)
-#define VECTOR_LOAD _mm_load_si128
-#define VECTOR_BIT_AND _mm_and_si128
-#define VECTOR_BIT_OR  _mm_or_si128
-#define VECTOR_STORE  _mm_store_si128
-#define VECTOR_AND_NOT _mm_andnot_si128
-
-#endif
-
-#include "pll.h"
-#include "pllInternal.h"
-
-extern const unsigned int mask32[32]; 
-
-/* Count the set bits of a SIMD vector: spill it into an aligned array of
-   unsigned long words and add up the population count of every word. */
-static __inline unsigned int vectorPopcount(INT_TYPE v)
-{
-  unsigned long
-    words[LONG_INTS_PER_VECTOR] __attribute__ ((aligned (PLL_BYTE_ALIGNMENT)));
-
-  int
-    total = 0,
-    w = LONG_INTS_PER_VECTOR;
-
-  VECTOR_STORE((CAST)words, v);
-
-  /* integer addition, so the visiting order of the words does not matter */
-  while(w-- > 0)
-    total += __builtin_popcountl(words[w]);
-
-  return ((unsigned int)total);
-}
-
-/* Add the bits of vector v to the per-site parsimony scores of partition
-   `model`: bit b of word w increments the score at index
-   offset * PLL_PCF + w * ULINT_SIZE + b. */
-static __inline void storePerSiteScores (partitionList * pr, int model, INT_TYPE v, unsigned int offset)
-{
-  unsigned long
-    words[LONG_INTS_PER_VECTOR] __attribute__ ((aligned (PLL_BYTE_ALIGNMENT)));
-
-  int
-    word,
-    bit;
-
-  VECTOR_STORE((CAST)words, v);
-
-  for (word = 0; word < LONG_INTS_PER_VECTOR; ++word)
-   {
-     parsimonyNumber
-       * scores = &(pr->partitionData[model]->perSiteParsScores[offset * PLL_PCF + word * ULINT_SIZE]);
-     unsigned long
-       bits = words[word];
-
-     for (bit = 0; bit < ULINT_SIZE; ++bit)
-        scores[bit] += ((bits >> bit) & 1);
-   }
-}
-
-/* Move the xPars flag to p: p->next is checked first, then p->next->next.
-   The first of the two that carries the flag hands it to p and is cleared.
-   Afterwards one of the three nodes must carry the flag. */
-static void getxnodeLocal (nodeptr p)
-{
-  nodeptr  holder = p->next;
-
-  if(!holder->xPars)
-    holder = holder->next;
-
-  if(holder->xPars)
-    {
-      p->xPars      = holder->xPars;
-      holder->xPars = 0;
-    }
-
-  assert(p->next->xPars || p->next->next->xPars || p->xPars);
-}
-
-/* Append post-order traversal entries for the subtree below p to ti.
-
-   Both neighbours of p (p->next->back, p->next->next->back) are visited
-   first when their number exceeds maxTips; with full == PLL_FALSE only those
-   whose xPars flag is not set are visited. Then p's own entry is written at
-   ti[*counter .. *counter + 2] (p, first neighbour, second neighbour) and
-   *counter advances by 4. */
-static void computeTraversalInfoParsimony(nodeptr p, int *ti, int *counter, int maxTips, pllBoolean full)
-{        
-  nodeptr 
-    child[2];
-
-  int
-    c,
-    slot;
-
-  child[0] = p->next->back;
-  child[1] = p->next->next->back;
-  
-  if(! p->xPars)
-    getxnodeLocal(p);  
-  
-  for(c = 0; c < 2; c++)
-    {
-      if(child[c]->number > maxTips && (full || !child[c]->xPars))
-        computeTraversalInfoParsimony(child[c], ti, counter, maxTips, full);
-    }
-  
-  /* read the counter only now: the recursive calls above advance it */
-  slot = *counter;
-
-  ti[slot]     = p->number;
-  ti[slot + 1] = child[0]->number;
-  ti[slot + 2] = child[1]->number;
-  *counter     = slot + 4;
-}
-
-/* Return PLL_TRUE when, at the given site, the tips 1..tr->mxtips show more
-   than one distinct character code below the data type's undetermined code,
-   i.e. when the site will generate a score; PLL_FALSE otherwise. */
-static pllBoolean isInformative(pllInstance *tr, int dataType, int site)
-{
-  int
-    seen[256] = {0},
-    distinct = 0,
-    tip,
-    code,
-    limit = getUndetermined(dataType);
-
-  const unsigned int
-    *bits = getBitVector(dataType);
-
-  /* mark every character code that occurs at this site */
-  for(tip = 1; tip <= tr->mxtips; tip++)
-    {
-      unsigned char
-        c = tr->yVector[tip][site];
-
-      seen[c] = 1;
-      assert(bits[c] > 0);
-    }
-
-  /* count the marked codes below the undetermined code */
-  for(code = 0; code < limit; code++)
-    distinct += (seen[code] > 0);
-
-  return (distinct > 1) ? PLL_TRUE : PLL_FALSE;
-}
-
-static void compressDNA(pllInstance *tr, partitionList *pr, int *informative, int perSiteScores)
-{
-  size_t
-    totalNodes,
-    i,
-    model;
-   
-  totalNodes = 2 * (size_t)tr->mxtips;
-
- 
-
-  for(model = 0; model < (size_t) pr->numberOfPartitions; model++)
-    {
-      size_t
-        k,
-        states = (size_t)pr->partitionData[model]->states,
-        compressedEntries,
-        compressedEntriesPadded,
-        entries = 0, 
-        lower = pr->partitionData[model]->lower,
-        upper = pr->partitionData[model]->upper;
-
-      parsimonyNumber 
-        **compressedTips = (parsimonyNumber **)rax_malloc(states * sizeof(parsimonyNumber*)),
-        *compressedValues = (parsimonyNumber *)rax_malloc(states * sizeof(parsimonyNumber));
-      
-      for(i = lower; i < upper; i++)    
-        if(informative[i])
-          entries += (size_t)tr->aliaswgt[i];     
-  
-      compressedEntries = entries / PLL_PCF;
-
-      if(entries % PLL_PCF != 0)
-        compressedEntries++;
-
-#if (defined(__SSE3) || defined(__AVX))
-      if(compressedEntries % INTS_PER_VECTOR != 0)
-        compressedEntriesPadded = compressedEntries + (INTS_PER_VECTOR - (compressedEntries % INTS_PER_VECTOR));
-      else
-        compressedEntriesPadded = compressedEntries;
-#else
-      compressedEntriesPadded = compressedEntries;
-#endif     
-
-      
-      rax_posix_memalign ((void **) &(pr->partitionData[model]->parsVect), PLL_BYTE_ALIGNMENT, (size_t)compressedEntriesPadded * states * totalNodes * sizeof(parsimonyNumber));
-      if (perSiteScores)
-       {
-         rax_posix_memalign ((void **) &(pr->partitionData[model]->perSiteParsScores), PLL_BYTE_ALIGNMENT, (size_t)pr->partitionData[model]->width* sizeof (parsimonyNumber));
-         for (i = 0; i < (size_t)pr->partitionData[model]->width; ++i) pr->partitionData[model]->perSiteParsScores[i] = 0;
-       }
-
-     
-      for(i = 0; i < compressedEntriesPadded * states * totalNodes; i++)      
-        pr->partitionData[model]->parsVect[i] = 0;
-
-      for(i = 0; i < (size_t)tr->mxtips; i++)
-        {
-          size_t
-            w = 0,
-            compressedIndex = 0,
-            compressedCounter = 0,
-            index = 0;
-
-          for(k = 0; k < states; k++)
-            {
-              compressedTips[k] = &(pr->partitionData[model]->parsVect[(compressedEntriesPadded * states * (i + 1)) + (compressedEntriesPadded * k)]);
-              compressedValues[k] = 0;
-            }                
-              
-          for(index = lower; index < (size_t)upper; index++)
-            {
-              if(informative[index])
-                {
-                  const unsigned int 
-                    *bitValue = getBitVector(pr->partitionData[model]->dataType);
-
-                  parsimonyNumber 
-                    value = bitValue[tr->yVector[i + 1][index]];          
-              
-                  for(w = 0; w < (size_t)tr->aliaswgt[index]; w++)
-                    {      
-                      for(k = 0; k < states; k++)
-                        {
-                          if(value & mask32[k])
-                            compressedValues[k] |= mask32[compressedCounter];
-                        }
-                     
-                      compressedCounter++;
-                  
-                      if(compressedCounter == PLL_PCF)
-                        {
-                          for(k = 0; k < states; k++)
-                            {
-                              compressedTips[k][compressedIndex] = compressedValues[k];
-                              compressedValues[k] = 0;
-                            }                    
-                          
-                          compressedCounter = 0;
-                          compressedIndex++;
-                        }
-                    }
-                }
-            }
-          
-          for(;compressedIndex < compressedEntriesPadded; compressedIndex++)
-            {   
-              for(;compressedCounter < PLL_PCF; compressedCounter++)              
-                for(k = 0; k < states; k++)
-                  compressedValues[k] |= mask32[compressedCounter];               
-          
-              for(k = 0; k < states; k++)
-                {
-                  compressedTips[k][compressedIndex] = compressedValues[k];
-                  compressedValues[k] = 0;
-                }                     
-              
-              compressedCounter = 0;
-            }           
-        }
-  
-      pr->partitionData[model]->parsimonyLength = compressedEntriesPadded;
-
-      rax_free(compressedTips);
-      rax_free(compressedValues);
-    }
-  
-  rax_posix_memalign ((void **) &(tr->parsimonyScore), PLL_BYTE_ALIGNMENT, sizeof(unsigned int) * totalNodes);  
-          
-  for(i = 0; i < totalNodes; i++) 
-    tr->parsimonyScore[i] = 0;
-}
-
-static void determineUninformativeSites(pllInstance *tr, partitionList *pr, int *informative)
-{
-  int 
-    model,
-    number = 0,
-    i;
-
-  /* 
-     Not all characters are useful in constructing a parsimony tree. 
-     Invariant characters, those that have the same state in all taxa, 
-     are obviously useless and are ignored by the method. Characters in 
-     which a state occurs in only one taxon are also ignored. 
-     All these characters are called parsimony uninformative.
-
-     Alternative definition: informative columns contain at least two types
-     of nucleotides, and each nucleotide must appear at least twice in each 
-     column. Kind of a pain if we intend to check for this when using, e.g.,
-     amibiguous DNA encoding.
-  */
-
-
-  for(model = 0; model < pr->numberOfPartitions; model++)
-    {
-      for(i = pr->partitionData[model]->lower; i < pr->partitionData[model]->upper; i++)
-        {
-           if(isInformative(tr, pr->partitionData[model]->dataType, i))
-             informative[i] = 1;
-           else
-             {
-               informative[i] = 0;
-               number++;
-             }  
-        }      
-    }
-
-  /* printf("Uninformative Patterns: %d\n", number); */
-}
-
-void pllInitParsimonyStructures(pllInstance *tr, partitionList *pr, pllBoolean perSiteScores)
-{
-  int 
-    i,
-    *informative = (int *)rax_malloc(sizeof(int) * (size_t)tr->originalCrunchedLength);
-
-  for (i = 0; i < pr->numberOfPartitions; ++ i)
-     rax_free (pr->partitionData[i]->parsVect);
-
-  rax_free (tr->parsimonyScore);
- 
-  determineUninformativeSites(tr, pr, informative);
-
-  compressDNA(tr, pr, informative, perSiteScores);
-
-  for(i = tr->mxtips + 1; i <= tr->mxtips + tr->mxtips - 1; i++)
-    {
-      nodeptr 
-        p = tr->nodep[i];
-
-      p->xPars             = 1;
-      p->next->xPars       = 0;
-      p->next->next->xPars = 0;
-    }
-
-  tr->ti = (int*)rax_malloc(sizeof(int) * 4 * (size_t)tr->mxtips);  
-
-  rax_free(informative); 
-}
-
-static void newviewParsimonyIterativeFast(pllInstance *tr, partitionList *pr, pllBoolean perSiteScores)
-{    
-  INT_TYPE
-    allOne = SET_ALL_BITS_ONE;
-
-  int 
-    model,
-    *ti = tr->ti,
-    count = ti[0],
-    index; 
-
-  for(index = 4; index < count; index += 4)
-    {      
-      unsigned int
-        totalScore = 0;
-
-      size_t
-        pNumber = (size_t)ti[index],
-        qNumber = (size_t)ti[index + 1],
-        rNumber = (size_t)ti[index + 2];
-      
-      for(model = 0; model < pr->numberOfPartitions; model++)
-        {
-          size_t
-            k,
-            states = pr->partitionData[model]->states,
-            width = pr->partitionData[model]->parsimonyLength;
-            
-          unsigned int  
-            i;      
-                 
-          switch(states)
-            {
-            case 2:       
-              {
-                parsimonyNumber
-                  *left[2],
-                  *right[2],
-                  *this[2];
-
-                for(k = 0; k < 2; k++)
-                  {
-                    left[k]  = &(pr->partitionData[model]->parsVect[(width * 2 * qNumber) + width * k]);
-                    right[k] = &(pr->partitionData[model]->parsVect[(width * 2 * rNumber) + width * k]);
-                    this[k]  = &(pr->partitionData[model]->parsVect[(width * 2 * pNumber) + width * k]);
-                  }
-
-                for(i = 0; i < width; i += INTS_PER_VECTOR)
-                  {               
-                    INT_TYPE
-                      s_r, s_l, v_N,
-                      l_A, l_C,
-                      v_A, v_C;          
-                    
-                    s_l = VECTOR_LOAD((CAST)(&left[0][i]));
-                    s_r = VECTOR_LOAD((CAST)(&right[0][i]));
-                    l_A = VECTOR_BIT_AND(s_l, s_r);
-                    v_A = VECTOR_BIT_OR(s_l, s_r);
-                    
-                    s_l = VECTOR_LOAD((CAST)(&left[1][i]));
-                    s_r = VECTOR_LOAD((CAST)(&right[1][i]));
-                    l_C = VECTOR_BIT_AND(s_l, s_r);
-                    v_C = VECTOR_BIT_OR(s_l, s_r);                                                                
-                    
-                    v_N = VECTOR_BIT_OR(l_A, l_C);
-                    
-                    VECTOR_STORE((CAST)(&this[0][i]), VECTOR_BIT_OR(l_A, VECTOR_AND_NOT(v_N, v_A)));
-                    VECTOR_STORE((CAST)(&this[1][i]), VECTOR_BIT_OR(l_C, VECTOR_AND_NOT(v_N, v_C)));                                                                    
-                    
-                    v_N = VECTOR_AND_NOT(v_N, allOne);
-                    
-                    totalScore += vectorPopcount(v_N);            
-                    if (perSiteScores)
-                       storePerSiteScores (pr, model, v_N, i);
-                  }
-              }
-              break;
-            case 4:
-              {
-                parsimonyNumber
-                  *left[4],
-                  *right[4],
-                  *this[4];
-
-                for(k = 0; k < 4; k++)
-                  {
-                    left[k]  = &(pr->partitionData[model]->parsVect[(width * 4 * qNumber) + width * k]);
-                    right[k] = &(pr->partitionData[model]->parsVect[(width * 4 * rNumber) + width * k]);
-                    this[k]  = &(pr->partitionData[model]->parsVect[(width * 4 * pNumber) + width * k]);
-                  }
-                for(i = 0; i < width; i += INTS_PER_VECTOR)
-                  {               
-                    INT_TYPE
-                      s_r, s_l, v_N,
-                      l_A, l_C, l_G, l_T,
-                      v_A, v_C, v_G, v_T;                
-                    
-                    s_l = VECTOR_LOAD((CAST)(&left[0][i]));
-                    s_r = VECTOR_LOAD((CAST)(&right[0][i]));
-                    l_A = VECTOR_BIT_AND(s_l, s_r);
-                    v_A = VECTOR_BIT_OR(s_l, s_r);
-                    
-                    s_l = VECTOR_LOAD((CAST)(&left[1][i]));
-                    s_r = VECTOR_LOAD((CAST)(&right[1][i]));
-                    l_C = VECTOR_BIT_AND(s_l, s_r);
-                    v_C = VECTOR_BIT_OR(s_l, s_r);
-                    
-                    s_l = VECTOR_LOAD((CAST)(&left[2][i]));
-                    s_r = VECTOR_LOAD((CAST)(&right[2][i]));
-                    l_G = VECTOR_BIT_AND(s_l, s_r);
-                    v_G = VECTOR_BIT_OR(s_l, s_r);
-                    
-                    s_l = VECTOR_LOAD((CAST)(&left[3][i]));
-                    s_r = VECTOR_LOAD((CAST)(&right[3][i]));
-                    l_T = VECTOR_BIT_AND(s_l, s_r);
-                    v_T = VECTOR_BIT_OR(s_l, s_r);
-                    
-                    v_N = VECTOR_BIT_OR(VECTOR_BIT_OR(l_A, l_C), VECTOR_BIT_OR(l_G, l_T));                                
-                    
-                    VECTOR_STORE((CAST)(&this[0][i]), VECTOR_BIT_OR(l_A, VECTOR_AND_NOT(v_N, v_A)));
-                    VECTOR_STORE((CAST)(&this[1][i]), VECTOR_BIT_OR(l_C, VECTOR_AND_NOT(v_N, v_C)));
-                    VECTOR_STORE((CAST)(&this[2][i]), VECTOR_BIT_OR(l_G, VECTOR_AND_NOT(v_N, v_G)));
-                    VECTOR_STORE((CAST)(&this[3][i]), VECTOR_BIT_OR(l_T, VECTOR_AND_NOT(v_N, v_T)));                                                    
-                    
-                    v_N = VECTOR_AND_NOT(v_N, allOne);
-                    
-                    totalScore += vectorPopcount(v_N);  
-                    
-                    if (perSiteScores)
-                       storePerSiteScores (pr, model, v_N, i);
-                  }
-              }
-              break;
-            case 20:
-              {
-                parsimonyNumber
-                  *left[20],
-                  *right[20],
-                  *this[20];
-
-                for(k = 0; k < 20; k++)
-                  {
-                    left[k]  = &(pr->partitionData[model]->parsVect[(width * 20 * qNumber) + width * k]);
-                    right[k] = &(pr->partitionData[model]->parsVect[(width * 20 * rNumber) + width * k]);
-                    this[k]  = &(pr->partitionData[model]->parsVect[(width * 20 * pNumber) + width * k]);
-                  }
-
-                for(i = 0; i < width; i += INTS_PER_VECTOR)
-                  {               
-                    size_t j;
-                    
-                    INT_TYPE
-                      s_r, s_l, 
-                      v_N = SET_ALL_BITS_ZERO,
-                      l_A[20], 
-                      v_A[20];           
-                    
-                    for(j = 0; j < 20; j++)
-                      {
-                        s_l = VECTOR_LOAD((CAST)(&left[j][i]));
-                        s_r = VECTOR_LOAD((CAST)(&right[j][i]));
-                        l_A[j] = VECTOR_BIT_AND(s_l, s_r);
-                        v_A[j] = VECTOR_BIT_OR(s_l, s_r);
-                        
-                        v_N = VECTOR_BIT_OR(v_N, l_A[j]);
-                      }
-                    
-                    for(j = 0; j < 20; j++)                 
-                      VECTOR_STORE((CAST)(&this[j][i]), VECTOR_BIT_OR(l_A[j], VECTOR_AND_NOT(v_N, v_A[j])));                                                                    
-                    
-                    v_N = VECTOR_AND_NOT(v_N, allOne);
-                    
-                    totalScore += vectorPopcount(v_N);
-
-                    if (perSiteScores)
-                       storePerSiteScores (pr, model, v_N, i);
-                  }
-              }
-              break;
-            default:
-              {
-                parsimonyNumber
-                  *left[32], 
-                  *right[32],
-                  *this[32];
-
-                assert(states <= 32);
-                
-                for(k = 0; k < states; k++)
-                  {
-                    left[k]  = &(pr->partitionData[model]->parsVect[(width * states * qNumber) + width * k]);
-                    right[k] = &(pr->partitionData[model]->parsVect[(width * states * rNumber) + width * k]);
-                    this[k]  = &(pr->partitionData[model]->parsVect[(width * states * pNumber) + width * k]);
-                  }
-
-                for(i = 0; i < width; i += INTS_PER_VECTOR)
-                  {               
-                    size_t j;
-                    
-                    INT_TYPE
-                      s_r, s_l, 
-                      v_N = SET_ALL_BITS_ZERO,
-                      l_A[32], 
-                      v_A[32];           
-                    
-                    for(j = 0; j < states; j++)
-                      {
-                        s_l = VECTOR_LOAD((CAST)(&left[j][i]));
-                        s_r = VECTOR_LOAD((CAST)(&right[j][i]));
-                        l_A[j] = VECTOR_BIT_AND(s_l, s_r);
-                        v_A[j] = VECTOR_BIT_OR(s_l, s_r);
-                        
-                        v_N = VECTOR_BIT_OR(v_N, l_A[j]);
-                      }
-                    
-                    for(j = 0; j < states; j++)             
-                      VECTOR_STORE((CAST)(&this[j][i]), VECTOR_BIT_OR(l_A[j], VECTOR_AND_NOT(v_N, v_A[j])));                                                                    
-                    
-                    v_N = VECTOR_AND_NOT(v_N, allOne);
-                    
-                    totalScore += vectorPopcount(v_N);
-
-                    if (perSiteScores)
-                       storePerSiteScores (pr, model, v_N, i);
-                  }                             
-              }
-            }            
-        }
-      tr->parsimonyScore[pNumber] = totalScore + tr->parsimonyScore[rNumber] + tr->parsimonyScore[qNumber];      
-    }
-}
-
-static unsigned int evaluateParsimonyIterativeFast(pllInstance *tr, partitionList *pr, pllBoolean perSiteScores)
-{
-  INT_TYPE 
-    allOne = SET_ALL_BITS_ONE;
-
-  size_t 
-    pNumber = (size_t)tr->ti[1],
-    qNumber = (size_t)tr->ti[2];
-
-  int
-    model;
-
-  unsigned int 
-    bestScore = tr->bestParsimony,    
-    sum;
-
-  if(tr->ti[0] > 4)
-    newviewParsimonyIterativeFast(tr, pr, perSiteScores);
-
-  sum = tr->parsimonyScore[pNumber] + tr->parsimonyScore[qNumber];
-
-  for(model = 0; model < pr->numberOfPartitions; model++)
-    {
-      size_t
-        k,
-        states = pr->partitionData[model]->states,
-        width  = pr->partitionData[model]->parsimonyLength,
-        i;
-
-       switch(states)
-         {
-         case 2:
-           {
-             parsimonyNumber
-               *left[2],
-               *right[2];
-             
-             for(k = 0; k < 2; k++)
-               {
-                 left[k]  = &(pr->partitionData[model]->parsVect[(width * 2 * qNumber) + width * k]);
-                 right[k] = &(pr->partitionData[model]->parsVect[(width * 2 * pNumber) + width * k]);
-               }     
-             
-             for(i = 0; i < width; i += INTS_PER_VECTOR)
-               {                                               
-                 INT_TYPE      
-                   l_A = VECTOR_BIT_AND(VECTOR_LOAD((CAST)(&left[0][i])), VECTOR_LOAD((CAST)(&right[0][i]))),
-                   l_C = VECTOR_BIT_AND(VECTOR_LOAD((CAST)(&left[1][i])), VECTOR_LOAD((CAST)(&right[1][i]))),            
-                   v_N = VECTOR_BIT_OR(l_A, l_C);
-                 
-                 v_N = VECTOR_AND_NOT(v_N, allOne);
-                 
-                 sum += vectorPopcount(v_N);
-                  if (perSiteScores)
-                    storePerSiteScores (pr, model, v_N, i);
-               }
-           }
-           break;
-         case 4:
-           {
-             parsimonyNumber
-               *left[4],
-               *right[4];
-      
-             for(k = 0; k < 4; k++)
-               {
-                 left[k]  = &(pr->partitionData[model]->parsVect[(width * 4 * qNumber) + width * k]);
-                 right[k] = &(pr->partitionData[model]->parsVect[(width * 4 * pNumber) + width * k]);
-               }        
-
-             for(i = 0; i < width; i += INTS_PER_VECTOR)
-               {                                                
-                 INT_TYPE      
-                   l_A = VECTOR_BIT_AND(VECTOR_LOAD((CAST)(&left[0][i])), VECTOR_LOAD((CAST)(&right[0][i]))),
-                   l_C = VECTOR_BIT_AND(VECTOR_LOAD((CAST)(&left[1][i])), VECTOR_LOAD((CAST)(&right[1][i]))),
-                   l_G = VECTOR_BIT_AND(VECTOR_LOAD((CAST)(&left[2][i])), VECTOR_LOAD((CAST)(&right[2][i]))),
-                   l_T = VECTOR_BIT_AND(VECTOR_LOAD((CAST)(&left[3][i])), VECTOR_LOAD((CAST)(&right[3][i]))),
-                   v_N = VECTOR_BIT_OR(VECTOR_BIT_OR(l_A, l_C), VECTOR_BIT_OR(l_G, l_T));     
-                 
-                 v_N = VECTOR_AND_NOT(v_N, allOne);
-                 
-                 sum += vectorPopcount(v_N);
-                  if (perSiteScores)
-                    storePerSiteScores (pr, model, v_N, i);
-               }                 
-           }
-           break;
-         case 20:
-           {
-             parsimonyNumber
-               *left[20],
-               *right[20];
-             
-              for(k = 0; k < 20; k++)
-                {
-                  left[k]  = &(pr->partitionData[model]->parsVect[(width * 20 * qNumber) + width * k]);
-                  right[k] = &(pr->partitionData[model]->parsVect[(width * 20 * pNumber) + width * k]);
-                }  
-           
-              for(i = 0; i < width; i += INTS_PER_VECTOR)
-                {                              
-                  int 
-                    j;
-                  
-                  INT_TYPE      
-                    l_A,
-                    v_N = SET_ALL_BITS_ZERO;     
-                  
-                  for(j = 0; j < 20; j++)
-                    {
-                      l_A = VECTOR_BIT_AND(VECTOR_LOAD((CAST)(&left[j][i])), VECTOR_LOAD((CAST)(&right[j][i])));
-                      v_N = VECTOR_BIT_OR(l_A, v_N);
-                    }
-                  
-                  v_N = VECTOR_AND_NOT(v_N, allOne);
-                  
-                  sum += vectorPopcount(v_N);          
-                  if (perSiteScores)
-                    storePerSiteScores (pr, model, v_N, i);
-                }
-           }
-           break;
-         default:
-           {
-             parsimonyNumber
-               *left[32],  
-               *right[32]; 
-
-             assert(states <= 32);
-
-             for(k = 0; k < states; k++)
-               {
-                 left[k]  = &(pr->partitionData[model]->parsVect[(width * states * qNumber) + width * k]);
-                 right[k] = &(pr->partitionData[model]->parsVect[(width * states * pNumber) + width * k]);
-               }  
-           
-             for(i = 0; i < width; i += INTS_PER_VECTOR)
-               {                               
-                 size_t
-                   j;
-                 
-                 INT_TYPE      
-                   l_A,
-                   v_N = SET_ALL_BITS_ZERO;     
-                 
-                 for(j = 0; j < states; j++)
-                   {
-                     l_A = VECTOR_BIT_AND(VECTOR_LOAD((CAST)(&left[j][i])), VECTOR_LOAD((CAST)(&right[j][i])));
-                     v_N = VECTOR_BIT_OR(l_A, v_N);
-                   }
-                 
-                 v_N = VECTOR_AND_NOT(v_N, allOne);
-                 
-                 sum += vectorPopcount(v_N);           
-                 if (perSiteScores)
-                   storePerSiteScores (pr, model, v_N, i);
-               }
-           }
-         }
-    }
-  
-  return sum;
-}
-
-unsigned int pllEvaluateParsimony(pllInstance *tr, partitionList *pr, nodeptr p, pllBoolean full, pllBoolean perSiteScores)
-{
-  volatile unsigned int result;
-  nodeptr q = p->back;
-  int
-    *ti = tr->ti,
-    counter = 4;
-  
-  ti[1] = p->number;
-  ti[2] = q->number;
-
-  if(full)
-    {
-      if(p->number > tr->mxtips)
-        computeTraversalInfoParsimony(p, ti, &counter, tr->mxtips, full);
-      if(q->number > tr->mxtips)
-        computeTraversalInfoParsimony(q, ti, &counter, tr->mxtips, full); 
-    }
-  else
-    {
-      if(p->number > tr->mxtips && !p->xPars)
-        computeTraversalInfoParsimony(p, ti, &counter, tr->mxtips, full);
-      if(q->number > tr->mxtips && !q->xPars)
-        computeTraversalInfoParsimony(q, ti, &counter, tr->mxtips, full); 
-    }
-
-  ti[0] = counter;
-
-  result = evaluateParsimonyIterativeFast(tr, pr, perSiteScores);
-
-  return result;
-}
diff --git a/pllrepo/src/pll.h b/pllrepo/src/pll.h
deleted file mode 100644
index 065ddc5..0000000
--- a/pllrepo/src/pll.h
+++ /dev/null
@@ -1,1692 +0,0 @@
-/** 
- * PLL (version 1.0.0) a software library for phylogenetic inference
- * Copyright (C) 2013 Tomas Flouri and Alexandros Stamatakis
- *
- * Derived from 
- * RAxML-HPC, a program for sequential and parallel estimation of phylogenetic
- * trees by Alexandros Stamatakis
- *
- * This program is free software: you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation, either version 3 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- * 
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * For any other enquiries send an Email to Tomas Flouri
- * Tomas.Flouri at h-its.org
- *
- * When publishing work that uses PLL please cite PLL
- *
- * ABSTRACT
- * 
- * PLL is a highly optimized, parallelized software library to ease the
- * development of new software tools dealing with phylogenetic inference. Among
- * the functions included in PLL are 
- *
- * DOCUMENTATION
- *
- * Extensive documentation for using PLL is available online at
- * 
- *                 http://www.libpll.org
- *
- *
- * USAGE
- *
- * To use PLL, 
- *
- * @file pll.h
- * @brief Data structures for tree and model 
- *
- * @author Tomas Flouri
- * @author Fernando Izquierdo-Carrasco
- * @author Andre Aberer
- * @author Alexandros Stamatakis
- */
-
-#ifndef __pll__
-#define __pll__
-
-#include <stdint.h>
-#include <stdio.h>
-#include <errno.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#ifdef __MIC_NATIVE
-#define PLL_BYTE_ALIGNMENT 64
-#define PLL_VECTOR_WIDTH 8
-#elif defined (__AVX)
-
-#include <xmmintrin.h>
-#include <immintrin.h>
-#include <pmmintrin.h>
-
-#define PLL_BYTE_ALIGNMENT 32
-#define PLL_VECTOR_WIDTH 4
-
-#elif defined (__SSE3)
-
-#include <xmmintrin.h>
-#include <pmmintrin.h>
-
-#define PLL_BYTE_ALIGNMENT 16
-#define PLL_VECTOR_WIDTH 2
-
-#else
-#define PLL_BYTE_ALIGNMENT 1
-#define PLL_VECTOR_WIDTH 1
-#endif
-
-#ifdef _MSC_VER
-	#define PLL_ALIGN_BEGIN __declspec(align(PLL_BYTE_ALIGNMENT))
-	#define PLL_ALIGN_END
-#else
-	#define PLL_ALIGN_BEGIN
-	#define PLL_ALIGN_END __attribute__((aligned(PLL_BYTE_ALIGNMENT)))
-#endif
-
-
-#include "stack.h"
-#include "newick.h"
-#include "queue.h"
-
-#define PLL_MAX_TIP_EV                          0.999999999 /* max tip vector value, sum of EVs needs to be smaller than 1.0, otherwise the numerics break down */
-#define PLL_MAX_LOCAL_SMOOTHING_ITERATIONS      32          /** @brief maximum iterations of smoothings per insert in the */
-#define PLL_ITERATIONS                          10          /* maximum iterations of iterations per insert */
-#define PLL_NEWZPERCYCLE                        10           /* iterations of makenewz per tree traversal */
-#define PLL_NMLNGTH                             256         /* number of characters in species name */
-#define PLL_DELTAZ                              0.00001     /* test of net branch length change in update */
-#define PLL_DEFAULTZ                            0.9         /* value of z assigned as starting point */
-#define PLL_UNLIKELY                            -1.0E300    /* low likelihood for initialization */
-#define PLL_SUMMARIZE_LENGTH                    -3
-#define PLL_SUMMARIZE_LH                        -2
-#define PLL_NO_BRANCHES                         -1
-#define PLL_MASK_LENGTH                         32
-#define PLL_ZMIN                                1.0E-15  /* max branch prop. to -log(PLL_ZMIN) (= 34) */
-#define PLL_ZMAX                                (1.0 - 1.0E-6) /* min branch prop. to 1.0-zmax (= 1.0E-6) */
-#define PLL_TWOTOTHE256                         115792089237316195423570985008687907853269984665640564039457584007913129639936.0  /*  2**256 (exactly)  */
-#define PLL_MINLIKELIHOOD                       (1.0/PLL_TWOTOTHE256)
-#define PLL_MINUSMINLIKELIHOOD                  -PLL_MINLIKELIHOOD
-
-
-#define PLL_FORMAT_PHYLIP                       1 
-#define PLL_FORMAT_FASTA                        2
-#define PLL_FORMAT_NEWICK                       3
-
-#define PLL_NNI_P_NEXT                          1       /**< Use p->next for the NNI move */
-#define PLL_NNI_P_NEXTNEXT                      2       /**< Use p->next->next for the NNI move */
-
-#define PLL_BADREAR                             -1
-
-#define PLL_NUM_BRANCHES                        1024
-
-#define PLL_TRUE                                1
-#define PLL_FALSE                               0
-
-#define PLL_REARRANGE_SPR                       0
-#define PLL_REARRANGE_TBR                       1
-#define PLL_REARRANGE_NNI                       2
-
-#define PLL_AA_SCALE                            10.0
-#define PLL_AA_SCALE_PLUS_EPSILON               10.001
-
-/* ALPHA_MIN is critical -> numerical instability, eg for 4 discrete rate cats                    */
-/* and alpha = 0.01 the lowest rate r_0 is                                                        */
-/* 0.00000000000000000000000000000000000000000000000000000000000034878079110511010487             */
-/* which leads to numerical problems Table for alpha settings below:                              */
-/*                                                                                                */
-/* 0.010000 0.00000000000000000000000000000000000000000000000000000000000034878079110511010487    */
-/* 0.010000 yielded nasty numerical bugs in at least one case !                                   */
-/* 0.020000 0.00000000000000000000000000000044136090435925743185910935350715027016962154188875    */
-/* 0.030000 0.00000000000000000000476844846859006690412039180149775802624789852441798419292220    */
-/* 0.040000 0.00000000000000049522423236954066431210260930029681736928018820007024736185030633    */
-/* 0.050000 0.00000000000050625351310359203371872643495343928538368616365517027588794007897377    */
-/* 0.060000 0.00000000005134625283884191118711474021861409372524676086868566926568746566772461    */
-/* 0.070000 0.00000000139080650074206434685544624965062437960128249869740102440118789672851562    */
-/* 0.080000 0.00000001650681201563587066858709818343436959153791576682124286890029907226562500    */
-/* 0.090000 0.00000011301977332931251259273962858978301859735893231118097901344299316406250000    */
-/* 0.100000 0.00000052651925834844387815526344648331402709118265192955732345581054687500000000    */
-
-#define PLL_ALPHA_MIN                           0.02
-#define PLL_ALPHA_MAX                           1000.0
-
-#define PLL_RATE_MIN                            0.0000001
-#define PLL_RATE_MAX                            1000000.0
-
-#define PLL_LG4X_RATE_MIN                       0.0000001
-#define PLL_LG4X_RATE_MAX                       1000.0
-
-#define PLL_FREQ_MIN                            0.001
-
-#define PLL_NUM_AA_STATES                       20
-#define PLL_NUM_DNA_STATES                      4
-
-/* 
-   previous values between 0.001 and 0.000001
-
-   TO AVOID NUMERICAL PROBLEMS WHEN FREQ == 0 IN PARTITIONED MODELS, ESPECIALLY WITH AA 
-   previous value of FREQ_MIN was: 0.000001, but this seemed to cause problems with some 
-   of the 7-state secondary structure models with some rather exotic small toy test datasets,
-   on the other hand 0.001 caused problems with some of the 16-state secondary structure models
-
-   For some reason the frequency settings seem to be repeatedly causing numerical problems
-*/
-
-#define PLL_ITMAX                               100    /* max number of iterations in brent's algorithm */
-
-#define PLL_SHFT(a,b,c,d)                       (a)=(b);(b)=(c);(c)=(d);
-#define PLL_SIGN(a,b)                           ((b) > 0.0 ? fabs(a) : -fabs(a))
-#define PLL_ABS(x)                              (((x)<0)   ?  (-(x)) : (x))
-#define PLL_MIN(x,y)                            (((x)<(y)) ?    (x)  : (y))
-#define PLL_MAX(x,y)                            (((x)>(y)) ?    (x)  : (y))
-#define PLL_SWAP(x,y)                           do{ __typeof__ (x) _t = x; x = y; y = _t; } while(0)
-#define PLL_SWAP_PTR(x,y) do{ char* _t = x; x = y; y = _t; } while(0)
-#define PLL_SWAP_INT(x,y) do{ int _t = x; x = y; y = _t; } while(0)
-
-#define PLL_POINT_GAMMA(prob,alpha,beta)        PointChi2(prob,2.0*(alpha))/(2.0*(beta))
-
-#define PLL_LIB_NAME                            "PLL"
-#define PLL_LIB_VERSION                         "1.0.1"
-#define PLL_LIB_DATE                            "November 3 2014"
-
-/* aminoacid substitution models */
-#define PLL_DAYHOFF                             0
-#define PLL_DCMUT                               1
-#define PLL_JTT                                 2
-#define PLL_MTREV                               3
-#define PLL_WAG                                 4
-#define PLL_RTREV                               5
-#define PLL_CPREV                               6
-#define PLL_VT                                  7
-#define PLL_BLOSUM62                            8
-#define PLL_MTMAM                               9
-#define PLL_LG                                  10
-#define PLL_MTART                               11
-#define PLL_MTZOA                               12
-#define PLL_PMB                                 13
-#define PLL_HIVB                                14
-#define PLL_HIVW                                15
-#define PLL_JTTDCMUT                            16
-#define PLL_FLU                                 17 
-#define PLL_AUTO                                18
-#define PLL_LG4M                                 19
-#define PLL_LG4X                                20
-#define PLL_GTR                                 21  /* GTR always needs to be the last one */
-#define PLL_NUM_PROT_MODELS                     22
-
-/* information criteria for auto protein model selection */
-#define PLL_AUTO_ML   0
-#define PLL_AUTO_BIC  1
-#define PLL_AUTO_AIC  2
-#define PLL_AUTO_AICC 3
-
-/* bipartition stuff */
-#define PLL_BIPARTITIONS_RF                     4
-
-/* scenarios for likelihood computation */
-#define PLL_TIP_TIP                             0
-#define PLL_TIP_INNER                           1
-#define PLL_INNER_INNER                         2
-
-
-/* available data types in PLL */
-#define PLL_MIN_MODEL                          -1
-#define PLL_BINARY_DATA                         0
-#define PLL_DNA_DATA                            1
-#define PLL_AA_DATA                             2
-#define PLL_SECONDARY_DATA                      3
-#define PLL_SECONDARY_DATA_6                    4
-#define PLL_SECONDARY_DATA_7                    5
-#define PLL_GENERIC_32                          6
-#define PLL_GENERIC_64                          7
-#define PLL_MAX_MODEL                           8
-
-#define PLL_SEC_6_A                             0
-#define PLL_SEC_6_B                             1
-#define PLL_SEC_6_C                             2
-#define PLL_SEC_6_D                             3
-#define PLL_SEC_6_E                             4
-
-#define PLL_SEC_7_A                             5
-#define PLL_SEC_7_B                             6
-#define PLL_SEC_7_C                             7
-#define PLL_SEC_7_D                             8
-#define PLL_SEC_7_E                             9
-#define PLL_SEC_7_F                             10
-
-#define PLL_SEC_16                              11
-#define PLL_SEC_16_A                            12
-#define PLL_SEC_16_B                            13
-#define PLL_SEC_16_C                            14
-#define PLL_SEC_16_D                            15
-#define PLL_SEC_16_E                            16
-#define PLL_SEC_16_F                            17
-#define PLL_SEC_16_I                            18
-#define PLL_SEC_16_J                            19
-#define PLL_SEC_16_K                            20
-
-#define PLL_ORDERED_MULTI_STATE                 0
-#define PLL_MK_MULTI_STATE                      1
-#define PLL_GTR_MULTI_STATE                     2
-
-
-/* available models of rate heterogeneity in PLL */
-#define PLL_CAT                                 0
-#define PLL_GAMMA                               1
-
-/* recomp */
-#define PLL_SLOT_UNUSED                        -2  /* value to mark an available vector */
-#define PLL_NODE_UNPINNED                      -3  /* marks an inner node as not available in RAM */
-#define PLL_INNER_NODE_INIT_STLEN              -1  /* initialization */
-
-#define PLL_MIN_RECOM_FRACTION     0.1 /* at least this % of inner nodes will be allocated in RAM */
-#define PLL_MAX_RECOM_FRACTION     1.0 /* always 1, just there for boundary checks */
-
-
-typedef  int pllBoolean;
-
-/* @brief PLL instance attribute structure */
-typedef struct
-{
-  int rateHetModel;
-  int fastScaling;
-  int saveMemory;
-  int useRecom;
-  long randomNumberSeed;
-  int numberOfThreads;
-} pllInstanceAttr;
-
-/** @brief Stores the recomputation-state of likelihood vectors  */
-typedef struct
-{
-  int numVectors;      /**< Number of inner vectors allocated in RAM*/
-  int *iVector;        /**< size: numVectors, stores node id || PLL_SLOT_UNUSED  */
-  int *iNode;          /**< size: inner nodes, stores slot id || PLL_NODE_UNPINNED */
-  int *stlen;          /**< Number of tips behind the current orientation of the indexed inner node (subtree size/cost) */ 
-  int *unpinnable;     /**< size:numVectors , TRUE if we dont need the vector */
-  int maxVectorsUsed;  
-  pllBoolean allSlotsBusy; /**< on if all slots contain an ancesctral node (the usual case after first full traversal) */ 
-} recompVectors;
-/* E recomp */
-
-/** @brief ???
- * @todo add explanation, is this ever used?  */
- 
-typedef unsigned int hashNumberType;
-
-
-
-/*typedef uint_fast32_t parsimonyNumber;*/
-
-#define PLL_PCF 32
-
-/** @brief ???Hash tables 
- * @todo add explanation of all hash tables  */
-typedef struct pllBipartitionEntry
-{
-  unsigned int *bitVector;
-  unsigned int *treeVector;
-  unsigned int amountTips;
-  int *supportVector;
-  unsigned int bipNumber;
-  unsigned int bipNumber2;
-  unsigned int supportFromTreeset[2]; 
-  struct pllBipartitionEntry *next;
-} pllBipartitionEntry;
-
-//typedef struct
-//{
-//  hashNumberType tableSize;
-//  entry **table;
-//  hashNumberType entryCount;
-//}
-//  hashtable;
-//struct stringEnt
-//{
-//  int nodeNumber;
-//  char *word;
-//  struct stringEnt *next;
-//};
-//
-//typedef struct stringEnt stringEntry;
-//typedef struct
-//{
-//  hashNumberType tableSize;
-//  stringEntry **table;
-//}
-//  stringHashtable;
-
-typedef struct pllHashItem
-{
-  void * data;
-  char * str;
-  struct pllHashItem * next;
-} pllHashItem;
-
-typedef struct pllHashTable
-{
-  unsigned int size;
-  struct pllHashItem ** Items;
-  unsigned int entries;
-} pllHashTable;
-
-
-
-
-/** @brief Per-site Rate category entry: likelihood per-site and CAT rate applied ???
-  *
-  */
-typedef struct ratec
-{
-  double accumulatedSiteLikelihood;
-  double rate;
-}rateCategorize;
-
-/** @brief Traversal descriptor entry.
-  * 
-  * Contains the information required to execute an operation in a step of the tree traversal.
-  * q   r
-  *  \ /
-  *   p
-  *
-  * The entry defines 2 input/parent nodes (q and r) and one output/child node (p)
-  * qz represents the branch length(s) of the branch connecting q and p
-  * rz represents the branch length(s) of the branch connecting r and p
-  * PLL_TIP_TIP     Both q and r are tips
-  * PLL_INNER_INNER Both q and r are inner nodes
-  * @note PLL_TIP_INNER   q is a tip and r is an inner node (by convention, flip q and r if required)
-  */
-typedef struct
-{
-  int tipCase;                  /**< Type of entry, must be PLL_TIP_TIP PLL_TIP_INNER or PLL_INNER_INNER */
-  int pNumber;                  /**< should exist in some nodeptr p->number */
-  int qNumber;                  /**< should exist in some nodeptr q->number */
-  int rNumber;                  /**< should exist in some nodeptr r->number */
-  double qz[PLL_NUM_BRANCHES];
-  double rz[PLL_NUM_BRANCHES];
-  /* recom */
-  int slot_p;                   /**< In recomputation mode, the RAM slot index for likelihood vector of node p, otherwise unused */
-  int slot_q;                   /**< In recomputation mode, the RAM slot index for likelihood vector of node q, otherwise unused */
-  int slot_r;                   /**< In recomputation mode, the RAM slot index for likelihood vector of node r, otherwise unused */
-  /* E recom */
-} traversalInfo;
-
-/** @brief Traversal descriptor.
-  * 
-  * Describes the state of a traversal descriptor
-  */
-typedef struct
-{
-  traversalInfo *ti;              /**< list of traversal steps */
-  int count;                      /**< number of traversal steps */
-  int functionType;
-  pllBoolean traversalHasChanged;   
-  pllBoolean *executeModel;           
-  double  *parameterValues;
-} traversalData;
-
-/** @brief Node record structure
-  * 
-  * Each inner node is a trifurcation in the tree represented as a circular list containing 3 node records. One node record uniquely identifies a subtree, and the orientation of the likelihood vector within a node
-  *
-  * p1 -------> p2 ----> to the next node
-  * ^           |
-  * |-----p3<---|          
-  * 
-  */
-struct noderec;
-
-/** @brief Branch length information.
-  * 
-  * @todo add relevant info on where this is used ???
-  */
-typedef struct
-{
-  unsigned int *vector; 
-  int support;   
-  struct noderec *oP;
-  struct noderec *oQ;
-} branchInfo;
-
-
-
-
-
-/** @brief Linkage of partitions.
-  * 
-  * @todo add relevant info on where this is used ???
-  */
-typedef struct
-{
-  pllBoolean valid;
-  int partitions;  
-  int *partitionList;
-}
-  linkageData;
-typedef struct
-{
-  int entries;
-  linkageData* ld;
-}
-  linkageList;
-
-
-
-  /** 
-   *
-   * the data structure below is fundamental for representing trees 
-     in the library!
-
-     Inner nodes are represented by three instances of the nodeptr data structure that is linked 
-     via a cyclic list using the next pointer.
-
-     So for building an inner node of the tree we need to allocate three nodeptr 
-     data structures and link them together, e.g.:
-
-     assuming that we have allocated space for an inner node 
-     for nodeptr pointers p1, p2, p3, 
-
-     we would then link them like this:
-
-     p1->next = p2;
-     p2->next = p3;
-     p3->next = p1;
-
-     also note that the node number that identifies the inner node 
-     needs to be set to the same value.
-
-     for n taxa, tip nodes are enumerated/indexed from 1 to n,
-     and inner node indices start at n+1. Assuming that we have 10 taxa 
-     and this is our first inner node, we'd initialize the number as follows:
-
-     p1->number = 11;
-     p2->number = 11;
-     p3->number = 11;
-
-     Note that the node number is important for indexing tip sequence data as well as inner likelihood vectors 
-     and that it is this number (the index) that actually gets stored in the traversal descriptor.
-
-     Tip nodes are non-cyclic nodes that simply consist of one instance/allocation of nodeptr.
-
-     if we have allocated a tip data structure nodeptr t1, 
-     we would initialize it as follows:
-
-     t1->number = 1;
-
-     t1->next = NULL;
-
-     now let's assume that we want to build a four taxon tree with tips t1, t2, t3, t4 
-     and inner nodes (p1,p2,p3) and (q1,q2,q3).
-
-     we first build the tips:
-
-     t1->number = 1;
-     t1->next = NULL;
-     
-     t2->number = 2;
-     t2->next = NULL;
-
-     t3->number = 3;
-     t3->next = NULL;
-
-     t4->number = 4;
-     t4->next = NULL;
-     
-     now the first inner node
-
-     p1->next = p2;
-     p2->next = p3;
-     p3->next = p1;    
-
-     p1->number = 5;
-     p2->number = 5;
-     p3->number = 5;
-
-     and the second inner node.
-
-     q1->next = q2;
-     q2->next = q3;
-     q3->next = q1;    
-
-     q1->number = 6;
-     q2->number = 6;
-     q3->number = 6;
-     
-     now we need to link the nodes together such that they form a tree, let's assume we want ((t1,t2), (t3, t4));
-
-     we will have to link the nodes via the so-called back pointer,
-     i.e.:
-
-     let's connect node p with t1 and t2
-
-     t1->back = p1;
-     t2->back = p2;
-
-     and vice versa:
-
-     p1->back = t1;
-     p2->back = t2;
-
-     let's connect node p with node q:
-
-     p3->back = q3;
-
-     and vice versa:
-
-     q3->back = p3;
-
-     and now let's connect node q with tips t3 and t4:
-
-     q1->back = t3;
-     q2->back = t4;
-
-     and vice versa:
-
-     t3->back = q1;
-     t4->back = q2;
-
-     What remains to be done is to set up the branch lengths.
-     Using the data structure below, we always have to store the 
-     branch length twice for each "topological branch" unfortunately.
-
-     Assuming that we are only estimating a single branch across all partitions 
-     we'd just set the first index of the branch length array z[PLL_NUM_BRANCHES].
-
-     e.g., 
-
-     t3->z[0] = q1->z[0] = 0.9;
-
-     the above operation for connecting nodes is implemented in functions hookup() which will set 
-     the back pointers of two nodes that are to be connected as well as the branch lengths.
-
-     The branchInfo data field is a pointer to a data-structure that stores meta-data and requires 
-     the tree not to change while it is being used.
-     
-     Also, this pointer needs to be set by doing a full tree traversal on the tree.
-
-     Note that q1->bInf == t3->bInf in the above example.
-
-     The hash number is used for mapping bipartitions to a hash table as described in the following paper:
-
-     A. Aberer, N. Pattengale, A. Stamatakis: "Parallelized phylogenetic post-analysis on multi-core architectures". Journal of Computational Science 1, 107-114, 2010.
-     
-     The support data field stores the support value for the branch associated with each nodeptr structure.
-     Note that support always refers to branches. 
-
-     Thus for consistency, q3->support must be equal to p3->support;
-
-     Finally, the three char fields x, xPars and xBips are very very important!
-
-     They are used to denote the presence/absence or if you want, direction of the 
-     parsimony, bipartition, or likelihood vector at a node with respect to the virtual root.
-
-     Essentially, they are just used as single presence/absence bits and ONLY for inner nodes!
-
-     When setting up new inner nodes, one of the three pointers in the cyclic list must 
-     have x = 1 and the other two x = 0;
-
-     in the above example we could set:
-
-     p1->x = 0;
-     p2->x = 0;
-     p3->x = 1;
-
-     q1->x = 0;
-     q2->x = 0;
-     q3->x = 1;
-
-     This would mean that the virtual root is located at the inner branch of the four taxon tree ((t1,t2),(t3,t4));
-
-     When we re-root the tree at some other branch we need to update the location of the x pointer that is set to 1.
-
-     This means if we root the tree at the branch leading to t1 we would set 
-
-     p1->x = 1;
-     p2->x = 0;
-     p3->x = 0;
-
-     the values for q remain unchanged since q3 is still pointing toward the root.
-
-     When we re-locate the root to branch p1 <-> t1 the fact that we have to "rotate" the x value that is set to 1
-     to another node of the cyclic list representing the abstract topological node p, also tells us that we 
-     need to re-compute the conditional likelihood array for p. 
-
-     Note that only one likelihood or parsimony array is stored per inner node, and the location of x essentially tells us which subtree 
-     it summarizes: if p1->x == 1, it summarizes subtree (t2, (t3, t4)); if p3->x == 1, the likelihood vector associated with 
-     node p summarizes subtree (t1, t2).
-
-     @todo I think we should rename the back pointer. It's not back, it can be forward depending on the orientation. We should rename it to outer. Back is too confusing, I would assume it's the opposite of next, i.e. previous.
-
-     @struct noderec
-
-     @brief Tree node record
-
-     A node in a tree is a structure which contains a cyclic list of pointers to 3 nodes which we call a \e roundabout. The first node is the structure itself, and the other two nodes are accessed via \a noderec->next and \a noderec->next->next. To access the outer node with which each of the 3 nodes forms an edge one has to use the \a back pointer
-
-     @var noderec::next
-     @brief Next node in the roundabout
-
-     @var noderec::back
-     @brief Outer node
-
-     @var noderec::number
-     @brief Node identifier
-
-     In general, tips (i.e. leaves) are numbered from 1 to \e n where \e n is the number of taxa. Identifiers for internal nodes start from \e n + 1. Note
-     that for a given inner node, the identifier must be the same for all 3 nodes that compose it.
-
-     @var noderec::z
-     @brief The branch lengths per partition for the main node in the roundabout
-
-     @todo Append an image
-  */
-typedef  struct noderec
-{
- 
-  branchInfo      *bInf;
-  double           z[PLL_NUM_BRANCHES];
-  struct noderec  *next;        
-  struct noderec  *back;       
-  hashNumberType   hash;
-  int              support;
-  int              number;    
-  char             x;
-  char             xPars;
-  char             xBips;
-}
-  node, *nodeptr;
-
-typedef unsigned int parsimonyNumber;
-
-/* @brief Alignment, transition model, model of rate heterogenety and likelihood vectors for one partition.
-  * 
-  * @todo De-couple into smaller data structures
-  *
-  * ALIGNMENT DATA 
-  * This depends only on the type of data in this partition of the alignment 
-  *
-  * MODEL OF RATE HETEROGENEITY: we use either GAMMA or PSR 
-  * Rate heterogeneity: Per Site Categories (PSR) model aka CAT, 
-  * Rate of site i is given by perSiteRates[rateCategory[i]]
-  *
-  * TRANSITION MODEL: We always assume General Time Reversibility 
-  * Transition probability matrix: P(t) = exp(Qt)
-  * Branch length t is the expected number of substitutions per site 
-  * Pij(t) is the probability of going from state i to state j in a branch of length t 
-  * Relative substitution rates (Entries in the Q matrix) 
-  * In GTR we can write Q = S * D, where S is a symmetrical matrix and D a diagonal with the state frequencies 
-
-    @var protModels
-    @brief Protein models
-
-    @detail Detailed protein models description
-
-    @var autoProtModels
-    @brief Auto prot models
-    @detail Detailed auto prot models
-  */
- 
-
-
-/** @struct pInfo
-    
-    @brief Partition information structure
-
-    This data structure encapsulates all properties and auxiliary variables that together
-    constitute a partition.
-
-    @var pInfo::dataType
-    @brief Type of data this partition contains
-
-    Can be DNA (\b PLL_DNA_DATA) or AminoAcid (\b PLL_AA_DATA) data
-
-    @var pInfo::states
-    @brief Number of states
-
-    Number of states this type of data can consist of
-
-    @var pInfo::maxTipStates
-    @brief Number of undetermined states (possible states at the tips)
-
-    This is the total number of possible states that can appear in the alignment. This includes degenerate (undetermined) bases
-
-    @var pInfo::partitionName
-    @brief Name of partition
-
-    A null-terminated string describing the name of partition
-
-    @var pInfo::lower
-    @brief Position of the first site in the alignment that is part of this partition [1, tr->originalCrunchedLength]
-
-    @var pInfo::upper
-    @brief Position of the last site that is part of this partition plus one (i.e. position of the first site that is not part of this partition) 
-
-    @var pInfo::width
-    @brief Number of sites in the partition (i.e. \a upper - \a lower)
-
-    @var pInfo::wgt
-    @brief Weight of site
-
-    Number of times this particular site appeared in the partition before the duplicates were removed and replaced by this weight
-
-    @var pInfo::empiricalFrequencies
-    @brief Empirical frequency of each state in the current partition
-
-    @var pInfo::perSiteRates
-    @brief Per Site Categories (PSR), aka CAT, rate values; the rate of site i is perSiteRates[rateCategory[i]]
-
-    @var pInfo::rateCategory
-    @brief CAT category index for each site
-
-    @var pInfo::numberOfCategories
-    @brief CAT size of the set of possible categories
-
-    @var pInfo::alpha
-    @brief Gamma parameter to be optimized
-    
-    @var pInfo::gammaRates
-    @brief Values of the 4 gamma categories (rates) computed given an alpha
-
-    @var pInfo::substRates
-    @brief Entries of substitution matrix, e.g. 6 free parameters in DNA
-
-    In GTR we can write \f$ Q = S * D \f$, where \f$ S \f$ is a symmetrical matrix and \f$ D \f$ a diagonal with the state frequencies,
-    which is represented by the array \a frequencies. The symmetrical matrix is the array \a substRates
-
-    @var pInfo::frequencies
-    @brief State frequencies, entries in D are initialized as empiricalFrequencies
-    
-    In GTR we can write \f$ Q = S * D \f$, where \f$ S \f$ is a symmetrical matrix and \f$ D \f$ a diagonal with the state frequencies,
-    which is represented by the array \a frequencies. The symmetrical matrix is the array \a substRates
-
-    @var pInfo::freqExponents
-
-    @var pInfo::EIGN
-    @brief Eigenvalues of Q matrix
-
-    @var pInfo::EV
-    @brief Eigenvectors of Q matrix
-
-    @var pInfo::EI
-    @brief Inverse eigenvectors of Q matrix
-
-    @var pInfo::left
-    @brief P matrix for the left term of the conditional likelihood equation
-
-    @var pInfo::right
-    @brief P matrix for the right term of the conditional likelihood equation
-
-    @var pInfo::tipVector
-    @brief Precomputed (based on current P matrix) conditional likelihood vectors for every possible base 
-
-    @var pInfo::EIGN_LG4
-    @brief Eigenvalues of Q matrix for the LG4 model
-
-    @var pInfo::EV_LG4
-    @brief Eigenvectors of Q matrix for the LG4 model
-
-    @var pInfo::EI_LG4
-    @brief Inverse eigenvectors of Q matrix for the LG4 model
-    
-    @var pInfo::frequencies_LG4
-    @brief State frequencies for the LG4 model
-
-    @var pInfo::tipVector_LG4
-    @brief Precomputed (based on current P matrix) conditional likelihood vectors for every possible base for the LG4 model
-
-    @var pInfo::substRates_LG4
-    @brief Entries of substitution matrix for the LG4 model
-
-    @var pInfo::protModels
-    @brief Protein model for current partition
-
-    In case \a pInfo::dataType is set to \a PLL_AA_DATA then \a protModels indicates the index in the global array \a protModels
-    of the protein model that the current partition uses.
-
-    @var pInfo::autoProtModels
-    @brief Best fitted protein model for the \b PLL_AUTO partitions
-
-    If \a protModels is set to \b PLL_AUTO then \a autoProtModels holds the currently detected best fitting protein model for the partition
-
-    @var pInfo::protUseEmpiricalFreqs
-
-    @var pInfo::nonGTR
-
-    @var pInfo::optimizeBaseFrequencies
-
-    @var pInfo::optimizeAlphaParameter
-
-    @var pInfo::optimizeSubstitutionRates
-
-    @var pInfo::symmetryVector
-
-    @var pInfo::frequencyGrouping
-
-
-    @todo
-      Document freqExponents
-
-*/
-
-
-
-typedef struct {
-  int     dataType;
-  int     states;
-  int     maxTipStates;
-  char   *partitionName;
-  int     lower;
-  int     upper;
-  int     width;
-  int    *wgt;
-  double *empiricalFrequencies; 
-
-
-  /* MODEL OF RATE HETEROGENEITY: we use either GAMMA or PSR */
-  /* Rate heterogeneity: Per Site Categories (PSR) model aka CAT, see updatePerSiteRates() */
-  /* Rate of site i is given by perSiteRates[rateCategory[i]] */
-  double *perSiteRates;
-  int    *rateCategory;
-  int     numberOfCategories;
-  /* Rate heterogeneity: GAMMA model of rate heterogeneity */
-  double alpha;
-  double *gammaRates;
-
-
-  /* TRANSITION MODEL: We always assume General Time Reversibility */
-  /* Transition probability matrix: P(t) = exp(Qt) */
-  /* Branch length t is the expected number of substitutions per site */
-  /* Pij(t) is the probability of going from state i to state j in a branch of length t */
-  /* Relative substitution rates (Entries in the Q matrix) */
-  /* In GTR we can write Q = S * D, where S is a symmetrical matrix and D a diagonal with the state frequencies */
-  double *substRates;       /**< TRANSITION MODEL Entries in S, e.g. 6 free parameters in DNA */   
-  double *frequencies;      /**< State frequencies, entries in D, are initialized as empiricalFrequencies */
-  double *freqExponents;
-  /* Matrix decomposition: @todo map this syntax to Explanation of the mathematical background */
-  double *EIGN;
-  double *EV;
-  double *EI;
-  double *left;
-  double *right;
-  double *tipVector;
-
-
-  /* asc bias */
-  pllBoolean       ascBias;
-  int           ascOffset;
-  int         * ascExpVector;
-  double      * ascSumBuffer;
-  double      * ascVector;
-  double        ascScaler[64];
-  
-  /* LG4 */
-
-  double *EIGN_LG4[4];
-  double *EV_LG4[4];
-  double *EI_LG4[4];
-
-  double *frequencies_LG4[4];
-  double *tipVector_LG4[4];
-  double *substRates_LG4[4];
-  
-  /* LG4X */
-
-  double lg4x_weights[4];
-  double lg4x_weightExponents[4];
-  double lg4x_weightsBuffer[4];
-  double lg4x_weightExponentsBuffer[4];
-  double lg4x_weightLikelihood;
-  
-  /* Protein specific */
-  int     protModels;			/**< Empirical model matrix */
-  int     autoProtModels;		/**< Model selected with "auto" protein model */
-  int     protUseEmpiricalFreqs;	/**< Whether to use empirical frequencies for protein model */
-
-  pllBoolean nonGTR;
-  pllBoolean optimizeBaseFrequencies;	/**< Whether to optimize base frequencies */
-  pllBoolean optimizeAlphaParameter;	/**< Whether to optimize alpha parameters and gamma rates */
-  pllBoolean optimizeSubstitutionRates;	/**< Whether to optimize substitution rates */
-  int    *symmetryVector;		/**< Specify linkage between substitution rate parameters */
-  int    *frequencyGrouping;
-
-  /* LIKELIHOOD VECTORS */
-
-  /* partial LH Inner vectors  ancestral vectors, we have 2*tips - 3 inner nodes */
-  double          **xVector;          /**< Conditional likelihood vectors for inner nodes */
-  unsigned char   **yVector;          /**< Tip entries (sequence) for tip nodes */
-  unsigned int     *globalScaler;     /**< Counters for scaling operations done at node i */
-
-  /* data structures for conducting per-site likelihood scaling.
-     this allows to compute the per-site log likelihood scores 
-     needed for RELL-based bootstrapping and all sorts of statistical 
-     tests for comparing trees ! */
-  int              **expVector;     /**< @brief An entry per inner node. Each element is an array of size the number of sites in the current partition and represents how many times the respective site has been scaled in the subtree rooted at the current node */
-  size_t           *expSpaceVector; /**< @brief Each entry represents an inner node and states the size of the corresponding element in \a expVector, which is the number of sites for the current partition */
-
-  /* These are for the saveMemory option (tracking gaps to skip computations and memory) */
-  size_t           *xSpaceVector;       /* Size of conditional likelihood vectors per inner node */
-  int               gapVectorLength;    /** Length of \a gapVector bitvector in unsigned integers assuming that \a unsigned \a int is 32bits. It is set to partition size / 32 */
-  unsigned int     *gapVector;          /** A bit vector of size \a gapVectorLength * 32 bits. A bit is set to 1 if the corresponding */
-  double           *gapColumn; 
-
-  /* Parsimony vectors at each node */
-  size_t parsimonyLength;
-  parsimonyNumber *parsVect; 
-  parsimonyNumber *perSiteParsScores;
-
-  /* This buffer of size width is used to store intermediate values for the branch length optimization under 
-     newton-raphson. The data in here can be re-used for all iterations irrespective of the branch length.
-   */
-  double *sumBuffer; 
-
-  /* Buffer to store the per-site log likelihoods */
-  double *perSiteLikelihoods;
-
-  /* This buffer of size width is used to store the ancestral state at a node of the tree. */
-  double *ancestralBuffer;
-
-  /* From tree */
-  pllBoolean executeModel;
-  double fracchange;
-  double rawFracchange;
-  double partitionContribution;
-  double partitionWeight;
-  double partitionLH;
-
-// #if (defined(_USE_PTHREADS) || defined(_FINE_GRAIN_MPI))
-  int partitionAssignment;
-// #endif
-
-} pInfo;
-
-typedef struct
- {
-   pInfo **partitionData;
-   int numberOfPartitions;
-   pllBoolean perGeneBranchLengths;
-   pllBoolean dirty;
-   linkageList *alphaList;
-   linkageList *rateList;
-   linkageList *freqList;
- }  partitionList;
-
-
-
-#define PLL_REARR_SETTING 1
-#define PLL_FAST_SPRS     2
-#define PLL_SLOW_SPRS     3
-
-
-/** @brief Checkpointing states. 
- * 
- * @todo Raxml specific 
-  */
-typedef struct {
- 
-  int state;
-
-  /*unsigned int vLength;*/
-  double accumulatedTime;  
-  int rearrangementsMax;
-  int rearrangementsMin;
-  int thoroughIterations;
-  int fastIterations;
-  int mintrav;
-  int maxtrav;
-  int bestTrav;
-  double startLH; 
-  double lh;
-  double previousLh;
-  double difference;
-  double epsilon;  
-  pllBoolean impr;
-  pllBoolean cutoff;  
-       
-  double tr_startLH;
-  double tr_endLH;
-  double tr_likelihood;
-  double tr_bestOfNode;  
-  double tr_lhCutoff;
-  double tr_lhAVG;
-  double tr_lhDEC;
-  int    tr_NumberOfCategories;
-  int    tr_itCount;  
-  int    tr_doCutoff;
-  int    tr_thoroughInsertion;
-  int    tr_optimizeRateCategoryInvocations;
- 
-  /* prevent users from doing stupid things */
- 
-  int searchConvergenceCriterion;
-  int rateHetModel;
-  int maxCategories;
-  int NumberOfModels;
-  int numBranches;
-  int originalCrunchedLength;    
-  int mxtips;
-  char seq_file[1024];
-} checkPointState;
-
-
-
-/* recomp */
-#ifdef _DEBUG_RECOMPUTATION
-typedef struct {
-  unsigned long int numTraversals;
-  unsigned long int tt;
-  unsigned long int ti;
-  unsigned long int ii;
-  unsigned int *travlenFreq;
-} traversalCounter;
-#endif
-/* E recomp */
-
-
-/** @brief Tree topology.
- * 
- * @todo Apart from the topology this structure contains several fields that act like global variables in raxml
-  */
-typedef  struct  {
-
-  int *ti;
-
-  /* recomp */
-  recompVectors *rvec;            /**< this data structure tracks which vectors store which nodes */
-  float maxMegabytesMemory;       /**< User says how many MB in main memory should be used */
-  float vectorRecomFraction;      /**< vectorRecomFraction ~= 0.8 * maxMegabytesMemory  */
-  pllBoolean useRecom;               /**< ON if we apply recomputation of ancestral vectors*/
-#ifdef _DEBUG_RECOMPUTATION 
-  traversalCounter *travCounter;
-  double stlenTime;
-#endif
-  /* E recomp */
-  
-  pllBoolean fastScaling;
-  pllBoolean saveMemory;
-  int              startingTree;
-  long             randomNumberSeed;
-
-  double          *lhs;         /**< Array to store per-site log likelihoods of \a originalCrunchedLength (compressed) sites */
-  double          *patrat;      /**< rates per pattern */
-  double          *patratStored; 
-  int             *rateCategory;
-  int             *aliaswgt;    /**< weight by pattern */ 
-  pllBoolean    manyPartitions;
-
-  pllBoolean grouped;              /**< No idea what this is, but is always set to PLL_FALSE */
-  pllBoolean constrained;          /**< No idea what this is, but is always set to PLL_FALSE */
-  int threadID;
-  volatile int numberOfThreads;
-
-//#if (defined(_USE_PTHREADS) || defined(_FINE_GRAIN_MPI))
- 
-  unsigned char *y_ptr; 
-  
-  double lower_spacing;
-  double upper_spacing; 
-
-  double *ancestralVector;
-
-//#endif
-  
-  pllHashTable     *nameHash;
-  char           ** tipNames;
-
-  char             *secondaryStructureInput;
-
-  traversalData    td[1];
-
-  int              maxCategories;
-  int              categories;
-
-  double           coreLZ[PLL_NUM_BRANCHES];
-  
- 
-  branchInfo       *bInf;
-
-  int              multiStateModel;
-
-
-  pllBoolean curvatOK[PLL_NUM_BRANCHES];
-
-  /* the stuff below is shared among DNA and AA, span does
-     not change depending on datatype */
-
-  /* model stuff end */
-  unsigned char    **yVector;        /**< list of raw sequences (parsed from the alignment)*/
-
-  int              secondaryStructureModel;
-  int              originalCrunchedLength; /**< Length of alignment after removing duplicate sites in each partition */
-
-  int              *secondaryStructurePairs;
-
-  double            fracchange;      /**< Average substitution rate */
-  double            rawFracchange;
-  double            lhCutoff;
-  double            lhAVG;
-  unsigned long     lhDEC;
-  unsigned long     itCount;
-  int               numberOfInvariableColumns;
-  int               weightOfInvariableColumns;
-  int               rateHetModel;
-
-  double           startLH;
-  double           endLH;
-  double           likelihood;           /**< last likelihood value evaluated for the current topology */
- 
-  node           **nodep;                /**< pointer to the list of nodes, which describe the current topology */
-  nodeptr          nodeBaseAddress;
-  node            *start;                /**< starting node by default for full traversals (must be a tip contained in the tree we are operating on) */
-  int              mxtips;  /**< Number of tips in the topology */
-
-  int              *constraintVector;   /**< @todo What is this? */
-  int              numberOfSecondaryColumns;
-  pllBoolean          searchConvergenceCriterion;
-  int              ntips;
-  int              nextnode;  
-
-  pllBoolean          bigCutoff;
-  pllBoolean          partitionSmoothed[PLL_NUM_BRANCHES];
-  pllBoolean          partitionConverged[PLL_NUM_BRANCHES];
-  pllBoolean          rooted;
-  pllBoolean          doCutoff;
- 
-  double         gapyness;
-
-  char **nameList;     /**< list of tips names (read from the phylip file) */
-  char *tree_string;   /**< the newick representaion of the topology */
-  char *tree0;
-  char *tree1;
-  int treeStringLength;
- 
-  unsigned int bestParsimony;
-  unsigned int *parsimonyScore;
-  
-  double bestOfNode;
-  nodeptr removeNode;   /**< the node that has been removed. Together with \a insertNode represents an SPR move */
-  nodeptr insertNode;   /**< the node where insertion should take place . Together with \a removeNode represents an SPR move*/
-
-  double zqr[PLL_NUM_BRANCHES];
-  double currentZQR[PLL_NUM_BRANCHES];
-
-  double currentLZR[PLL_NUM_BRANCHES];
-  double currentLZQ[PLL_NUM_BRANCHES];
-  double currentLZS[PLL_NUM_BRANCHES];
-  double currentLZI[PLL_NUM_BRANCHES];
-  double lzs[PLL_NUM_BRANCHES];
-  double lzq[PLL_NUM_BRANCHES];
-  double lzr[PLL_NUM_BRANCHES];
-  double lzi[PLL_NUM_BRANCHES];
-
-
-  unsigned int **bitVectors;
-
-  unsigned int vLength;
-
-  pllHashTable *h;                 /**< hashtable for ML convergence criterion */
-  //hashtable *h;
- 
-  int optimizeRateCategoryInvocations;
-
-  checkPointState ckp;
-  pllBoolean thoroughInsertion; /**< true if the neighbor branches should be optimized when a subtree is inserted (slower)*/
-  pllBoolean useMedian;
-
-  int autoProteinSelectionType;
-
-  pllStack * rearrangeHistory;
-
-
-  /* analdef defines */
-  /* TODO: Do some initialization */
-  int              bestTrav;            /**< best rearrangement radius */
-  int              max_rearrange;       /**< max. rearrangemenent radius */
-  int              stepwidth;           /**< step in rearrangement radius */
-  int              initial;             /**< user defined rearrangement radius which also sets bestTrav if initialSet is set */
-  pllBoolean          initialSet;          /**< set bestTrav according to initial */
-  int              mode;                /**< candidate for removal */
-  pllBoolean        perGeneBranchLengths;
-  pllBoolean        permuteTreeoptimize;   /**< randomly select subtrees for SPR moves */
-  pllBoolean        compressPatterns;
-  double         likelihoodEpsilon;
-  pllBoolean        useCheckpoint;
-
-} pllInstance;
-
-/** @brief Stores data related to a NNI move  */
-typedef struct {
-        pllInstance * tr;
-        nodeptr p;
-        int nniType;
-        double z[PLL_NUM_BRANCHES]; // optimize branch lengths
-        double z0[PLL_NUM_BRANCHES]; // unoptimized branch lengths
-        double likelihood;
-        double deltaLH;
-} nniMove;
-
-/***************************************************************/
-
-typedef struct {
-  int partitionNumber;
-  int partitionLength;
-} partitionType;
-
-typedef struct
-{
-  double z[PLL_NUM_BRANCHES];
-  nodeptr p, q;
-  int cp, cq;
-}
-  connectRELL, *connptrRELL;
-
-typedef  struct
-{
-  connectRELL     *connect; 
-  int             start;
-  double          likelihood;
-}
-  topolRELL;
-
-
-typedef  struct
-{
-  int max;
-  topolRELL **t;
-}
-  topolRELL_LIST;
-
-/**************************************************************/
-
-/** @brief Connection within a topology.
-*   */
-typedef struct conntyp {
-    double           z[PLL_NUM_BRANCHES];           /**< branch length */
-    node            *p, *q;       /**< parent and child sectors */
-    void            *valptr;      /**< pointer to value of subtree */
-    int              descend;     /**< pointer to first connect of child */
-    int              sibling;     /**< next connect from same parent */
-    } pllConnect, *connptr;
-
-/** @brief Single Topology
-*   */
-typedef  struct {
-    double           likelihood;
-    int              initialTreeNumber;
-    pllConnect         *links;       /**< pointer to first connect (start) */
-    node            *start;
-    int              nextlink;    /**< index of next available connect */
-                                  /**< tr->start = tpl->links->p */
-    int              ntips;
-    int              nextnode;    /**< next available inner node for tree parsing */
-    int              scrNum;      /**< position in sorted list of scores */
-    int              tplNum;      /**< position in sorted list of trees */
-    } topol;
-
-/** @brief small helper data structure for printing out/downstream use of marginal ancestral probability vectors.
-*
-* it is allocated as an array that has the same length as the input alignment and can be used to 
-*   index the ancestral states for each position/site/pattern 
-*   */
-typedef struct {
-  double *probs; /**< marginal ancestral states */
-  char c; /**< most likely stated, i.e. max(probs[i]) above */
-  int states; /**< number of states for this position */
-} ancestralState;
-
-/** @brief List of topologies
-*
-*   */
-typedef struct {
-    double           best;        /**< highest score saved */
-    double           worst;       /**< lowest score saved */
-    topol           *start;       /**< starting tree for optimization */
-    topol          **byScore;
-    topol          **byTopol;
-    int              nkeep;       /**< maximum topologies to save */
-    int              nvalid;      /**< number of topologies saved */
-    int              ninit;       /**< number of topologies initialized */
-    int              numtrees;    /**< number of alternatives tested */
-    pllBoolean          improved;
-    } bestlist;
-
-/** @brief  This is used to look up some hard-coded data for each data type 
-*   */
-typedef struct 
-{
-  int leftLength;         /**< s^2 */
-  int rightLength;/**< s^2 */
-  int eignLength;/**<  s */
-  int evLength;
-  int eiLength;
-  int substRatesLength;   /**< (s^2 - s)/2 free model parameters for matrix Q i.e. substitution rates */
-  int frequenciesLength;  /**< s frequency of each state */ 
-  int tipVectorLength;    /* ??? */
-  int symmetryVectorLength;
-  int frequencyGroupingLength;
-
-  pllBoolean nonGTR;
-  pllBoolean optimizeBaseFrequencies;
-
-  int undetermined;
-
-  const char *inverseMeaning;
-
-  int states;   /* s */
-
-  pllBoolean smoothFrequencies;
-
-  const unsigned  int *bitVector;
-
-} partitionLengths;
-
-typedef struct
-{
-  int rearrangeType;
-  double  likelihood;
-
-  union {
-    struct {
-      double * zp;
-      double * zpn;
-      double * zpnn;
-      double * zqr;
-      nodeptr pn;
-      nodeptr pnn;
-      nodeptr r;
-      nodeptr p;
-      nodeptr q;
-    } SPR;
-    struct {
-      nodeptr origin;
-      int swapType;
-      double z[PLL_NUM_BRANCHES];
-    } NNI;
-  };
-} pllRollbackInfo;
-
-
-/** @struct pllRearrangeAttr
- 
-    @brief Structure holding attributes for searching possible tree rearrangements
-    
-    Holds the attributes for performing tree rearrangements.
-
-    @var pllRearrangeAttr
-      The origin node where the search should start
-
-    @var pllRearrangeAttr:mintrav
-      The minimum radius around the origin node \a p for which nodes should be tested
-
-    @var pllRearrangeAttr:maxtrav
-      The maximum radius around the origin node \a p for which nodes should be tested
-
-    @var pllRearrangeAttr:max
-      Maximum number of results to be returned
-*/
-typedef struct
- {
-   nodeptr p;
-   int mintrav;
-   int maxtrav;
- } pllRearrangeAttr;
-
-/** @typedef pllRearrangeInfo
-    
-    @brief Tree rearrangement information structure
-
-    Holds information for conducting tree arrangements. This structure
-    is the result of a tree arrangement search under given search
-    attributes.
-
-    @var pllRearrangeInfo::rearrangeType
-      Type of rearrangement. Can be \b PLL_REARRANGE_SPR, \b PLL_REARRANGE_NNI or
-      \b PLL_REARRANGE_TBR
-    
-    @var pllRearrangeInfo::likelihood
-      Holds the computed likelihood for the addressed rearrangement
-
-    @var pllRearrangeInfo::SPR::removeNode
-      Node where to perform subtree pruning
-
-    @var pllRearrangeInfo::SPR::insertNode
-      Node where to place the pruned subtree
-
-    @var pllRearrangeInfo::zqr
-      Holds the computed branch lengths after the SPR
-*/
-typedef struct
- {
-   int rearrangeType;
-   double  likelihood;
-   union {
-     struct {
-       nodeptr removeNode;
-       nodeptr insertNode;
-       double  zqr[PLL_NUM_BRANCHES];
-     } SPR;
-     struct {
-       nodeptr originNode;
-       int     swapType;
-     } NNI;
-   };
- } pllRearrangeInfo;
-
-
-typedef struct
- {
-   int max_entries;
-   int entries;
-   pllRearrangeInfo * rearr;
- } pllRearrangeList;
-
-/** @brief Generic structure for storing a multiple sequence alignment */
-typedef struct
- {
-   int              sequenceCount;      /**< @brief Number of sequences */
-   int              sequenceLength;     /**< @brief Length of sequences */
-   int              originalSeqLength;  /**< @brief Original length of sequences (not modified after removing duplicates) */
-   char          ** sequenceLabels;     /**< @brief An array of where the \a i-th element is the name of the \a i-th sequence */
-   unsigned char ** sequenceData;       /**< @brief The actual sequence data */
-   int            * siteWeights;        /**< @brief An array where the \a i-th element indicates how many times site \a i appeared (prior to duplicates removal) in the alignment */
- } pllAlignmentData;
-
-
-/******************** START OF API FUNCTION DESCRIPTIONS ********************/
-
-#if (defined(_USE_PTHREADS) || defined(_FINE_GRAIN_MPI))
-pllBoolean isThisMyPartition(partitionList *pr, int tid, int model);
-void printParallelTimePerRegion(void); 
-#endif
-
-#ifdef _FINE_GRAIN_MPI
-extern void pllFinalizeMPI (void);
-#endif
-
-
-
-/**
- * @brief Create the main instance of PLL
- *   
- * Create an instance of the phylogenetic likelihood library
- *
- * @param rateHetModel   Rate heterogeneity model
- * @param fastScaling    TODO: explain fastScaling here
- * @param saveMemory     TODO: explain saveMemory here
- * @param useRecom       If set to \b PLL_TRUE, enables ancestral state recomputation
- * 
- * @todo                 Document fastScaling, rate heterogeneity and saveMemory and useRecom
- *
- * @note                 Do not set \a saveMemory to when using \a useRecom as memory saving 
- *                       techniques are not yet implemented for ancestral state recomputation. 
- * 
- * @return               On success returns an instance to PLL, otherwise \b NULL
- */
-extern pllInstance * pllCreateInstance (pllInstanceAttr * pInst);
-
-/** 
- *  @ingroup instanceLinkingGroup
- *  @brief Load alignment to the PLL instance
- *   
- *   Loads (copies) the parsed alignment \a alignmentData to the PLL instance
- *   as a deep copy.
- * 
- *    @param tr              The library instance
- *    @param alignmentData   The multiple sequence alignment
- *    @param partitions      List of partitions
- *
- *    @return Returns 1 in case of success, 0 otherwise.
- */
-extern int pllLoadAlignment (pllInstance * tr, 
-                             pllAlignmentData * alignmentData, 
-                             partitionList * pList);
-
-/**
- * @brief Compute the empirical base frequencies for all partitions
- *
- * Compute the empirical base frequencies for all partitions in the list \a pl.
- *
- * @param pl                Partition list
- * @param alignmentData     Multiple sequence alignment
- *
- * @return   A list of \a pl->numberOfPartitions arrays each of size
-             \a pl->partitionData[i]->states, where \a i is the \a i-th partition
-*/
-extern double ** pllBaseFrequenciesAlignment (pllAlignmentData * alignmentData, partitionList * pl);
-extern double ** pllBaseFrequenciesInstance (pllInstance * tr, partitionList * pl);
-
-/* pthreads and MPI */
-extern void pllStartPthreads (pllInstance *tr, partitionList *pr);
-extern void pllStopPthreads (pllInstance * tr);
-extern void pllLockMPI (pllInstance * tr);
-extern void pllInitMPI(int * argc, char **argv[]);
-
-
-/* handling branch lengths*/
-extern double pllGetBranchLength (pllInstance *tr, nodeptr p, int partition_id);
-extern void pllSetBranchLength (pllInstance *tr, nodeptr p, int partition_id, double bl);
-extern int pllNniSearch(pllInstance * tr, partitionList *pr, int estimateModel);
-extern void pllOptimizeBranchLengths ( pllInstance *tr, partitionList *pr, int maxSmoothIterations );
-
-
-extern void pllEvaluateLikelihood (pllInstance *tr, partitionList *pr, nodeptr p, pllBoolean fullTraversal, pllBoolean getPerSiteLikelihoods);
-extern void pllUpdatePartials (pllInstance *tr, partitionList *pr, nodeptr p, pllBoolean masked);
-extern void pllUpdatePartialsAncestral(pllInstance *tr, partitionList *pr, nodeptr p);
-extern void pllNewviewIterative(pllInstance *tr, partitionList *pr, int startIndex);
-extern void pllEvaluateIterative(pllInstance *tr, partitionList *pr, pllBoolean getPerSiteLikelihoods);
-
-/* newick parser declarations */
-extern pllNewickTree * pllNewickParseString (const char * newick);
-extern pllNewickTree * pllNewickParseFile (const char * filename);
-extern int pllValidateNewick (pllNewickTree *);
-extern void pllNewickParseDestroy (pllNewickTree **);
-extern int pllNewickUnroot (pllNewickTree * t);
-extern char * pllTreeToNewick ( char *treestr, pllInstance *tr, partitionList *pr, nodeptr p,
-      pllBoolean printBranchLengths, pllBoolean printNames, pllBoolean printLikelihood,
-      pllBoolean rellTree, pllBoolean finalPrint, int perGene,
-      pllBoolean branchLabelSupport, pllBoolean printSHSupport);
-
-/* partition parser declarations */
-extern void  pllQueuePartitionsDestroy (pllQueue ** partitions);
-extern pllQueue * pllPartitionParse (const char * filename);
-extern pllQueue * pllPartitionParseString (const char * p);
-extern void pllPartitionDump (pllQueue * partitions);
-void pllBaseSubstitute (pllInstance * tr, partitionList * partitions);
-//void pllBaseSubstitute (pllAlignmentData * tr, partitionList * partitions);
-partitionList * pllPartitionsCommit (pllQueue * parts, pllAlignmentData * alignmentData);
-int pllPartitionsValidate (pllQueue * parts, pllAlignmentData * alignmentData);
-extern void pllAlignmentRemoveDups (pllAlignmentData * alignmentData, partitionList * pl);
-void pllPartitionsDestroy (pllInstance *, partitionList **);
-
-/* alignment data declarations */
-extern void pllAlignmentDataDestroy (pllAlignmentData *);
-extern int pllAlignmentDataDumpFile (pllAlignmentData *, int, const char *);
-extern void pllAlignmentDataDumpConsole (pllAlignmentData * alignmentData);
-extern pllAlignmentData * pllInitAlignmentData (int, int);
-extern pllAlignmentData * pllParseAlignmentFile (int fileType, const char *);
-extern pllAlignmentData *pllParsePHYLIPString (const char *rawdata, long filesize);
-
-
-/* model management */
-int pllInitModel (pllInstance *, partitionList *);
-void pllInitReversibleGTR(pllInstance * tr, partitionList * pr, int model);
-void pllMakeGammaCats(double alpha, double *gammaRates, int K, pllBoolean useMedian);
-int pllLinkAlphaParameters(char *string, partitionList *pr);
-int pllLinkFrequencies(char *string, partitionList *pr);
-int pllLinkRates(char *string, partitionList *pr);
-int pllSetSubstitutionRateMatrixSymmetries(char *string, partitionList * pr, int model);
-void pllSetFixedAlpha(double alpha, int model, partitionList * pr, pllInstance *tr);
-void pllSetFixedBaseFrequencies(double *f, int length, int model, partitionList * pr, pllInstance *tr);
-int  pllSetOptimizeBaseFrequencies(int model, partitionList * pr, pllInstance *tr);
-void pllSetSubstitutionMatrix(double *q, int length, int model, partitionList * pr,  pllInstance *tr);
-void pllSetFixedSubstitutionMatrix(double *q, int length, int model, partitionList * pr,  pllInstance *tr);
-int pllGetInstRateMatrix (partitionList * pr, int model, double * outBuffer);
-int pllOptimizeModelParameters(pllInstance *tr, partitionList *pr, double likelihoodEpsilon);
-double pllGetAlpha (partitionList * pr, int pid);
-void pllGetGammaRates (partitionList * pr, int pid, double * outBuffer);
-extern void pllGetBaseFrequencies(partitionList * pr, int model, double * outBuffer);
-extern void pllGetSubstitutionMatrix (partitionList * pr, int model, double * outBuffer);
-void pllEmpiricalFrequenciesDestroy (double *** empiricalFrequencies, int models);
-extern void pllOptRatesGeneric(pllInstance *tr, partitionList *pr, double modelEpsilon, linkageList *ll);
-extern void pllOptBaseFreqs(pllInstance *tr, partitionList * pr, double modelEpsilon, linkageList *ll);
-extern void pllOptAlphasGeneric(pllInstance *tr, partitionList * pr, double modelEpsilon, linkageList *ll);
-extern void pllOptLG4X(pllInstance *tr, partitionList * pr, double modelEpsilon, linkageList *ll, int numberOfModels);
-
-/* tree topology */
-void pllTreeInitTopologyNewick (pllInstance *, pllNewickTree *, int);
-void pllTreeInitTopologyRandom (pllInstance * tr, int tips, char ** nameList);
-void pllTreeInitTopologyForAlignment (pllInstance * tr, pllAlignmentData * alignmentData);
-extern void pllMakeRandomTree ( pllInstance *tr);
-void pllMakeParsimonyTree(pllInstance *tr);
-extern void pllMakeParsimonyTreeFast(pllInstance *tr, partitionList *pr, int sprDist);
-void pllComputeRandomizedStepwiseAdditionParsimonyTree(pllInstance * tr, partitionList * partitions, int sprDist);
-nodeptr pllGetRandomSubtree(pllInstance *);
-extern void pllFreeParsimonyDataStructures(pllInstance *tr, partitionList *pr);
-void pllDestroyInstance (pllInstance *);
-extern void pllGetAncestralState(pllInstance *tr, partitionList *pr, nodeptr p, double * outProbs, char * outSequence);
-unsigned int pllEvaluateParsimony(pllInstance *tr, partitionList *pr, nodeptr p, pllBoolean full, pllBoolean perSiteScores);
-void pllInitParsimonyStructures(pllInstance *tr, partitionList *pr, pllBoolean perSiteScores);
-
-/* rearrange functions (NNI and SPR) */
-pllRearrangeList * pllCreateRearrangeList (int max);
-void pllDestroyRearrangeList (pllRearrangeList ** bestList);
-void pllRearrangeSearch (pllInstance * tr, partitionList * pr, int rearrangeType, nodeptr p, int mintrav, int maxtrav, pllRearrangeList * bestList);
-void pllRearrangeCommit (pllInstance * tr, partitionList * pr, pllRearrangeInfo * rearr, int saveRollbackInfo);
-int pllRearrangeRollback (pllInstance * tr, partitionList * pr);
-void pllClearRearrangeHistory (pllInstance * tr);
-int pllRaxmlSearchAlgorithm (pllInstance * tr, partitionList * pr, pllBoolean estimateModel);
-int pllGetTransitionMatrix (pllInstance * tr, partitionList * pr, nodeptr p, int model, int rate, double * outBuffer);
-void pllGetTransitionMatrix2 (pllInstance * tr, partitionList * pr, int model, nodeptr p, double * outBuffer);
-int pllGetCLV (pllInstance * tr, partitionList * pr, nodeptr p, int partition, double * outProbs);
-extern int pllTopologyPerformNNI(pllInstance * tr, nodeptr p, int swap);
-
-/* hash functions */
-unsigned int pllHashString (const char * s, unsigned int size);
-int pllHashAdd  (pllHashTable * hTable, unsigned int hash, const char * s, void * item);
-pllHashTable * pllHashInit (unsigned int n);
-int pllHashSearch (struct pllHashTable * hTable, char * s, void ** item);
-void pllHashDestroy (struct pllHashTable ** hTable, void (*cbDealloc)(void *));
-
-/* node specific functions */
-nodeptr pllGetOrientedNodePointer (pllInstance * pInst, nodeptr p);
-
-/* other functions */
-extern char * pllReadFile (const char *, long *);
-extern int * pllssort1main (char ** x, int n);
-extern node ** pllGetInnerBranchEndPoints (pllInstance * tr);
-
-/* ---------------- */
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif
diff --git a/pllrepo/src/pllInternal.h b/pllrepo/src/pllInternal.h
deleted file mode 100644
index 1b6e0ac..0000000
--- a/pllrepo/src/pllInternal.h
+++ /dev/null
@@ -1,313 +0,0 @@
-/*
- * pllInternal.h
- *
- *  Created on: Feb 17, 2014
- *      Author: diego
- */
-
-#ifndef PLLINTERNAL_H_
-#define PLLINTERNAL_H_
-
-#include "pll.h"
-#include "genericParallelization.h"
-#include "errcodes.h"
-#include "lexer.h"
-#include "parsePartition.h"
-#include "mem_alloc.h"
-
-//extern int lookupWord(char *s, stringHashtable *h);
-
-extern void getDataTypeString(pllInstance *tr, pInfo *partitionInfo, char typeOfData[1024]);
-extern int countTips(nodeptr p, int numsp);
-extern unsigned int precomputed16_bitcount(unsigned int n, char *bits_in_16bits);
-
-extern size_t discreteRateCategories(int rateHetModel);
-
-extern const partitionLengths * getPartitionLengths(pInfo *p);
-extern pllBoolean getSmoothFreqs(int dataType);
-extern const unsigned int *getBitVector(int dataType);
-extern int getUndetermined(int dataType);
-extern int getStates(int dataType);
-extern char getInverseMeaning(int dataType, unsigned char state);
-extern double gettime ( void );
-extern int gettimeSrand ( void );
-extern double randum ( long *seed );
-
-extern void getxnode ( nodeptr p );
-extern void hookup ( nodeptr p, nodeptr q, double *z, int numBranches);
-extern void hookupFull ( nodeptr p, nodeptr q, double *z);
-extern void hookupDefault ( nodeptr p, nodeptr q);
-extern pllBoolean whitechar ( int ch );
-extern void printLog ( pllInstance *tr);
-extern double LnGamma ( double alpha );
-extern double IncompleteGamma ( double x, double alpha, double ln_gamma_alpha );
-extern double PointNormal ( double prob );
-extern double PointChi2 ( double prob, double v );
-extern void initModel ( pllInstance *tr, double **empiricalFrequencies, partitionList * partitions);
-
-extern void resetBranches ( pllInstance *tr );
-extern void modOpt ( pllInstance *tr, partitionList *pr, double likelihoodEpsilon);
-
-extern void initializePartitionData(pllInstance *localTree, partitionList * localPartitions);
-extern void initMemorySavingAndRecom(pllInstance *tr, partitionList *pr);
-
-extern void nodeRectifier ( pllInstance *tr );
-extern void allocateParsimonyDataStructures(pllInstance *tr, partitionList *pr);
-
-extern FILE *myfopen(const char *path, const char *mode);
-
-extern pllBoolean initrav ( pllInstance *tr, partitionList *pr, nodeptr p );
-extern void initravPartition ( pllInstance *tr, nodeptr p, int model );
-extern void update ( pllInstance *tr, partitionList *pr, nodeptr p );
-extern void smooth ( pllInstance *tr, partitionList *pr, nodeptr p );
-extern void smoothTree ( pllInstance *tr, partitionList *pr, int maxtimes );
-extern void localSmooth ( pllInstance *tr, partitionList *pr, nodeptr p, int maxtimes );
-extern pllBoolean localSmoothMulti(pllInstance *tr, nodeptr p, int maxtimes, int model);
-
-extern void smoothRegion ( pllInstance *tr, partitionList *pr, nodeptr p, int region );
-extern void regionalSmooth ( pllInstance *tr, partitionList *pr, nodeptr p, int maxtimes, int region );
-extern nodeptr removeNodeBIG ( pllInstance *tr, partitionList *pr, nodeptr p, int numBranches);
-extern nodeptr removeNodeRestoreBIG ( pllInstance *tr, partitionList *pr, nodeptr p );
-extern pllBoolean insertBIG ( pllInstance *tr, partitionList *pr, nodeptr p, nodeptr q);
-extern pllBoolean insertRestoreBIG ( pllInstance *tr, partitionList *pr, nodeptr p, nodeptr q );
-extern pllBoolean testInsertBIG ( pllInstance *tr, partitionList *pr, nodeptr p, nodeptr q );
-extern int NNI(pllInstance * tr, nodeptr p, int swap);
-extern void addTraverseBIG ( pllInstance *tr, partitionList *pr, nodeptr p, nodeptr q, int mintrav, int maxtrav );
-extern int rearrangeBIG ( pllInstance *tr, partitionList *pr, nodeptr p, int mintrav, int maxtrav );
-extern void traversalOrder ( nodeptr p, int *count, nodeptr *nodeArray );
-extern pllBoolean testInsertRestoreBIG ( pllInstance *tr, partitionList *pr, nodeptr p, nodeptr q );
-extern void restoreTreeFast ( pllInstance *tr, partitionList *pr );
-
-extern void initTL ( topolRELL_LIST *rl, pllInstance *tr, int n );
-extern void freeTL ( topolRELL_LIST *rl);
-extern void restoreTL ( topolRELL_LIST *rl, pllInstance *tr, int n, int numBranches );
-extern void resetTL ( topolRELL_LIST *rl );
-extern void saveTL ( topolRELL_LIST *rl, pllInstance *tr, int index );
-
-extern topol  *setupTopol (int maxtips);
-extern void saveTree (pllInstance *tr, topol *tpl, int numBranches);
-extern pllBoolean restoreTree (topol *tpl, pllInstance *tr, partitionList *pr);
-
-
-
-
-extern int  saveBestTree (bestlist *bt, pllInstance *tr, int numBranches);
-extern int  recallBestTree (bestlist *bt, int rank, pllInstance *tr, partitionList *pr);
-extern int initBestTree ( bestlist *bt, int newkeep, int numsp );
-extern void resetBestTree ( bestlist *bt );
-extern pllBoolean freeBestTree ( bestlist *bt );
-
-
-/* extern int treeReadLen (FILE *fp, pllInstance *tr, pllBoolean readBranches, pllBoolean readNodeLabels, pllBoolean topologyOnly);
-extern void getStartingTree (pllInstance *tr); 
-extern void treeReadTopologyString(char *treeString, pllInstance *tr);
-extern double treeLength (pllInstance *tr, int model);*/
-extern double evaluatePartialGeneric (pllInstance *, partitionList *pr, int i, double ki, int _model);
-extern void newviewAncestralIterative(pllInstance *tr, partitionList *pr);
-extern void printAncestralState(nodeptr p, pllBoolean printStates, pllBoolean printProbs, pllInstance *tr, partitionList *pr);
-extern void makenewzGeneric(pllInstance *tr, partitionList * pr, nodeptr p, nodeptr q, double *z0, int maxiter, double *result, pllBoolean mask);
-extern void makenewzGenericDistance(pllInstance *tr, int maxiter, double *z0, double *result, int taxon1, int taxon2);
-extern double evaluatePartitionGeneric (pllInstance *tr, nodeptr p, int model);
-extern void newviewPartitionGeneric (pllInstance *tr, nodeptr p, int model);
-extern double evaluateGenericVector (pllInstance *tr, nodeptr p);
-extern void categorizeGeneric (pllInstance *tr, nodeptr p);
-extern double makenewzPartitionGeneric(pllInstance *tr, nodeptr p, nodeptr q, double z0, int maxiter, int model);
-extern pllBoolean isTip(int number, int maxTips);
-
-/* recom functions */
-extern void computeTraversal(pllInstance *tr, nodeptr p, pllBoolean partialTraversal, int numBranches);
-extern void allocRecompVectorsInfo(pllInstance *tr);
-extern void allocTraversalCounter(pllInstance *tr);
-extern pllBoolean getxVector(recompVectors *rvec, int nodenum, int *slot, int mxtips);
-extern pllBoolean needsRecomp(pllBoolean recompute, recompVectors *rvec, nodeptr p, int mxtips);
-extern void unpinNode(recompVectors *v, int nodenum, int mxtips);
-extern void protectNode(recompVectors *rvec, int nodenum, int mxtips);
-
-/* Handling branch lengths*/
-extern void computeTraversalInfoStlen(nodeptr p, int maxTips, recompVectors *rvec, int *count);
-extern void computeFullTraversalInfoStlen(nodeptr p, int maxTips, recompVectors *rvec);
-extern void printTraversalInfo(pllInstance *tr);
-extern void countTraversal(pllInstance *tr);
-extern void storeExecuteMaskInTraversalDescriptor(pllInstance *tr, partitionList *pr);
-extern void storeValuesInTraversalDescriptor(pllInstance *tr, partitionList *pr, double *value);
-extern void makenewzIterative(pllInstance *, partitionList *pr);
-extern void execCore(pllInstance *, partitionList *pr, volatile double *dlnLdlz, volatile double *d2lnLdlz2);
-extern void makePermutation(int *perm, int n, pllInstance *tr);
-extern nodeptr findAnyTip(nodeptr p, int numsp);
-extern void putWAG(double *ext_initialRates);
-extern  unsigned int **initBitVector(int mxtips, unsigned int *vectorLength);
-//extern hashtable *initHashTable(unsigned int n);
-extern void cleanupHashTable(pllHashTable * h, int state);
-extern double convergenceCriterion(pllHashTable *h, int mxtips);
-extern void freeBitVectors(unsigned int **v, int n);
-//extern void freeHashTable(hashtable *h);
-//extern stringHashtable *initStringHashTable(hashNumberType n);
-//extern void addword(char *s, stringHashtable *h, int nodeNumber);
-extern void initRateMatrix(pllInstance *tr, partitionList *pr);
-extern void bitVectorInitravSpecial(unsigned int **bitVectors, nodeptr p, int numsp, unsigned int vectorLength, pllHashTable *h, int treeNumber, int function, branchInfo *bInf,
-                                    int *countBranches, int treeVectorLength, pllBoolean traverseOnly, pllBoolean computeWRF, int processID);
-extern  unsigned int bitcount_32_bit(unsigned int i);
-extern __inline unsigned int bitcount_64_bit(uint64_t i);
-extern void perSiteLogLikelihoods(pllInstance *tr, partitionList *pr, double *logLikelihoods);
-extern void updatePerSiteRates(pllInstance *tr, partitionList *pr, pllBoolean scaleRates);
-extern void restart(pllInstance *tr, partitionList *pr);
-
-//extern const unsigned int mask32[32];
-
-/** @brief Check whether the position \a pos in bitvector \a x is a gap
-
-    @param x
-      A bitvector represented by unsigned integers
-
-    @param pos
-      Position to check in \a x if it is set (i.e. it is a gap)
-
-    @return
-      Returns the value of the bit vector (\b 1 if set, \b 0 if not)
-*/
-//#ifndef __clang__
-//inline
-//#endif
-pllBoolean isGap(unsigned int *x, int pos);
-
-/** @brief Check whether the position \a pos in bitvector \a x is \b NOT a gap
-
-    @param x
-      A bitvector represented by unsigned integers
-
-    @param pos
-      Position to check in \a x if it is \b NOT set (i.e. it is \b NOT a gap)
-
-    @return
-      Returns the value of the bit vector (\b 1 if set, \b 0 if not)
-*/
-//#ifndef __clang__
-//inline
-//#endif
-pllBoolean noGap(unsigned int *x, int pos);
-
-//#ifndef __clang__
-//__inline
-//#endif
-//pllBoolean isGap(unsigned int *x, int pos);
-
-//#ifndef __clang__
-//__inline
-//#endif
-//pllBoolean noGap(unsigned int *x, int pos);
-
-/* from utils.h */
-linkageList* initLinkageList(int *linkList, partitionList *pr);
-
-#if (defined(_FINE_GRAIN_MPI) || defined(_USE_PTHREADS) )
-/* work tags for parallel regions */
-
-#define PLL_THREAD_NEWVIEW                  0
-#define PLL_THREAD_EVALUATE                 1
-#define PLL_THREAD_MAKENEWZ                 2
-#define PLL_THREAD_MAKENEWZ_FIRST           3
-#define PLL_THREAD_RATE_CATS                4
-#define PLL_THREAD_COPY_RATE_CATS           5
-#define PLL_THREAD_COPY_INIT_MODEL          6
-#define PLL_THREAD_INIT_PARTITION           7
-#define PLL_THREAD_OPT_ALPHA                8
-#define PLL_THREAD_OPT_RATE                 9
-#define PLL_THREAD_OPT_LG4X_RATE            10
-#define PLL_THREAD_COPY_ALPHA               11
-#define PLL_THREAD_COPY_RATES               12
-#define PLL_THREAD_COPY_LG4X_RATES          13
-#define PLL_THREAD_PER_SITE_LIKELIHOODS     14
-#define PLL_THREAD_NEWVIEW_ANCESTRAL        15
-#define PLL_THREAD_GATHER_ANCESTRAL         16
-#define PLL_THREAD_EXIT_GRACEFULLY          17
-#define PLL_THREAD_EVALUATE_PER_SITE_LIKES  18
-
-
-typedef struct
-{
-  pllInstance *tr;
-
-  partitionList *pr;
-  int threadNumber;
-}
-  threadData;
-extern void optRateCatPthreads(pllInstance *tr, partitionList *pr, double lower_spacing, double upper_spacing, double *lhs, int n, int tid);
-extern void pllMasterBarrier(pllInstance *, partitionList *, int);
-#endif
-
-
-#ifdef __AVX
-
-extern void newviewGTRGAMMAPROT_AVX_LG4(int tipCase,
-                                        double *x1, double *x2, double *x3, double *extEV[4], double *tipVector[4],
-                                        int *ex3, unsigned char *tipX1, unsigned char *tipX2, int n,
-                                        double *left, double *right, int *wgt, int *scalerIncrement, const pllBoolean useFastScaling);
-
-
-extern void newviewGTRCAT_AVX_GAPPED_SAVE(int tipCase,  double *EV,  int *cptr,
-                                   double *x1_start, double *x2_start,  double *x3_start, double *tipVector,
-                                   int *ex3, unsigned char *tipX1, unsigned char *tipX2,
-                                   int n,  double *left, double *right, int *wgt, int *scalerIncrement, const pllBoolean useFastScaling,
-                                   unsigned int *x1_gap, unsigned int *x2_gap, unsigned int *x3_gap,
-                                   double *x1_gapColumn, double *x2_gapColumn, double *x3_gapColumn, const int maxCats);
-
-extern void newviewGTRCATPROT_AVX_GAPPED_SAVE(int tipCase, double *extEV,
-                                       int *cptr,
-                                       double *x1, double *x2, double *x3, double *tipVector,
-                                       int *ex3, unsigned char *tipX1, unsigned char *tipX2,
-                                       int n, double *left, double *right, int *wgt, int *scalerIncrement, const pllBoolean useFastScaling,
-                                       unsigned int *x1_gap, unsigned int *x2_gap, unsigned int *x3_gap,
-                                       double *x1_gapColumn, double *x2_gapColumn, double *x3_gapColumn, const int maxCats);
-
-extern void  newviewGTRGAMMA_AVX_GAPPED_SAVE(int tipCase,
-                                      double *x1_start, double *x2_start, double *x3_start,
-                                      double *extEV, double *tipVector,
-                                      int *ex3, unsigned char *tipX1, unsigned char *tipX2,
-                                      const int n, double *left, double *right, int *wgt, int *scalerIncrement, const pllBoolean useFastScaling,
-                                      unsigned int *x1_gap, unsigned int *x2_gap, unsigned int *x3_gap,
-                                      double *x1_gapColumn, double *x2_gapColumn, double *x3_gapColumn
-                                      );
-
-extern void newviewGTRGAMMAPROT_AVX_GAPPED_SAVE(int tipCase,
-                                         double *x1_start, double *x2_start, double *x3_start, double *extEV, double *tipVector,
-                                         int *ex3, unsigned char *tipX1, unsigned char *tipX2, int n,
-                                         double *left, double *right, int *wgt, int *scalerIncrement, const pllBoolean useFastScaling,
-                                         unsigned int *x1_gap, unsigned int *x2_gap, unsigned int *x3_gap,
-                                         double *x1_gapColumn, double *x2_gapColumn, double *x3_gapColumn);
-
-extern void newviewGTRCAT_AVX(int tipCase,  double *EV,  int *cptr,
-    double *x1_start, double *x2_start,  double *x3_start, double *tipVector,
-    int *ex3, unsigned char *tipX1, unsigned char *tipX2,
-    int n,  double *left, double *right, int *wgt, int *scalerIncrement, const pllBoolean useFastScaling);
-
-
-extern void newviewGenericCATPROT_AVX(int tipCase, double *extEV,
-    int *cptr,
-    double *x1, double *x2, double *x3, double *tipVector,
-    int *ex3, unsigned char *tipX1, unsigned char *tipX2,
-    int n, double *left, double *right, int *wgt, int *scalerIncrement, const pllBoolean useFastScaling);
-
-
-extern void newviewGTRGAMMA_AVX(int tipCase,
-    double *x1_start, double *x2_start, double *x3_start,
-    double *EV, double *tipVector,
-    int *ex3, unsigned char *tipX1, unsigned char *tipX2,
-    const int n, double *left, double *right, int *wgt, int *scalerIncrement, const pllBoolean useFastScaling);
-
-extern void newviewGTRGAMMAPROT_AVX(int tipCase,
-                             double *x1, double *x2, double *x3, double *extEV, double *tipVector,
-                             int *ex3, unsigned char *tipX1, unsigned char *tipX2, int n,
-                             double *left, double *right, int *wgt, int *scalerIncrement, const pllBoolean useFastScaling);
-
-extern void newviewGTRCATPROT_AVX(int tipCase, double *extEV,
-                           int *cptr,
-                           double *x1, double *x2, double *x3, double *tipVector,
-                           int *ex3, unsigned char *tipX1, unsigned char *tipX2,
-                           int n, double *left, double *right, int *wgt, int *scalerIncrement, const pllBoolean useFastScaling);
-
-#endif
-
-extern int virtual_width( int n );
-extern void computeAllAncestralVectors(nodeptr p, pllInstance *tr, partitionList *pr);
-
-#endif /* PLLINTERNAL_H_ */
diff --git a/pllrepo/src/pthread.h b/pllrepo/src/pthread.h
deleted file mode 100644
index b4072f7..0000000
--- a/pllrepo/src/pthread.h
+++ /dev/null
@@ -1,1368 +0,0 @@
-/* This is an implementation of the threads API of POSIX 1003.1-2001.
- *
- * --------------------------------------------------------------------------
- *
- *      Pthreads-win32 - POSIX Threads Library for Win32
- *      Copyright(C) 1998 John E. Bossom
- *      Copyright(C) 1999,2005 Pthreads-win32 contributors
- * 
- *      Contact Email: rpj at callisto.canberra.edu.au
- * 
- *      The current list of contributors is contained
- *      in the file CONTRIBUTORS included with the source
- *      code distribution. The list can also be seen at the
- *      following World Wide Web location:
- *      http://sources.redhat.com/pthreads-win32/contributors.html
- * 
- *      This library is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU Lesser General Public
- *      License as published by the Free Software Foundation; either
- *      version 2 of the License, or (at your option) any later version.
- * 
- *      This library is distributed in the hope that it will be useful,
- *      but WITHOUT ANY WARRANTY; without even the implied warranty of
- *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *      Lesser General Public License for more details.
- * 
- *      You should have received a copy of the GNU Lesser General Public
- *      License along with this library in the file COPYING.LIB;
- *      if not, write to the Free Software Foundation, Inc.,
- *      59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
- */
-
-#if !defined( PTHREAD_H )
-#define PTHREAD_H
-
-/*
- * See the README file for an explanation of the pthreads-win32 version
- * numbering scheme and how the DLL is named etc.
- */
-#define PTW32_VERSION 2,9,1,0
-#define PTW32_VERSION_STRING "2, 9, 1, 0\0"
-
-/* There are three implementations of cancel cleanup.
- * Note that pthread.h is included in both application
- * compilation units and also internally for the library.
- * The code here and within the library aims to work
- * for all reasonable combinations of environments.
- *
- * The three implementations are:
- *
- *   WIN32 SEH
- *   C
- *   C++
- *
- * Please note that exiting a push/pop block via
- * "return", "exit", "break", or "continue" will
- * lead to different behaviour amongst applications
- * depending upon whether the library was built
- * using SEH, C++, or C. For example, a library built
- * with SEH will call the cleanup routine, while both
- * C++ and C built versions will not.
- */
-
-/*
- * Define defaults for cleanup code.
- * Note: Unless the build explicitly defines one of the following, then
- * we default to standard C style cleanup. This style uses setjmp/longjmp
- * in the cancelation and thread exit implementations and therefore won't
- * do stack unwinding if linked to applications that have it (e.g.
- * C++ apps). This is currently consistent with most/all commercial Unix
- * POSIX threads implementations.
- */
-#if !defined( __CLEANUP_SEH ) && !defined( __CLEANUP_CXX ) && !defined( __CLEANUP_C )
-# define __CLEANUP_C
-#endif
-
-#if defined( __CLEANUP_SEH ) && ( !defined( _MSC_VER ) && !defined(PTW32_RC_MSC))
-#error ERROR [__FILE__, line __LINE__]: SEH is not supported for this compiler.
-#endif
-
-/*
- * Stop here if we are being included by the resource compiler.
- */
-#if !defined(RC_INVOKED)
-
-#undef PTW32_LEVEL
-
-#if defined(_POSIX_SOURCE)
-#define PTW32_LEVEL 0
-/* Early POSIX */
-#endif
-
-#if defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 199309
-#undef PTW32_LEVEL
-#define PTW32_LEVEL 1
-/* Include 1b, 1c and 1d */
-#endif
-
-#if defined(INCLUDE_NP)
-#undef PTW32_LEVEL
-#define PTW32_LEVEL 2
-/* Include Non-Portable extensions */
-#endif
-
-#define PTW32_LEVEL_MAX 3
-
-#if ( defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 200112 )  || !defined(PTW32_LEVEL)
-#define PTW32_LEVEL PTW32_LEVEL_MAX
-/* Include everything */
-#endif
-
-#if defined(_UWIN)
-#   define HAVE_STRUCT_TIMESPEC 1
-#   define HAVE_SIGNAL_H        1
-#   undef HAVE_PTW32_CONFIG_H
-#   pragma comment(lib, "pthread")
-#endif
-
-/*
- * -------------------------------------------------------------
- *
- *
- * Module: pthread.h
- *
- * Purpose:
- *      Provides an implementation of PThreads based upon the
- *      standard:
- *
- *              POSIX 1003.1-2001
- *  and
- *    The Single Unix Specification version 3
- *
- *    (these two are equivalent)
- *
- *      in order to enhance code portability between Windows,
- *  various commercial Unix implementations, and Linux.
- *
- *      See the ANNOUNCE file for a full list of conforming
- *      routines and defined constants, and a list of missing
- *      routines and constants not defined in this implementation.
- *
- * Authors:
- *      There have been many contributors to this library.
- *      The initial implementation was contributed by
- *      John Bossom, and several others have provided major
- *      sections or revisions of parts of the implementation.
- *      Often significant effort has been contributed to
- *      find and fix important bugs and other problems to
- *      improve the reliability of the library, which sometimes
- *      is not reflected in the amount of code which changed as
- *      a result.
- *      As much as possible, the contributors are acknowledged
- *      in the ChangeLog file in the source code distribution
- *      where their changes are noted in detail.
- *
- *      Contributors are listed in the CONTRIBUTORS file.
- *
- *      As usual, all bouquets go to the contributors, and all
- *      brickbats go to the project maintainer.
- *
- * Maintainer:
- *      The code base for this project is coordinated and
- *      eventually pre-tested, packaged, and made available by
- *
- *              Ross Johnson <rpj at callisto.canberra.edu.au>
- *
- * QA Testers:
- *      Ultimately, the library is tested in the real world by
- *      a host of competent and demanding scientists and
- *      engineers who report bugs and/or provide solutions
- *      which are then fixed or incorporated into subsequent
- *      versions of the library. Each time a bug is fixed, a
- *      test case is written to prove the fix and ensure
- *      that later changes to the code don't reintroduce the
- *      same error. The number of test cases is slowly growing
- *      and therefore so is the code reliability.
- *
- * Compliance:
- *      See the file ANNOUNCE for the list of implemented
- *      and not-implemented routines and defined options.
- *      Of course, these are all defined in this file as well.
- *
- * Web site:
- *      The source code and other information about this library
- *      are available from
- *
- *              http://sources.redhat.com/pthreads-win32/
- *
- * -------------------------------------------------------------
- */
-
-/* Try to avoid including windows.h */
-#if (defined(__MINGW64__) || defined(__MINGW32__)) && defined(__cplusplus)
-#define PTW32_INCLUDE_WINDOWS_H
-#endif
-
-#if defined(PTW32_INCLUDE_WINDOWS_H)
-#include <windows.h>
-#endif
-
-#if defined(_MSC_VER) && _MSC_VER < 1300 || defined(__DMC__)
-/*
- * VC++ 6.0 and earlier compilers' headers have no DWORD_PTR type.
- */
-typedef unsigned long DWORD_PTR;
-typedef unsigned long ULONG_PTR;
-#endif
-/*
- * -----------------
- * autoconf switches
- * -----------------
- */
-
-#if defined(HAVE_PTW32_CONFIG_H)
-#include "config.h"
-#endif /* HAVE_PTW32_CONFIG_H */
-
-#if !defined(NEED_FTIME)
-#include <time.h>
-#else /* NEED_FTIME */
-/* use native WIN32 time API */
-#endif /* NEED_FTIME */
-
-#if defined(HAVE_SIGNAL_H)
-#include <signal.h>
-#endif /* HAVE_SIGNAL_H */
-
-#include <limits.h>
-
-/*
- * Boolean values to make us independent of system includes.
- */
-enum {
-  PTW32_FALSE = 0,
-  PTW32_TRUE = (! PTW32_FALSE)
-};
-
-/*
- * This is a duplicate of what is in the autoconf config.h,
- * which is only used when building the pthread-win32 libraries.
- */
-
-#if !defined(PTW32_CONFIG_H)
-#  if defined(WINCE)
-#    define NEED_ERRNO
-#    define NEED_SEM
-#  endif
-#  if defined(__MINGW64__)
-#    define HAVE_STRUCT_TIMESPEC
-#    define HAVE_MODE_T
-#  elif defined(_UWIN) || defined(__MINGW32__)
-#    define HAVE_MODE_T
-#  endif
-#endif
-
-/*
- *
- */
-
-#if PTW32_LEVEL >= PTW32_LEVEL_MAX
-#if defined(NEED_ERRNO)
-#include "need_errno.h"
-#else
-#include <errno.h>
-#endif
-#endif /* PTW32_LEVEL >= PTW32_LEVEL_MAX */
-
-/*
- * Several systems don't define some error numbers.
- */
-#if !defined(ENOTSUP)
-#  define ENOTSUP 48   /* This is the value in Solaris. */
-#endif
-
-#if !defined(ETIMEDOUT)
-#  define ETIMEDOUT 10060 /* Same as WSAETIMEDOUT */
-#endif
-
-#if !defined(ENOSYS)
-#  define ENOSYS 140     /* Semi-arbitrary value */
-#endif
-
-#if !defined(EDEADLK)
-#  if defined(EDEADLOCK)
-#    define EDEADLK EDEADLOCK
-#  else
-#    define EDEADLK 36     /* This is the value in MSVC. */
-#  endif
-#endif
-
-/* POSIX 2008 - related to robust mutexes */
-#if !defined(EOWNERDEAD)
-#  define EOWNERDEAD 43
-#endif
-#if !defined(ENOTRECOVERABLE)
-#  define ENOTRECOVERABLE 44
-#endif
-
-#include <sched.h>
-
-/*
- * To avoid including windows.h we define only those things that we
- * actually need from it.
- */
-#if !defined(PTW32_INCLUDE_WINDOWS_H)
-#if !defined(HANDLE)
-# define PTW32__HANDLE_DEF
-# define HANDLE void *
-#endif
-#if !defined(DWORD)
-# define PTW32__DWORD_DEF
-# define DWORD unsigned long
-#endif
-#endif
-
-#if !defined(HAVE_STRUCT_TIMESPEC)
-#define HAVE_STRUCT_TIMESPEC
-#if !defined(_TIMESPEC_DEFINED)
-#define _TIMESPEC_DEFINED
-struct timespec {
-        time_t tv_sec;
-        long tv_nsec;
-};
-#endif /* _TIMESPEC_DEFINED */
-#endif /* HAVE_STRUCT_TIMESPEC */
-
-#if !defined(SIG_BLOCK)
-#define SIG_BLOCK 0
-#endif /* SIG_BLOCK */
-
-#if !defined(SIG_UNBLOCK)
-#define SIG_UNBLOCK 1
-#endif /* SIG_UNBLOCK */
-
-#if !defined(SIG_SETMASK)
-#define SIG_SETMASK 2
-#endif /* SIG_SETMASK */
-
-#if defined(__cplusplus)
-extern "C"
-{
-#endif                          /* __cplusplus */
-
-/*
- * -------------------------------------------------------------
- *
- * POSIX 1003.1-2001 Options
- * =========================
- *
- * Options are normally set in <unistd.h>, which is not provided
- * with pthreads-win32.
- *
- * For conformance with the Single Unix Specification (version 3), all of the
- * options below are defined, and have a value of either -1 (not supported)
- * or a POSIX version value (supported; the definitions below use 200809L).
- *
- * These options can neither be left undefined nor have a value of 0, because
- * either indicates that sysconf(), which is not implemented, may be used at
- * runtime to check the status of the option.
- *
- * _POSIX_THREADS (== 200112L)
- *                      If == 200112L, you can use threads
- *
- * _POSIX_THREAD_ATTR_STACKSIZE (== 200112L)
- *                      If == 200112L, you can control the size of a thread's
- *                      stack
- *                              pthread_attr_getstacksize
- *                              pthread_attr_setstacksize
- *
- * _POSIX_THREAD_ATTR_STACKADDR (== -1)
- *                      If == 200112L, you can allocate and control a thread's
- *                      stack. If not supported, the following functions
- *                      will return ENOSYS, indicating they are not
- *                      supported:
- *                              pthread_attr_getstackaddr
- *                              pthread_attr_setstackaddr
- *
- * _POSIX_THREAD_PRIORITY_SCHEDULING (== -1)
- *                      If == 200112L, you can use realtime scheduling.
- *                      This option indicates that the behaviour of some
- *                      implemented functions conforms to the additional TPS
- *                      requirements in the standard. E.g. rwlocks favour
- *                      writers over readers when threads have equal priority.
- *
- * _POSIX_THREAD_PRIO_INHERIT (== -1)
- *                      If == 200112L, you can create priority inheritance
- *                      mutexes.
- *                              pthread_mutexattr_getprotocol +
- *                              pthread_mutexattr_setprotocol +
- *
- * _POSIX_THREAD_PRIO_PROTECT (== -1)
- *                      If == 200112L, you can create priority ceiling mutexes
- *                      Indicates the availability of:
- *                              pthread_mutex_getprioceiling
- *                              pthread_mutex_setprioceiling
- *                              pthread_mutexattr_getprioceiling
- *                              pthread_mutexattr_getprotocol     +
- *                              pthread_mutexattr_setprioceiling
- *                              pthread_mutexattr_setprotocol     +
- *
- * _POSIX_THREAD_PROCESS_SHARED (== -1)
- *                      If set, you can create mutexes and condition
- *                      variables that can be shared with another
- *                      process. If set, indicates the availability
- *                      of:
- *                              pthread_mutexattr_getpshared
- *                              pthread_mutexattr_setpshared
- *                              pthread_condattr_getpshared
- *                              pthread_condattr_setpshared
- *
- * _POSIX_THREAD_SAFE_FUNCTIONS (== 200112L)
- *                      If == 200112L you can use the special *_r library
- *                      functions that provide thread-safe behaviour
- *
- * _POSIX_READER_WRITER_LOCKS (== 200112L)
- *                      If == 200112L, you can use read/write locks
- *
- * _POSIX_SPIN_LOCKS (== 200112L)
- *                      If == 200112L, you can use spin locks
- *
- * _POSIX_BARRIERS (== 200112L)
- *                      If == 200112L, you can use barriers
- *
- *      + These functions provide both 'inherit' and/or
- *        'protect' protocol, based upon these macro
- *        settings.
- *
- * -------------------------------------------------------------
- */
-
-/*
- * POSIX Options
- */
-#undef _POSIX_THREADS
-#define _POSIX_THREADS 200809L
-
-#undef _POSIX_READER_WRITER_LOCKS
-#define _POSIX_READER_WRITER_LOCKS 200809L
-
-#undef _POSIX_SPIN_LOCKS
-#define _POSIX_SPIN_LOCKS 200809L
-
-#undef _POSIX_BARRIERS
-#define _POSIX_BARRIERS 200809L
-
-#undef _POSIX_THREAD_SAFE_FUNCTIONS
-#define _POSIX_THREAD_SAFE_FUNCTIONS 200809L
-
-#undef _POSIX_THREAD_ATTR_STACKSIZE
-#define _POSIX_THREAD_ATTR_STACKSIZE 200809L
-
-/*
- * The following options are not supported
- */
-#undef _POSIX_THREAD_ATTR_STACKADDR
-#define _POSIX_THREAD_ATTR_STACKADDR -1
-
-#undef _POSIX_THREAD_PRIO_INHERIT
-#define _POSIX_THREAD_PRIO_INHERIT -1
-
-#undef _POSIX_THREAD_PRIO_PROTECT
-#define _POSIX_THREAD_PRIO_PROTECT -1
-
-/* TPS is not fully supported.  */
-#undef _POSIX_THREAD_PRIORITY_SCHEDULING
-#define _POSIX_THREAD_PRIORITY_SCHEDULING -1
-
-#undef _POSIX_THREAD_PROCESS_SHARED
-#define _POSIX_THREAD_PROCESS_SHARED -1
-
-
-/*
- * POSIX 1003.1-2001 Limits
- * ===========================
- *
- * These limits are normally set in <limits.h>, which is not provided with
- * pthreads-win32.
- *
- * PTHREAD_DESTRUCTOR_ITERATIONS
- *                      Maximum number of attempts to destroy
- *                      a thread's thread-specific data on
- *                      termination (must be at least 4)
- *
- * PTHREAD_KEYS_MAX
- *                      Maximum number of thread-specific data keys
- *                      available per process (must be at least 128)
- *
- * PTHREAD_STACK_MIN
- *                      Minimum supported stack size for a thread
- *
- * PTHREAD_THREADS_MAX
- *                      Maximum number of threads supported per
- *                      process (must be at least 64).
- *
- * SEM_NSEMS_MAX
- *                      The maximum number of semaphores a process can have.
- *                      (must be at least 256)
- *
- * SEM_VALUE_MAX
- *                      The maximum value a semaphore can have.
- *                      (must be at least 32767)
- *
- */
-#undef _POSIX_THREAD_DESTRUCTOR_ITERATIONS
-#define _POSIX_THREAD_DESTRUCTOR_ITERATIONS     4
-
-#undef PTHREAD_DESTRUCTOR_ITERATIONS
-#define PTHREAD_DESTRUCTOR_ITERATIONS           _POSIX_THREAD_DESTRUCTOR_ITERATIONS
-
-#undef _POSIX_THREAD_KEYS_MAX
-#define _POSIX_THREAD_KEYS_MAX                  128
-
-#undef PTHREAD_KEYS_MAX
-#define PTHREAD_KEYS_MAX                        _POSIX_THREAD_KEYS_MAX
-
-#undef PTHREAD_STACK_MIN
-#define PTHREAD_STACK_MIN                       0
-
-#undef _POSIX_THREAD_THREADS_MAX
-#define _POSIX_THREAD_THREADS_MAX               64
-
-  /* Arbitrary value */
-#undef PTHREAD_THREADS_MAX
-#define PTHREAD_THREADS_MAX                     2019
-
-#undef _POSIX_SEM_NSEMS_MAX
-#define _POSIX_SEM_NSEMS_MAX                    256
-
-  /* Arbitrary value */
-#undef SEM_NSEMS_MAX
-#define SEM_NSEMS_MAX                           1024
-
-#undef _POSIX_SEM_VALUE_MAX
-#define _POSIX_SEM_VALUE_MAX                    32767
-
-#undef SEM_VALUE_MAX
-#define SEM_VALUE_MAX                           INT_MAX
-
-
-#if defined(__GNUC__) && !defined(__declspec)
-# error Please upgrade your GNU compiler to one that supports __declspec.
-#endif
-
-/*
- * When building the library, you should define PTW32_BUILD so that
- * the variables/functions are exported correctly. When using the library,
- * do NOT define PTW32_BUILD, and then the variables/functions will
- * be imported correctly.
- */
-#if !defined(PTW32_STATIC_LIB)
-#  if defined(PTW32_BUILD)
-#    define PTW32_DLLPORT __declspec (dllexport)
-#  else
-#    define PTW32_DLLPORT __declspec (dllimport)
-#  endif
-#else
-#  define PTW32_DLLPORT
-#endif
-
-/*
- * The Open Watcom C/C++ compiler uses a non-standard calling convention
- * that passes function args in registers unless __cdecl is explicitly specified
- * in exposed function prototypes.
- *
- * We force all calls to cdecl even though this could slow Watcom code down
- * slightly. If you know that the Watcom compiler will be used to build both
- * the DLL and application, then you can probably define this as a null string.
- * Remember that pthread.h (this file) is used for both the DLL and application builds.
- */
-#define PTW32_CDECL __cdecl
-
-#if defined(_UWIN) && PTW32_LEVEL >= PTW32_LEVEL_MAX
-#   include     <sys/types.h>
-#else
-/*
- * Generic handle type - intended to extend uniqueness beyond
- * that available with a simple pointer. It should scale for either
- * IA-32 or IA-64.
- */
-typedef struct {
-    void * p;                   /* Pointer to actual object */
-    unsigned int x;             /* Extra information - reuse count etc */
-} ptw32_handle_t;
-
-typedef ptw32_handle_t pthread_t;
-typedef struct pthread_attr_t_ * pthread_attr_t;
-typedef struct pthread_once_t_ pthread_once_t;
-typedef struct pthread_key_t_ * pthread_key_t;
-typedef struct pthread_mutex_t_ * pthread_mutex_t;
-typedef struct pthread_mutexattr_t_ * pthread_mutexattr_t;
-typedef struct pthread_cond_t_ * pthread_cond_t;
-typedef struct pthread_condattr_t_ * pthread_condattr_t;
-#endif
-typedef struct pthread_rwlock_t_ * pthread_rwlock_t;
-typedef struct pthread_rwlockattr_t_ * pthread_rwlockattr_t;
-typedef struct pthread_spinlock_t_ * pthread_spinlock_t;
-typedef struct pthread_barrier_t_ * pthread_barrier_t;
-typedef struct pthread_barrierattr_t_ * pthread_barrierattr_t;
-
-/*
- * ====================
- * ====================
- * POSIX Threads
- * ====================
- * ====================
- */
-
-enum {
-/*
- * pthread_attr_{get,set}detachstate
- */
-  PTHREAD_CREATE_JOINABLE       = 0,  /* Default */
-  PTHREAD_CREATE_DETACHED       = 1,
-
-/*
- * pthread_attr_{get,set}inheritsched
- */
-  PTHREAD_INHERIT_SCHED         = 0,
-  PTHREAD_EXPLICIT_SCHED        = 1,  /* Default */
-
-/*
- * pthread_attr_{get,set}scope
- */
-  PTHREAD_SCOPE_PROCESS         = 0,
-  PTHREAD_SCOPE_SYSTEM          = 1,  /* Default */
-
-/*
- * pthread_setcancelstate parameters
- */
-  PTHREAD_CANCEL_ENABLE         = 0,  /* Default */
-  PTHREAD_CANCEL_DISABLE        = 1,
-
-/*
- * pthread_setcanceltype parameters
- */
-  PTHREAD_CANCEL_ASYNCHRONOUS   = 0,
-  PTHREAD_CANCEL_DEFERRED       = 1,  /* Default */
-
-/*
- * pthread_mutexattr_{get,set}pshared
- * pthread_condattr_{get,set}pshared
- */
-  PTHREAD_PROCESS_PRIVATE       = 0,
-  PTHREAD_PROCESS_SHARED        = 1,
-
-/*
- * pthread_mutexattr_{get,set}robust
- */
-  PTHREAD_MUTEX_STALLED         = 0,  /* Default */
-  PTHREAD_MUTEX_ROBUST          = 1,
-
-/*
- * pthread_barrier_wait
- */
-  PTHREAD_BARRIER_SERIAL_THREAD = -1
-};
-
-/*
- * ====================
- * ====================
- * Cancelation
- * ====================
- * ====================
- */
-#define PTHREAD_CANCELED       ((void *)(size_t) -1)
-
-
-/*
- * ====================
- * ====================
- * Once Key
- * ====================
- * ====================
- */
-#define PTHREAD_ONCE_INIT       { PTW32_FALSE, 0, 0, 0}
-
-struct pthread_once_t_
-{
-  int          done;        /* indicates if user function has been executed */
-  void *       lock;
-  int          reserved1;
-  int          reserved2;
-};
-
-
-/*
- * ====================
- * ====================
- * Object initialisers
- * ====================
- * ====================
- */
-#define PTHREAD_MUTEX_INITIALIZER ((pthread_mutex_t)(size_t) -1)
-#define PTHREAD_RECURSIVE_MUTEX_INITIALIZER ((pthread_mutex_t)(size_t) -2)
-#define PTHREAD_ERRORCHECK_MUTEX_INITIALIZER ((pthread_mutex_t)(size_t) -3)
-
-/*
- * Compatibility with LinuxThreads
- */
-#define PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP PTHREAD_RECURSIVE_MUTEX_INITIALIZER
-#define PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP PTHREAD_ERRORCHECK_MUTEX_INITIALIZER
-
-#define PTHREAD_COND_INITIALIZER ((pthread_cond_t)(size_t) -1)
-
-#define PTHREAD_RWLOCK_INITIALIZER ((pthread_rwlock_t)(size_t) -1)
-
-#define PTHREAD_SPINLOCK_INITIALIZER ((pthread_spinlock_t)(size_t) -1)
-
-
-/*
- * Mutex types.
- */
-enum
-{
-  /* Compatibility with LinuxThreads */
-  PTHREAD_MUTEX_FAST_NP,
-  PTHREAD_MUTEX_RECURSIVE_NP,
-  PTHREAD_MUTEX_ERRORCHECK_NP,
-  PTHREAD_MUTEX_TIMED_NP = PTHREAD_MUTEX_FAST_NP,
-  PTHREAD_MUTEX_ADAPTIVE_NP = PTHREAD_MUTEX_FAST_NP,
-  /* For compatibility with POSIX */
-  PTHREAD_MUTEX_NORMAL = PTHREAD_MUTEX_FAST_NP,
-  PTHREAD_MUTEX_RECURSIVE = PTHREAD_MUTEX_RECURSIVE_NP,
-  PTHREAD_MUTEX_ERRORCHECK = PTHREAD_MUTEX_ERRORCHECK_NP,
-  PTHREAD_MUTEX_DEFAULT = PTHREAD_MUTEX_NORMAL
-};
-
-
-typedef struct ptw32_cleanup_t ptw32_cleanup_t;
-
-#if defined(_MSC_VER)
-/* Disable MSVC 'anachronism used' warning */
-#pragma warning( disable : 4229 )
-#endif
-
-typedef void (* PTW32_CDECL ptw32_cleanup_callback_t)(void *);
-
-#if defined(_MSC_VER)
-#pragma warning( default : 4229 )
-#endif
-
-struct ptw32_cleanup_t
-{
-  ptw32_cleanup_callback_t routine;
-  void *arg;
-  struct ptw32_cleanup_t *prev;
-};
-
-#if defined(__CLEANUP_SEH)
-        /*
-         * WIN32 SEH version of cancel cleanup.
-         */
-
-#define pthread_cleanup_push( _rout, _arg ) \
-        { \
-            ptw32_cleanup_t     _cleanup; \
-            \
-        _cleanup.routine        = (ptw32_cleanup_callback_t)(_rout); \
-            _cleanup.arg        = (_arg); \
-            __try \
-              { \
-
-#define pthread_cleanup_pop( _execute ) \
-              } \
-            __finally \
-                { \
-                    if( _execute || AbnormalTermination()) \
-                      { \
-                          (*(_cleanup.routine))( _cleanup.arg ); \
-                      } \
-                } \
-        }
-
-#else /* __CLEANUP_SEH */
-
-#if defined(__CLEANUP_C)
-
-        /*
-         * C implementation of PThreads cancel cleanup
-         */
-
-#define pthread_cleanup_push( _rout, _arg ) \
-        { \
-            ptw32_cleanup_t     _cleanup; \
-            \
-            ptw32_push_cleanup( &_cleanup, (ptw32_cleanup_callback_t) (_rout), (_arg) ); \
-
-#define pthread_cleanup_pop( _execute ) \
-            (void) ptw32_pop_cleanup( _execute ); \
-        }
-
-#else /* __CLEANUP_C */
-
-#if defined(__CLEANUP_CXX)
-
-        /*
-         * C++ version of cancel cleanup.
-         * - John E. Bossom.
-         */
-
-        class PThreadCleanup {
-          /*
-           * PThreadCleanup
-           *
-           * Purpose
-           *      This class is a C++ helper class that is
-           *      used to implement pthread_cleanup_push/
-           *      pthread_cleanup_pop.
-           *      The destructor of this class automatically
-           *      pops the pushed cleanup routine regardless
-           *      of how the code exits the scope
-           *      (i.e. such as by an exception)
-           */
-      ptw32_cleanup_callback_t cleanUpRout;
-          void    *       obj;
-          int             executeIt;
-
-        public:
-          PThreadCleanup() :
-            cleanUpRout( 0 ),
-            obj( 0 ),
-            executeIt( 0 )
-            /*
-             * No cleanup performed
-             */
-            {
-            }
-
-          PThreadCleanup(
-             ptw32_cleanup_callback_t routine,
-                         void    *       arg ) :
-            cleanUpRout( routine ),
-            obj( arg ),
-            executeIt( 1 )
-            /*
-             * Registers a cleanup routine for 'arg'
-             */
-            {
-            }
-
-          ~PThreadCleanup()
-            {
-              if ( executeIt && ((void *) cleanUpRout != (void *) 0) )
-                {
-                  (void) (*cleanUpRout)( obj );
-                }
-            }
-
-          void execute( int exec )
-            {
-              executeIt = exec;
-            }
-        };
-
-        /*
-         * C++ implementation of PThreads cancel cleanup;
-         * This implementation takes advantage of a helper
-         * class whose destructor automatically calls the
-         * cleanup routine if we exit our scope weirdly
-         */
-#define pthread_cleanup_push( _rout, _arg ) \
-        { \
-            PThreadCleanup  cleanup((ptw32_cleanup_callback_t)(_rout), \
-                                    (void *) (_arg) );
-
-#define pthread_cleanup_pop( _execute ) \
-            cleanup.execute( _execute ); \
-        }
-
-#else
-
-#error ERROR [__FILE__, line __LINE__]: Cleanup type undefined.
-
-#endif /* __CLEANUP_CXX */
-
-#endif /* __CLEANUP_C */
-
-#endif /* __CLEANUP_SEH */
-
-/*
- * ===============
- * ===============
- * Methods
- * ===============
- * ===============
- */
-
-/*
- * PThread Attribute Functions
- */
-PTW32_DLLPORT int PTW32_CDECL pthread_attr_init (pthread_attr_t * attr);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_attr_destroy (pthread_attr_t * attr);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_attr_getdetachstate (const pthread_attr_t * attr,
-                                         int *detachstate);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_attr_getstackaddr (const pthread_attr_t * attr,
-                                       void **stackaddr);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_attr_getstacksize (const pthread_attr_t * attr,
-                                       size_t * stacksize);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_attr_setdetachstate (pthread_attr_t * attr,
-                                         int detachstate);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_attr_setstackaddr (pthread_attr_t * attr,
-                                       void *stackaddr);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_attr_setstacksize (pthread_attr_t * attr,
-                                       size_t stacksize);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_attr_getschedparam (const pthread_attr_t *attr,
-                                        struct sched_param *param);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_attr_setschedparam (pthread_attr_t *attr,
-                                        const struct sched_param *param);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_attr_setschedpolicy (pthread_attr_t *,
-                                         int);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_attr_getschedpolicy (const pthread_attr_t *,
-                                         int *);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_attr_setinheritsched(pthread_attr_t * attr,
-                                         int inheritsched);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_attr_getinheritsched(const pthread_attr_t * attr,
-                                         int * inheritsched);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_attr_setscope (pthread_attr_t *,
-                                   int);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_attr_getscope (const pthread_attr_t *,
-                                   int *);
-
-/*
- * PThread Functions
- */
-PTW32_DLLPORT int PTW32_CDECL pthread_create (pthread_t * tid,
-                            const pthread_attr_t * attr,
-                            void *(PTW32_CDECL *start) (void *),
-                            void *arg);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_detach (pthread_t tid);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_equal (pthread_t t1,
-                           pthread_t t2);
-
-PTW32_DLLPORT void PTW32_CDECL pthread_exit (void *value_ptr);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_join (pthread_t thread,
-                          void **value_ptr);
-
-PTW32_DLLPORT pthread_t PTW32_CDECL pthread_self (void);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_cancel (pthread_t thread);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_setcancelstate (int state,
-                                    int *oldstate);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_setcanceltype (int type,
-                                   int *oldtype);
-
-PTW32_DLLPORT void PTW32_CDECL pthread_testcancel (void);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_once (pthread_once_t * once_control,
-                          void (PTW32_CDECL *init_routine) (void));
-
-#if PTW32_LEVEL >= PTW32_LEVEL_MAX
-PTW32_DLLPORT ptw32_cleanup_t * PTW32_CDECL ptw32_pop_cleanup (int execute);
-
-PTW32_DLLPORT void PTW32_CDECL ptw32_push_cleanup (ptw32_cleanup_t * cleanup,
-                                 ptw32_cleanup_callback_t routine,
-                                 void *arg);
-#endif /* PTW32_LEVEL >= PTW32_LEVEL_MAX */
-
-/*
- * Thread Specific Data Functions
- */
-PTW32_DLLPORT int PTW32_CDECL pthread_key_create (pthread_key_t * key,
-                                void (PTW32_CDECL *destructor) (void *));
-
-PTW32_DLLPORT int PTW32_CDECL pthread_key_delete (pthread_key_t key);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_setspecific (pthread_key_t key,
-                                 const void *value);
-
-PTW32_DLLPORT void * PTW32_CDECL pthread_getspecific (pthread_key_t key);
-
-
-/*
- * Mutex Attribute Functions
- */
-PTW32_DLLPORT int PTW32_CDECL pthread_mutexattr_init (pthread_mutexattr_t * attr);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_mutexattr_destroy (pthread_mutexattr_t * attr);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_mutexattr_getpshared (const pthread_mutexattr_t
-                                          * attr,
-                                          int *pshared);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_mutexattr_setpshared (pthread_mutexattr_t * attr,
-                                          int pshared);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_mutexattr_settype (pthread_mutexattr_t * attr, int kind);
-PTW32_DLLPORT int PTW32_CDECL pthread_mutexattr_gettype (const pthread_mutexattr_t * attr, int *kind);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_mutexattr_setrobust(
-                                           pthread_mutexattr_t *attr,
-                                           int robust);
-PTW32_DLLPORT int PTW32_CDECL pthread_mutexattr_getrobust(
-                                           const pthread_mutexattr_t * attr,
-                                           int * robust);
-
-/*
- * Barrier Attribute Functions
- */
-PTW32_DLLPORT int PTW32_CDECL pthread_barrierattr_init (pthread_barrierattr_t * attr);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_barrierattr_destroy (pthread_barrierattr_t * attr);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_barrierattr_getpshared (const pthread_barrierattr_t
-                                            * attr,
-                                            int *pshared);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_barrierattr_setpshared (pthread_barrierattr_t * attr,
-                                            int pshared);
-
-/*
- * Mutex Functions
- */
-PTW32_DLLPORT int PTW32_CDECL pthread_mutex_init (pthread_mutex_t * mutex,
-                                const pthread_mutexattr_t * attr);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_mutex_destroy (pthread_mutex_t * mutex);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_mutex_lock (pthread_mutex_t * mutex);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_mutex_timedlock(pthread_mutex_t * mutex,
-                                    const struct timespec *abstime);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_mutex_trylock (pthread_mutex_t * mutex);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_mutex_unlock (pthread_mutex_t * mutex);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_mutex_consistent (pthread_mutex_t * mutex);
-
-/*
- * Spinlock Functions
- */
-PTW32_DLLPORT int PTW32_CDECL pthread_spin_init (pthread_spinlock_t * lock, int pshared);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_spin_destroy (pthread_spinlock_t * lock);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_spin_lock (pthread_spinlock_t * lock);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_spin_trylock (pthread_spinlock_t * lock);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_spin_unlock (pthread_spinlock_t * lock);
-
-/*
- * Barrier Functions
- */
-PTW32_DLLPORT int PTW32_CDECL pthread_barrier_init (pthread_barrier_t * barrier,
-                                  const pthread_barrierattr_t * attr,
-                                  unsigned int count);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_barrier_destroy (pthread_barrier_t * barrier);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_barrier_wait (pthread_barrier_t * barrier);
-
-/*
- * Condition Variable Attribute Functions
- */
-PTW32_DLLPORT int PTW32_CDECL pthread_condattr_init (pthread_condattr_t * attr);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_condattr_destroy (pthread_condattr_t * attr);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_condattr_getpshared (const pthread_condattr_t * attr,
-                                         int *pshared);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_condattr_setpshared (pthread_condattr_t * attr,
-                                         int pshared);
-
-/*
- * Condition Variable Functions
- */
-PTW32_DLLPORT int PTW32_CDECL pthread_cond_init (pthread_cond_t * cond,
-                               const pthread_condattr_t * attr);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_cond_destroy (pthread_cond_t * cond);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_cond_wait (pthread_cond_t * cond,
-                               pthread_mutex_t * mutex);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_cond_timedwait (pthread_cond_t * cond,
-                                    pthread_mutex_t * mutex,
-                                    const struct timespec *abstime);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_cond_signal (pthread_cond_t * cond);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_cond_broadcast (pthread_cond_t * cond);
-
-/*
- * Scheduling
- */
-PTW32_DLLPORT int PTW32_CDECL pthread_setschedparam (pthread_t thread,
-                                   int policy,
-                                   const struct sched_param *param);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_getschedparam (pthread_t thread,
-                                   int *policy,
-                                   struct sched_param *param);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_setconcurrency (int);
- 
-PTW32_DLLPORT int PTW32_CDECL pthread_getconcurrency (void);
-
-/*
- * Read-Write Lock Functions
- */
-PTW32_DLLPORT int PTW32_CDECL pthread_rwlock_init(pthread_rwlock_t *lock,
-                                const pthread_rwlockattr_t *attr);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_rwlock_destroy(pthread_rwlock_t *lock);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_rwlock_tryrdlock(pthread_rwlock_t *);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_rwlock_trywrlock(pthread_rwlock_t *);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_rwlock_rdlock(pthread_rwlock_t *lock);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_rwlock_timedrdlock(pthread_rwlock_t *lock,
-                                       const struct timespec *abstime);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_rwlock_wrlock(pthread_rwlock_t *lock);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_rwlock_timedwrlock(pthread_rwlock_t *lock,
-                                       const struct timespec *abstime);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_rwlock_unlock(pthread_rwlock_t *lock);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_rwlockattr_init (pthread_rwlockattr_t * attr);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_rwlockattr_destroy (pthread_rwlockattr_t * attr);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_rwlockattr_getpshared (const pthread_rwlockattr_t * attr,
-                                           int *pshared);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_rwlockattr_setpshared (pthread_rwlockattr_t * attr,
-                                           int pshared);
-
-#if PTW32_LEVEL >= PTW32_LEVEL_MAX - 1
-
-/*
- * Signal Functions. Should be defined in <signal.h> but MSVC and MinGW32
- * already have signal.h that don't define these.
- */
-PTW32_DLLPORT int PTW32_CDECL pthread_kill(pthread_t thread, int sig);
-
-/*
- * Non-portable functions
- */
-
-/*
- * Compatibility with Linux.
- */
-PTW32_DLLPORT int PTW32_CDECL pthread_mutexattr_setkind_np(pthread_mutexattr_t * attr,
-                                         int kind);
-PTW32_DLLPORT int PTW32_CDECL pthread_mutexattr_getkind_np(pthread_mutexattr_t * attr,
-                                         int *kind);
-
-/*
- * Possibly supported by other POSIX threads implementations
- */
-PTW32_DLLPORT int PTW32_CDECL pthread_delay_np (struct timespec * interval);
-PTW32_DLLPORT int PTW32_CDECL pthread_num_processors_np(void);
-PTW32_DLLPORT unsigned __int64 PTW32_CDECL pthread_getunique_np(pthread_t thread);
-
-/*
- * Useful if an application wants to statically link
- * the lib rather than load the DLL at run-time.
- */
-PTW32_DLLPORT int PTW32_CDECL pthread_win32_process_attach_np(void);
-PTW32_DLLPORT int PTW32_CDECL pthread_win32_process_detach_np(void);
-PTW32_DLLPORT int PTW32_CDECL pthread_win32_thread_attach_np(void);
-PTW32_DLLPORT int PTW32_CDECL pthread_win32_thread_detach_np(void);
-
-/*
- * Features that are auto-detected at load/run time.
- */
-PTW32_DLLPORT int PTW32_CDECL pthread_win32_test_features_np(int);
-enum ptw32_features {
-  PTW32_SYSTEM_INTERLOCKED_COMPARE_EXCHANGE = 0x0001, /* System provides it. */
-  PTW32_ALERTABLE_ASYNC_CANCEL              = 0x0002  /* Can cancel blocked threads. */
-};
-
-/*
- * Register a system time change with the library.
- * Causes the library to perform various functions
- * in response to the change. Should be called whenever
- * the application's top level window receives a
- * WM_TIMECHANGE message. It can be passed directly to
- * pthread_create() as a new thread if desired.
- */
-PTW32_DLLPORT void * PTW32_CDECL pthread_timechange_handler_np(void *);
-
-#endif /*PTW32_LEVEL >= PTW32_LEVEL_MAX - 1 */
-
-#if PTW32_LEVEL >= PTW32_LEVEL_MAX
-
-/*
- * Returns the Win32 HANDLE for the POSIX thread.
- */
-PTW32_DLLPORT HANDLE PTW32_CDECL pthread_getw32threadhandle_np(pthread_t thread);
-/*
- * Returns the win32 thread ID for POSIX thread.
- */
-PTW32_DLLPORT DWORD PTW32_CDECL pthread_getw32threadid_np (pthread_t thread);
-
-
-/*
- * Protected Methods
- *
- * This function blocks until the given WIN32 handle
- * is signaled or pthread_cancel had been called.
- * This function allows the caller to hook into the
- * PThreads cancel mechanism. It is implemented using
- *
- *              WaitForMultipleObjects
- *
- * on 'waitHandle' and a manually reset WIN32 Event
- * used to implement pthread_cancel. The 'timeout'
- * argument to TimedWait is simply passed to
- * WaitForMultipleObjects.
- */
-PTW32_DLLPORT int PTW32_CDECL pthreadCancelableWait (HANDLE waitHandle);
-PTW32_DLLPORT int PTW32_CDECL pthreadCancelableTimedWait (HANDLE waitHandle,
-                                        DWORD timeout);
-
-#endif /* PTW32_LEVEL >= PTW32_LEVEL_MAX */
-
-/*
- * Thread-Safe C Runtime Library Mappings.
- */
-#if !defined(_UWIN)
-#  if defined(NEED_ERRNO)
-     PTW32_DLLPORT int * PTW32_CDECL _errno( void );
-#  else
-#    if !defined(errno)
-#      if (defined(_MT) || defined(_DLL))
-         __declspec(dllimport) extern int * __cdecl _errno(void);
-#        define errno   (*_errno())
-#      endif
-#    endif
-#  endif
-#endif
-
-/*
- * Some compiler environments don't define some things.
- */
-#if defined(__BORLANDC__)
-#  define _ftime ftime
-#  define _timeb timeb
-#endif
-
-#if defined(__cplusplus)
-
-/*
- * Internal exceptions
- */
-class ptw32_exception {};                                  /* base type; the catch wrappers in this header rethrow it */
-class ptw32_exception_cancel : public ptw32_exception {};  /* presumably raised for thread cancelation - TODO confirm */
-class ptw32_exception_exit   : public ptw32_exception {};  /* presumably raised for pthread_exit - TODO confirm */
-
-#endif
-
-#if PTW32_LEVEL >= PTW32_LEVEL_MAX
-
-/* FIXME: This is only required if the library was built using SEH */
-/*
- * Get internal SEH tag
- */
-PTW32_DLLPORT DWORD PTW32_CDECL ptw32_get_exception_services_code(void);
-
-#endif /* PTW32_LEVEL >= PTW32_LEVEL_MAX */
-
-#if !defined(PTW32_BUILD)
-
-#if defined(__CLEANUP_SEH)
-
-/*
- * Redefine the SEH __except keyword to ensure that applications
- * propagate our internal exceptions up to the library's internal handlers.
- */
-#define __except( E ) \
-        __except( ( GetExceptionCode() == ptw32_get_exception_services_code() ) \
-                 ? EXCEPTION_CONTINUE_SEARCH : ( E ) )
-
-#endif /* __CLEANUP_SEH */
-
-#if defined(__CLEANUP_CXX)
-
-/*
- * Redefine the C++ catch keyword to ensure that applications
- * propagate our internal exceptions up to the library's internal handlers.
- */
-#if defined(_MSC_VER)
-        /*
-         * WARNING: Replace any 'catch( ... )' with 'PtW32CatchAll'
-         * if you want Pthread-Win32 cancelation and pthread_exit to work.
-         */
-
-#if !defined(PtW32NoCatchWarn)
-
-#pragma message("Specify \"/DPtW32NoCatchWarn\" compiler flag to skip this message.")
-#pragma message("------------------------------------------------------------------")
-#pragma message("When compiling applications with MSVC++ and C++ exception handling:")
-#pragma message("  Replace any 'catch( ... )' in routines called from POSIX threads")
-#pragma message("  with 'PtW32CatchAll' or 'CATCHALL' if you want POSIX thread")
-#pragma message("  cancelation and pthread_exit to work. For example:")
-#pragma message("")
-#pragma message("    #if defined(PtW32CatchAll)")
-#pragma message("      PtW32CatchAll")
-#pragma message("    #else")
-#pragma message("      catch(...)")
-#pragma message("    #endif")
-#pragma message("        {")
-#pragma message("          /* Catchall block processing */")
-#pragma message("        }")
-#pragma message("------------------------------------------------------------------")
-
-#endif
-
-#define PtW32CatchAll \
-        catch( ptw32_exception & ) { throw; } \
-        catch( ... )
-
-#else /* _MSC_VER */
-
-#define catch( E ) \
-        catch( ptw32_exception & ) { throw; } \
-        catch( E )
-
-#endif /* _MSC_VER */
-
-#endif /* __CLEANUP_CXX */
-
-#endif /* ! PTW32_BUILD */
-
-#if defined(__cplusplus)
-}                               /* End of extern "C" */
-#endif                          /* __cplusplus */
-
-#if defined(PTW32__HANDLE_DEF)
-# undef HANDLE
-#endif
-#if defined(PTW32__DWORD_DEF)
-# undef DWORD
-#endif
-
-#undef PTW32_LEVEL
-#undef PTW32_LEVEL_MAX
-
-#endif /* ! RC_INVOKED */
-
-#endif /* PTHREAD_H */
diff --git a/pllrepo/src/queue.c b/pllrepo/src/queue.c
deleted file mode 100644
index eecf3fb..0000000
--- a/pllrepo/src/queue.c
+++ /dev/null
@@ -1,96 +0,0 @@
-/** 
- * PLL (version 1.0.0) a software library for phylogenetic inference
- * Copyright (C) 2013 Tomas Flouri and Alexandros Stamatakis
- *
- * Derived from 
- * RAxML-HPC, a program for sequential and parallel estimation of phylogenetic
- * trees by Alexandros Stamatakis
- *
- * This program is free software: you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation, either version 3 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- * 
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * For any other enquiries send an Email to Tomas Flouri
- * Tomas.Flouri at h-its.org
- *
- * When publishing work that uses PLL please cite PLL
- * 
- * @file queue.c
- */
-#include <stdio.h>
-#include "queue.h"
-#include "mem_alloc.h"
-
-/* Allocate an empty queue into *q; returns 1, or 0 if rax_malloc gave NULL. */
-int
-pllQueueInit (pllQueue ** q)
-{
-  pllQueue * fresh = (pllQueue *) rax_malloc (sizeof (pllQueue));
-
-  *q = fresh;                      /* *q receives the result even when NULL */
-  if (fresh == NULL) return (0);
-  fresh->head = fresh->tail = NULL;
-  return (1);
-}
-
-/* Count the items in q by walking the list from head; a NULL q counts as 0. */
-int
-pllQueueSize (pllQueue * q)
-{
-  int count = 0;
-  struct pllQueueItem * cur = q ? q->head : NULL;
-
-  for (; cur != NULL; cur = cur->next)
-    ++count;
-
-  return (count);
-}
-
-/* Pop the oldest item from q into *item and free its list node.
- * Returns 1 on success, 0 when q is NULL or holds no items. */
-int
-pllQueueRemove (pllQueue * q, void ** item)
-{
-  struct pllQueueItem * front;
-
-  if (q == NULL || q->head == NULL) return (0);
-  front   = q->head;
-  *item   = front->item;
-  q->head = front->next;
-
-  /* the list just became empty, so tail must not keep pointing at front */
-  if (q->head == NULL) q->tail = NULL;
-  rax_free (front);
-  return (1);
-}
-
-/* Append item at the tail of q; only the pointer itself is stored.
- * Returns 1 on success, 0 when q is NULL or rax_malloc gave NULL. */
-int
-pllQueueAppend (pllQueue * q, void * item)
-{
-  struct pllQueueItem * node;
-
-  if (q == NULL) return (0);
-  node = (struct pllQueueItem *) rax_malloc (sizeof (struct pllQueueItem));
-  if (node == NULL) return (0);
-  node->item = item;
-  node->next = NULL;
-
-  /* an empty queue has no tail to link from, so the node becomes the head */
-  if (q->head != NULL)
-    q->tail->next = node;
-  else
-    q->head = node;
-  q->tail = node;
-  return (1);
-}
diff --git a/pllrepo/src/queue.h b/pllrepo/src/queue.h
deleted file mode 100644
index b359c4a..0000000
--- a/pllrepo/src/queue.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/** 
- * PLL (version 1.0.0) a software library for phylogenetic inference
- * Copyright (C) 2013 Tomas Flouri and Alexandros Stamatakis
- *
- * Derived from 
- * RAxML-HPC, a program for sequential and parallel estimation of phylogenetic
- * trees by Alexandros Stamatakis
- *
- * This program is free software: you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation, either version 3 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- * 
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * For any other enquiries send an Email to Tomas Flouri
- * Tomas.Flouri at h-its.org
- *
- * When publishing work that uses PLL please cite PLL
- * 
- * @file queue.h
- */
-#ifndef __pll_QUEUE__
-#define __pll_QUEUE__
-
-struct pllQueueItem                 /* one node of the singly linked list */
-{  
-  void * item;                      /* caller-supplied payload pointer */
-  struct pllQueueItem * next;       /* following node; NULL for the last node */
-}; 
-   /* queue handle: items are appended at tail and removed from head */
-typedef struct
-{  
-  struct pllQueueItem * head;       /* oldest node; NULL when the queue is empty */
-  struct pllQueueItem * tail;       /* newest node; NULL when the queue is empty */
-} pllQueue; 
-/* all four functions return an int; 0 is the failure / empty result */
-int pllQueueInit (pllQueue ** q);                 /* allocates *q; 1 on success */
-int pllQueueSize (pllQueue * q);                  /* number of items; 0 for NULL q */
-int pllQueueRemove (pllQueue * q, void ** item);  /* pops head into *item; 1 on success */
-int pllQueueAppend (pllQueue * q, void * item);   /* pushes at tail; 1 on success */
-#endif
diff --git a/pllrepo/src/randomTree.c b/pllrepo/src/randomTree.c
deleted file mode 100644
index c1d9af4..0000000
--- a/pllrepo/src/randomTree.c
+++ /dev/null
@@ -1,177 +0,0 @@
-/** 
- * PLL (version 1.0.0) a software library for phylogenetic inference
- * Copyright (C) 2013 Tomas Flouri and Alexandros Stamatakis
- *
- * Derived from 
- * RAxML-HPC, a program for sequential and parallel estimation of phylogenetic
- * trees by Alexandros Stamatakis
- *
- * This program is free software: you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation, either version 3 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- * 
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * For any other enquiries send an Email to Tomas Flouri
- * Tomas.Flouri at h-its.org
- *
- * When publishing work that uses PLL please cite PLL
- * 
- * @file randomTree.c
- */
-#include "mem_alloc.h"
-#include <math.h>
-#include <time.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <assert.h>
-
-#include "pll.h"
-#include "pllInternal.h"
-
-/* Hooks p->next to q and p->next->next to the node that was q->back. */
-static void insertTaxon (nodeptr p, nodeptr q)
-{
-  /* read q->back before the first hookupDefault call that involves q */
-  nodeptr formerBack = q->back;
-
-  hookupDefault(p->next, q);
-  hookupDefault(p->next->next, formerBack);
-}
-
-/* Takes the node at tr->nodep[tr->nextnode] (advancing nextnode), hooks it to
- * p, sets the back of its ->next and ->next->next to NULL and returns it. */
-static nodeptr buildNewTip (pllInstance *tr, nodeptr p)
-{
-  nodeptr fresh = tr->nodep[tr->nextnode];
-  tr->nextnode += 1;
-  hookupDefault(p, fresh);
-  fresh->next->back = (nodeptr)NULL;
-  fresh->next->next->back = (nodeptr)NULL;
-  return fresh;
-}
-
-/* Builds the initial three-taxon tree from the nodes numbered ip, iq and ir.
- *
- * tr->start becomes the node with the smallest of the three numbers and
- * tr->ntips is set to 3. */
-static void buildSimpleTreeRandom (pllInstance *tr, int ip, int iq, int ir)
-{
-  int smallest = PLL_MIN(ip, iq);
-  nodeptr first, inner;
-
-  if (ir < smallest)
-    smallest = ir;
-
-  tr->start = tr->nodep[smallest];
-  tr->ntips = 3;
-
-  /* connect ip with iq, then attach ir through a node from buildNewTip */
-  first = tr->nodep[ip];
-  hookupDefault(first, tr->nodep[iq]);
-  inner = buildNewTip(tr, tr->nodep[ir]);
-  insertTaxon(inner, first);
-}
-
-/* Scales one randum() draw by n and truncates it; asserts 0 <= res < n. */
-static int randomInt(int n, pllInstance *tr)
-{
-  int res = (int)((double)(n) * randum(&tr->randomNumberSeed));
-
-  assert(res >= 0 && res < n);
-
-  return res;
-}
-
-/* Fills perm[1..n] with a random permutation of 1..n (perm[0] is not touched).
- *
- * Position i is swapped with a position drawn from i..n via randomInt, so
- * perm must have room for n + 1 ints. */
-void makePermutation(int *perm, int n, pllInstance *tr)
-{
-  int i, k, held;
-
-  for (i = 1; i <= n; i++)
-    perm[i] = i;
-
-  for (i = 1; i <= n; i++)
-    {
-      k = randomInt(n + 1 - i, tr);   /* offset of the swap partner from i */
-      assert(i + k <= n);
-
-      held        = perm[i];
-      perm[i]     = perm[i + k];
-      perm[i + k] = held;
-    }
-}
-
-/* Appends p->next and p->next->next to branches[] for every non-tip node
- * reached from p (advancing *counter) and returns how many were appended.
- * The recursive calls are separate statements: as operands of one '+' their
- * evaluation order is unspecified in C, leaving the fill order to the compiler. */
-static int markBranches(nodeptr *branches, nodeptr p, int *counter, int numsp)
-{
-  int added = 2;
-  if(isTip(p->number, numsp))
-    return 0;
-  branches[(*counter)++] = p->next;
-  branches[(*counter)++] = p->next->next;
-  added += markBranches(branches, p->next->back, counter, numsp);
-  added += markBranches(branches, p->next->next->back, counter, numsp);
-  return added;
-}
-
-/* Builds a random tree over tr->mxtips taxa: the first three taxa of a random
- * permutation form the start tree, each further taxon joins at a drawn branch. */
-void pllMakeRandomTree(pllInstance *tr)
-{  
-  nodeptr 
-    p, 
-    f, 
-    randomBranch,
-    *branches = (nodeptr *)rax_malloc(sizeof(nodeptr) * (2 * tr->mxtips));    
-  /* NOTE(review): neither rax_malloc result here is checked before use */
-  int 
-    nextsp, 
-    *perm = (int *)rax_malloc((tr->mxtips + 1) * sizeof(int)), 
-    branchCounter;                      
-  
-  makePermutation(perm, tr->mxtips, tr);              
-  
-  tr->ntips = 0;       	       
-  tr->nextnode = tr->mxtips + 1;    
-  /* sets tr->ntips to 3; reads perm[1..3], so presumably mxtips >= 3 - TODO confirm */
-  buildSimpleTreeRandom(tr, perm[1], perm[2], perm[3]);
-  
-  while(tr->ntips < tr->mxtips) 
-    {	             
-      nextsp = ++(tr->ntips);             
-      p = tr->nodep[perm[nextsp]];            
-      /* presumably leaves the new node reachable as p->back, which is used below */
-      buildNewTip(tr, p);  	
-      
-      f = findAnyTip(tr->start, tr->mxtips);
-      f = f->back;
-      /* candidate branches: f itself plus everything markBranches collects */
-      branchCounter = 1;
-      branches[0] = f;
-      markBranches(branches, f, &branchCounter, tr->mxtips);
-
-      assert(branchCounter == ((2 * (tr->ntips - 1)) - 3));
-      
-      randomBranch = branches[randomInt(branchCounter, tr)];
-      
-      insertTaxon(p->back, randomBranch);
-    }
-  
-  rax_free(perm);            
-  rax_free(branches);
-}
-
diff --git a/pllrepo/src/recom.c b/pllrepo/src/recom.c
deleted file mode 100644
index 5ab20c7..0000000
--- a/pllrepo/src/recom.c
+++ /dev/null
@@ -1,689 +0,0 @@
-/** 
- * PLL (version 1.0.0) a software library for phylogenetic inference
- * Copyright (C) 2013 Tomas Flouri and Alexandros Stamatakis
- *
- * Derived from 
- * RAxML-HPC, a program for sequential and parallel estimation of phylogenetic
- * trees by Alexandros Stamatakis
- *
- * This program is free software: you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation, either version 3 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- * 
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * For any other enquiries send an Email to Tomas Flouri
- * Tomas.Flouri at h-its.org
- *
- * When publishing work that uses PLL please cite PLL
- * 
- * @file recom.c
- * @brief Functions used for recomputation of vectors (only a fraction of LH vectors stored in RAM)   
- */
-#include "mem_alloc.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <assert.h>
-#include <string.h>
-#include <limits.h>
-#include <errno.h>
-#include <time.h>
-#include <math.h>
-#ifndef WIN32
-#include <sys/time.h>
-#endif
-#include "pll.h"
-#include "pllInternal.h"
-
-/** @brief Locks node \a nodenum so that it remains available in memory
- *
- *  Clears the unpinnable flag of the slot that currently holds \a nodenum.
- *
- * @warning An available node does not need recomputing, but it must not be
- * unpinned while the rest of the traversal descriptor is built: unpinnable
- * must be PLL_FALSE at this point and is automatically set back to PLL_TRUE
- * after the counter post-order instructions have been executed. Without this
- * call the traversal will likely still work as long as
- * num_allocated_nodes >> log n, but wrong inner vectors will be used at the
- * wrong moment of pllNewviewIterative, careful!
- *
- *  @param rvec     Recomputation info
- *  @param nodenum  Node id that must remain available in memory
- *  @param mxtips   Number of tips in the tree
- */
-void protectNode(recompVectors *rvec, int nodenum, int mxtips)
-{
-  const int slot = rvec->iNode[nodenum - mxtips - 1];
-
-  assert(slot != PLL_NODE_UNPINNED);
-  assert(rvec->iVector[slot] == nodenum);
-
-  if(rvec->unpinnable[slot])
-    rvec->unpinnable[slot] = PLL_FALSE;
-}
-
-/** @brief Checks if \a nodenum is currently pinned (available in RAM)
- *
- *  A node counts as pinned when its entry in \a rvec->iNode differs from
- *  PLL_NODE_UNPINNED. \a nodenum is asserted to be greater than \a mxtips.
- *
- *  @note shall we document static functions?
- *
- *  @param rvec
- *    Recomputation info
- *
- *  @param nodenum
- *    Node id to be checked
- *
- *  @param mxtips
- *    Number of tips in the tree
- */
-static pllBoolean isNodePinned(recompVectors *rvec, int nodenum, int mxtips)
-{
-  assert(nodenum > mxtips);
-
-  return (rvec->iNode[nodenum - mxtips - 1] != PLL_NODE_UNPINNED)
-    ? PLL_TRUE : PLL_FALSE;
-}
-
-/** @brief Checks if the likelihood entries at node \a p should be updated
- *
- * A node needs update if one of the following holds:
- *    1. It is not oriented (p->x == 0)
- *    2. We are applying recomputations and node \a p is not currently available in RAM
- *
- *  @param recompute
- *    PLL_TRUE if recomputation is currently applied
- *
- *  @param rvec
- *    Recomputation info; only consulted when \a recompute is set and p->x != 0
- *
- *  @param p
- *    Node to check whether it is associated with the likelihood vector
- *
- *  @param mxtips
- *    Number of tips in the tree
- */
-pllBoolean needsRecomp(pllBoolean recompute, recompVectors *rvec, nodeptr p, int mxtips)
-{
-  if(!p->x) return PLL_TRUE;   /* not oriented */
-  return (recompute && !isNodePinned(rvec, p->number, mxtips)) ? PLL_TRUE : PLL_FALSE;
-}
-
-
-
-/** @brief Allocates memory for recomputation structure
- *
- *  @param tr  Instance whose mxtips and vectorRecomFraction size the
- *             structure; the result is stored in tr->rvec
- *
- *  @todo this should not depend on tr (\a vectorRecomFraction should be a parameter)
- */
-void allocRecompVectorsInfo(pllInstance *tr)
-{
-  recompVectors 
-    *v = (recompVectors *) rax_malloc(sizeof(recompVectors));
-  /* NOTE(review): no rax_malloc result in this function is checked for NULL */
-  int 
-    num_inner_nodes = tr->mxtips - 2,
-                    num_vectors, 
-                    i;
-
-  assert(tr->vectorRecomFraction > PLL_MIN_RECOM_FRACTION);
-  assert(tr->vectorRecomFraction < PLL_MAX_RECOM_FRACTION);
-
-  num_vectors = (int) (1 + tr->vectorRecomFraction * (float)num_inner_nodes); 
-  /* 3 + log2(mxtips) truncated to int; asserted below as lower bound for num_vectors */
-  int theoretical_minimum_of_vectors = 3 + ((int)(log((double)tr->mxtips)/log(2.0)));
-  //printBothOpen("Try to use %d ancestral vectors, min required %d\n", num_vectors, theoretical_minimum_of_vectors);
-
-  assert(num_vectors >= theoretical_minimum_of_vectors);
-  assert(num_vectors < tr->mxtips);
-
-
-  v->numVectors = num_vectors; /* asserted above to be >= the theoretical minimum */
-
-  /* init slot tracking: every slot starts unused and not unpinnable */
-
-  v->iVector         = (int *) rax_malloc((size_t)num_vectors * sizeof(int));
-  v->unpinnable      = (pllBoolean *) rax_malloc((size_t)num_vectors * sizeof(pllBoolean));
-
-  for(i = 0; i < num_vectors; i++)
-  {
-    v->iVector[i]         = PLL_SLOT_UNUSED;
-    v->unpinnable[i]      = PLL_FALSE;
-  }
-  /* init node tracking: every inner node starts unpinned */
-  v->iNode      = (int *) rax_malloc((size_t)num_inner_nodes * sizeof(int));
-  v->stlen      = (int *) rax_malloc((size_t)num_inner_nodes * sizeof(int));
-
-  for(i = 0; i < num_inner_nodes; i++)
-  {
-    v->iNode[i] = PLL_NODE_UNPINNED;
-    v->stlen[i] = PLL_INNER_NODE_INIT_STLEN;
-  }
-
-  v->allSlotsBusy = PLL_FALSE;
-
-  /* reset the usage statistic and store the structure on the instance */
-
-  v->maxVectorsUsed = 0;
-  tr->rvec = v;
-}
-
-/** @brief Find the slot id with the minimum cost to be recomputed.
- *
- *  The minimum cost is defined as the minimum subtree size. In general, the closer a vector is to the tips,
- *  the less recomputations are required to re-establish its likelihood entries
- *
- *  Only inner nodes that are pinned to a slot marked unpinnable are candidates;
- *  the search stops early at cost 2, the smallest value the final assert allows.
- *
- *  @param v
- *    Recomputation info
- *
- *  @param mxtips
- *    Number of tips in the tree
- *
- *  @return Slot of the cheapest candidate (asserted to exist)
- */
-static int findUnpinnableSlotByCost(recompVectors *v, int mxtips)
-{
-  int
-    i,
-    slot,
-    cheapest_slot = -1,
-    min_cost = mxtips * 2; /* more expensive than the most expensive*/
-
-  for(i = 0; i < mxtips - 2; i++)
-  {
-    slot = v->iNode[i];
-    if(slot == PLL_NODE_UNPINNED)
-      continue;
-
-    assert(slot >= 0 && slot < v->numVectors);
-
-    if(!v->unpinnable[slot])
-      continue;
-
-    assert(v->stlen[i] > 0);
-
-    if(v->stlen[i] < min_cost)
-    {
-      min_cost = v->stlen[i];
-      cheapest_slot = slot;
-      /* if the slot costs 2 you can break cause there is nothing cheaper to recompute */
-      if(min_cost == 2)
-        break;
-    }
-  }
-
-  assert(min_cost < mxtips * 2 && min_cost >= 2);
-  assert(cheapest_slot >= 0);
-  return cheapest_slot;
-}
-
-/* Marks slot as unused and, if a node occupied it, marks that node unpinned. */
-static void unpinAtomicSlot(recompVectors *v, int slot, int mxtips)
-{
-  const int occupant = v->iVector[slot];
-
-  v->iVector[slot] = PLL_SLOT_UNUSED;
-  if(occupant == PLL_SLOT_UNUSED)
-    return;
-  v->iNode[occupant - mxtips - 1] = PLL_NODE_UNPINNED;
-}
-
-/** @brief Finds the cheapest slot and unpins it
- *
- */
-static int findUnpinnableSlot(recompVectors *v, int mxtips)
-{
-  int     
-    slot_unpinned = findUnpinnableSlotByCost(v, mxtips);
-
-  assert(slot_unpinned >= 0);
-  assert(v->unpinnable[slot_unpinned]);
-
-  unpinAtomicSlot(v, slot_unpinned, mxtips);
-
-  return slot_unpinned;
-}
-
-/** @brief Finds a free slot 
- * 
- *  If all slots are occupied, it will find the cheapest slot and unpin it
- *
- */
-static int findFreeSlot(recompVectors *v, int mxtips)
-{
-  int 
-    slotno = -1, 
-           i;
-
-  assert(v->allSlotsBusy == PLL_FALSE);
-
-  for(i = 0; i < v->numVectors; i++)
-  {
-    if(v->iVector[i] == PLL_SLOT_UNUSED)
-    {
-      slotno = i;
-      break;
-    } 
-  }
-
-  if(slotno == -1)
-  {
-    v->allSlotsBusy = PLL_TRUE;
-    slotno = findUnpinnableSlot(v, mxtips);
-  }
-
-  return slotno;
-}
-
-
-/** @brief Pins node \a nodenum to slot \a slot
- *  
- *  The slot is initialized as non-unpinnable (ensures that the contents of the vector will not be overwritten)
- *
- *  @param nodenum
- *    node id
- *
- *  @param slot
- *    slot id 
- *    
- *  @param mxtips
- *    Number of tips in the tree
- *
- */
-static void pinAtomicNode(recompVectors *v, int nodenum, int slot, int mxtips)
-{
-  v->iVector[slot] = nodenum;
-  v->iNode[nodenum - mxtips - 1] = slot;
-  v->unpinnable[slot] = PLL_FALSE;
-}
-
-static int pinNode(recompVectors *rvec, int nodenum, int mxtips)
-{
-  int 
-    slot;
-
-  assert(!isNodePinned(rvec, nodenum, mxtips));
-
-  if(rvec->allSlotsBusy)
-    slot = findUnpinnableSlot(rvec, mxtips);
-  else
-    slot = findFreeSlot(rvec, mxtips);
-
-  assert(slot >= 0);
-
-  pinAtomicNode(rvec, nodenum, slot, mxtips);
-
-  if(slot > rvec->maxVectorsUsed)
-    rvec->maxVectorsUsed = slot;
-
-  assert(slot == rvec->iNode[nodenum - mxtips - 1]);
-
-  return slot;
-}
-
-/** @brief Marks node \a nodenum as unpinnable
- *  
- *  The slot holding the node \a nodenum is added to the pool of slot candidates that can be overwritten.
- *
- *  @param v
- *    Recomputation info
- *    
- *  @param nodenum
- *    node id
- *    
- *  @param mxtips
- *    Number of tips in the tree
- *
- */
-void unpinNode(recompVectors *v, int nodenum, int mxtips)
-{
-  if(nodenum <= mxtips)
-    return;
-  else
-  {
-    int 
-      slot = -1;
-
-    assert(nodenum > mxtips);
-    slot = v->iNode[nodenum-mxtips-1];
-    assert(slot >= 0 && slot < v->numVectors); 
-
-    if(slot >= 0 && slot < v->numVectors)
-      v->unpinnable[slot] = PLL_TRUE;
-  }
-}
-
-
-/** @brief Get a pinned slot \a slot that holds the likelihood vector for inner node \a nodenum
- *  
- *  If node \a nodenum is not pinned to any slot yet, the minimum cost replacement strategy is used.
- *
- *  @param v
- *    Recomputation info
- *    
- *  @param nodenum
- *    node id
- *    
- *  @param slot
- *    slot id
- *
- *  @param mxtips
- *    Number of tips in the tree
- *
- */
-pllBoolean getxVector(recompVectors *rvec, int nodenum, int *slot, int mxtips)
-{
-  pllBoolean 
-    slotNeedsRecomp = PLL_FALSE;
-
-  *slot = rvec->iNode[nodenum - mxtips - 1];
-
-  if(*slot == PLL_NODE_UNPINNED)
-  {
-    *slot = pinNode(rvec, nodenum, mxtips); /* now we will run the replacement strategy */
-    slotNeedsRecomp = PLL_TRUE;
-  }
-
-  assert(*slot >= 0 && *slot < rvec->numVectors);
-
-  rvec->unpinnable[*slot] = PLL_FALSE;
-
-  return slotNeedsRecomp;
-}
-
-
-#ifdef _DEBUG_RECOMPUTATION
-
-static int subtreeSize(nodeptr p, int maxTips)
-{
-  if(isTip(p->number, maxTips))
-    return 1;
-  else   
-    return (subtreeSize(p->next->back, maxTips) + subtreeSize(p->next->next->back, maxTips));
-}
-
-#endif
-
-/** @brief Annotates unoriented tree nodes \a tr with their subtree size
- *  
- *  This function recursively updates the subtree size of each inner node.
- *  @note The subtree size of node \a p->number is the number of nodes included in the subtree where node record \a p is the virtual root. 
- *
- *  @param p
- *    Pointer to node 
- *    
- *  @param maxTips
- *    Number of tips in the tree
- *
- *  @param rvec 
- *    Recomputation info
- *    
- *  @param count
- *    Number of visited nodes 
- */
-void computeTraversalInfoStlen(nodeptr p, int maxTips, recompVectors *rvec, int *count) 
-{
-  if(isTip(p->number, maxTips))
-    return;
-  else
-  {          
-    nodeptr 
-      q = p->next->back,
-        r = p->next->next->back;
-
-    *count += 1;
-    /* set xnode info at this point */     
-
-    if(isTip(r->number, maxTips) && isTip(q->number, maxTips))  
-    {
-      rvec->stlen[p->number - maxTips - 1] = 2;	
-
-#ifdef _DEBUG_RECOMPUTATION
-      assert(rvec->stlen[p->number - maxTips - 1] == subtreeSize(p, maxTips));
-#endif
-    }
-    else
-    {
-      if(isTip(r->number, maxTips) || isTip(q->number, maxTips))
-      {	     
-        nodeptr 
-          tmp;
-
-        if(isTip(r->number, maxTips))
-        {
-          tmp = r;
-          r = q;
-          q = tmp;
-        }
-
-        if(!r->x)
-          computeTraversalInfoStlen(r, maxTips, rvec, count);
-
-        rvec->stlen[p->number - maxTips - 1] = rvec->stlen[r->number - maxTips - 1] + 1;
-
-#ifdef _DEBUG_RECOMPUTATION	      
-        assert(rvec->stlen[p->number - maxTips - 1] == subtreeSize(p, maxTips));
-#endif
-      }
-      else
-      {		 
-        if(!r->x)
-          computeTraversalInfoStlen(r, maxTips, rvec, count);
-        if(!q->x)
-          computeTraversalInfoStlen(q, maxTips, rvec, count); 
-
-        rvec->stlen[p->number - maxTips - 1] = rvec->stlen[q->number - maxTips - 1] + rvec->stlen[r->number - maxTips - 1];	
-
-#ifdef _DEBUG_RECOMPUTATION
-        assert(rvec->stlen[p->number - maxTips - 1] == subtreeSize(p, maxTips));
-#endif
-      }
-    }
-  }
-}
-
-
-
-
-/* pre-compute the node stlens (this needs to be known prior to running the strategy) */
-/** @brief Annotates all tree nodes \a tr with their subtree size
- *  
- *  Similar to \a computeTraversalInfoStlen, but does a full traversal ignoring orientation.
- *  The minimum cost is defined as the minimum subtree size. In general, the closer a vector is to the tips,
- *  the fewer recomputations are required to re-establish its likelihood entries.
- *
- *  @param p
- *    Pointer to node 
- *    
- *  @param maxTips
- *    Number of tips in the tree
- *
- *  @param rvec 
- *    Recomputation info
- */
-void computeFullTraversalInfoStlen(nodeptr p, int maxTips, recompVectors *rvec) 
-{
-  if(isTip(p->number, maxTips))
-    return;
-  else
-  {    
-    nodeptr 
-      q = p->next->back,
-        r = p->next->next->back;     
-
-    if(isTip(r->number, maxTips) && isTip(q->number, maxTips))
-    {	  
-      rvec->stlen[p->number - maxTips - 1] = 2;
-
-#ifdef _DEBUG_RECOMPUTATION
-      assert(rvec->stlen[p->number - maxTips - 1] == subtreeSize(p, maxTips));
-#endif
-    }
-    else
-    {	    
-      if(isTip(r->number, maxTips) || isTip(q->number, maxTips))
-      {	  	      
-        nodeptr 
-          tmp;
-
-        if(isTip(r->number, maxTips))
-        {
-          tmp = r;
-          r = q;
-          q = tmp;
-        }
-
-        computeFullTraversalInfoStlen(r, maxTips, rvec);
-
-        rvec->stlen[p->number - maxTips - 1] = rvec->stlen[r->number - maxTips - 1] + 1;	   
-
-#ifdef _DEBUG_RECOMPUTATION
-        assert(rvec->stlen[p->number - maxTips - 1] == subtreeSize(p, maxTips));
-#endif
-      }
-      else
-      {	    	     	      
-        computeFullTraversalInfoStlen(r, maxTips, rvec);
-        computeFullTraversalInfoStlen(q, maxTips, rvec); 
-
-        rvec->stlen[p->number - maxTips - 1] = rvec->stlen[q->number - maxTips - 1] + rvec->stlen[r->number - maxTips - 1];
-#ifdef _DEBUG_RECOMPUTATION
-        assert(rvec->stlen[p->number - maxTips - 1] == subtreeSize(p, maxTips));
-#endif
-      }
-    }
-  }
-}
-
-
-#ifdef _DEBUG_RECOMPUTATION
-
-void allocTraversalCounter(pllInstance *tr)
-{
-  traversalCounter 
-    *tc;
-
-  int 
-    k;
-
-  tc = (traversalCounter *)rax_malloc(sizeof(traversalCounter));
-
-  tc->travlenFreq = (unsigned int *)rax_malloc(tr->mxtips * sizeof(int));
-
-  for(k = 0; k < tr->mxtips; k++)
-    tc->travlenFreq[k] = 0;
-
-  tc->tt = 0;
-  tc->ti = 0;
-  tc->ii = 0;
-  tc->numTraversals = 0;
-  tr->travCounter = tc;
-}
-
-/* recomp */
-/* code to track traversal descriptor stats */
-
-void countTraversal(pllInstance *tr)
-{
-  traversalInfo 
-    *ti   = tr->td[0].ti;
-  int i;
-  traversalCounter *tc = tr->travCounter; 
-  tc->numTraversals += 1;
-
-  /*
-  printBothOpen("trav #%d(%d):",tc->numTraversals, tr->td[0].count);
-  */
-
-  for(i = 1; i < tr->td[0].count; i++)
-  {
-    traversalInfo *tInfo = &ti[i];
-
-    /* 
-       printBothOpen(" %d q%d r%d |",  tInfo->pNumber, tInfo->qNumber, tInfo->rNumber);
-       printBothOpen("%d",  tInfo->pNumber);
-       */
-    switch(tInfo->tipCase)
-    {
-      case PLL_TIP_TIP: 
-        tc->tt++; 
-        /* printBothOpen("T"); */
-        break;		  
-      case PLL_TIP_INNER: 
-        tc->ti++; 
-        /* printBothOpen("M"); */
-        break;		  
-
-      case PLL_INNER_INNER: 
-        tc->ii++; 
-        /* printBothOpen("I"); */
-        break;		  
-      default: 
-        assert(0);
-    }
-    /* printBothOpen(" "); */
-  }
-  /* printBothOpen(" so far T %d, M %d, I %d \n", tc->tt, tc->ti,tc->ii); */
-  tc->travlenFreq[tr->td[0].count] += 1;
-}
-
-
-/*
-void printTraversalInfo(pllInstance *tr)
-{
-  int 
-    k, 
-    total_steps = 0;
-
-  printBothOpen("Traversals : %d \n", tr->travCounter->numTraversals);
-  printBothOpen("Traversals tt: %d \n", tr->travCounter->tt);
-  printBothOpen("Traversals ti: %d \n", tr->travCounter->ti);
-  printBothOpen("Traversals ii: %d \n", tr->travCounter->ii);
-  printBothOpen("all: %d \n", tr->travCounter->tt + tr->travCounter->ii + tr->travCounter->ti);
-  printBothOpen("Traversals len freq  : \n");
-  
-  for(k = 0; k < tr->mxtips; k++)
-  {
-    total_steps += tr->travCounter->travlenFreq[k] * (k - 1);
-    if(tr->travCounter->travlenFreq[k] > 0)
-      printBothOpen("len %d : %d\n", k, tr->travCounter->travlenFreq[k]);
-  }
-  printBothOpen("all steps: %d \n", total_steps);
-}
-*/
-/*end code to track traversal descriptor stats */
-/* E recomp */
-
-/*
-void printVector(double *vector, int len, char *name)
-{ 
-  int i;
-  printBothOpen("LHVECTOR %s :", name);
-  for(i=0; i < len; i++)
-  {
-    printBothOpen("%.2f ", vector[i]);
-    if(i>10)
-    {
-      printBothOpen("...");
-      break; 
-    }
-  } 
-  printBothOpen("\n");
-} 
-*/
-
-#endif
-
diff --git a/pllrepo/src/restartHashTable.c b/pllrepo/src/restartHashTable.c
deleted file mode 100644
index 007e247..0000000
--- a/pllrepo/src/restartHashTable.c
+++ /dev/null
@@ -1,357 +0,0 @@
-/** 
- * PLL (version 1.0.0) a software library for phylogenetic inference
- * Copyright (C) 2013 Tomas Flouri and Alexandros Stamatakis
- *
- * Derived from 
- * RAxML-HPC, a program for sequential and parallel estimation of phylogenetic
- * trees by Alexandros Stamatakis
- *
- * This program is free software: you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation, either version 3 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- * 
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * For any other enquiries send an Email to Tomas Flouri
- * Tomas.Flouri at h-its.org
- *
- * When publishing work that uses PLL please cite PLL
- * 
- * @file restartHashTable.c
- */
-#include "mem_alloc.h"
-
-#ifndef WIN32
-#include <sys/times.h>
-#include <sys/types.h>
-#include <sys/time.h>
-#include <unistd.h> 
-#endif
-
-#include <math.h>
-#include <time.h> 
-#include <stdlib.h>
-#include <stdio.h>
-#include <ctype.h>
-#include <string.h>
-#include <assert.h>
-
-#include "pll.h"
-#include "pllInternal.h"
-
-/*
-static pllBoolean treeNeedString(const char *fp, char c1, int *position)
-{
-  char 
-    c2 = fp[(*position)++];
-  
-  if(c2 == c1)  
-    return PLL_TRUE;
-  else  
-    {   
-      int 
-	lower = PLL_MAX(0, *position - 20),
-	upper = *position + 20;
-      
-      printf("Tree Parsing ERROR: Expecting '%c', found: '%c'\n", c1, c2); 
-      printf("Context: \n");
-      
-      while(lower < upper && fp[lower])
-	printf("%c", fp[lower++]);
-      
-      printf("\n");
-
-      return PLL_FALSE;
-  }
-} 
-
-
-static pllBoolean treeLabelEndString (char ch)
-{
-  switch(ch) 
-    {   
-    case '\0':  
-    case '\t':  
-    case '\n':  
-    case '\r': 
-    case ' ':
-    case ':':  
-    case ',':   
-    case '(':   
-    case ')':  
-    case ';':
-      return PLL_TRUE;
-    default:
-      break;
-    }
-  
-  return PLL_FALSE;
-} 
-
-static pllBoolean  treeGetLabelString (const char *fp, char *lblPtr, int maxlen, int *position)
-{
-  char 
-    ch;
-  
-  pllBoolean  
-    done, 
-    lblfound;
-
-  if (--maxlen < 0) 
-    lblPtr = (char *)NULL; 
-  else 
-    if(lblPtr == NULL) 
-      maxlen = 0;
-
-  ch = fp[(*position)++];
-  
-  done = treeLabelEndString(ch);
-
-  lblfound = !done;  
-
-  while(!done) 
-    {      
-      if(treeLabelEndString(ch)) 
-	break;     
-
-      if(--maxlen >= 0) 
-	*lblPtr++ = ch;
-      
-      ch = fp[(*position)++];      
-    }
-  
-  (*position)--; 
-
-  if (lblPtr != NULL) 
-    *lblPtr = '\0';
-
-  return lblfound;
-}
-
-static pllBoolean  treeFlushLabelString(const char *fp, int *position)
-{ 
-  return  treeGetLabelString(fp, (char *) NULL, (int) 0, position);
-} 
-
-static pllBoolean treeProcessLengthString (const char *fp, double *dptr, int *position)
-{ 
-  (*position)++;
-  
-  if(sscanf(&fp[*position], "%lf", dptr) != 1) 
-    {
-      printf("ERROR: treeProcessLength: Problem reading branch length\n");     
-      assert(0);
-    }
-
-  while(fp[*position] != ',' && fp[*position] != ')' && fp[*position] != ';')
-    *position = *position + 1;
-  
-  return  PLL_TRUE;
-}
-
-static int treeFlushLenString (const char *fp, int *position)
-{
-  double  
-    dummy;  
-  
-  char     
-    ch;
-
-  ch = fp[(*position)++];
- 
-  if(ch == ':') 
-    {     
-      if(!treeProcessLengthString(fp, &dummy, position)) 
-	return 0;
-      return 1;	  
-    }
-    
-  (*position)--;
-
-  return 1;
-} 
-
-static int treeFindTipByLabelString(char  *str, pllInstance *tr)                    
-{
-  int lookup = lookupWord(str, tr->nameHash);
-
-  if(lookup > 0)
-    {
-      assert(! tr->nodep[lookup]->back);
-      return lookup;
-    }
-  else
-    { 
-      printf("ERROR: Cannot find tree species: %s\n", str);
-      return  0;
-    }
-}
-
-static int treeFindTipNameString (const char *fp, pllInstance *tr, int *position)
-{
-  char    str[PLL_NMLNGTH + 2];
-  int      n;
-
-  if (treeGetLabelString (fp, str, PLL_NMLNGTH + 2, position))
-    n = treeFindTipByLabelString(str, tr);
-  else
-    n = 0;
-   
-  return  n;
-} 
-
-static pllBoolean addElementLenString(const char *fp, pllInstance *tr, nodeptr p, int *position)
-{
-  nodeptr  
-    q;
-  
-  int      
-    n, 
-    fres;
-
-  char 
-    ch;
-  
-  if ((ch = fp[(*position)++]) == '(') 
-    { 
-      n = (tr->nextnode)++;
-      if (n > 2*(tr->mxtips) - 2) 
-	{
-	  if (tr->rooted || n > 2*(tr->mxtips) - 1) 
-	    {
-	      printf("ERROR: Too many internal nodes.  Is tree rooted?\n");
-	      printf("       Deepest splitting should be a trifurcation.\n");
-	      return PLL_FALSE;
-	    }
-	  else 
-	    {	   
-	      tr->rooted = PLL_TRUE;
-	    }
-	}
-      
-      q = tr->nodep[n];
-
-      if (!addElementLenString(fp, tr, q->next, position))        
-	return PLL_FALSE;
-      if (!treeNeedString(fp, ',', position))             
-	return PLL_FALSE;
-      if (!addElementLenString(fp, tr, q->next->next, position))  
-	return PLL_FALSE;
-      if (!treeNeedString(fp, ')', position))             
-	return PLL_FALSE;
-      
-     
-      treeFlushLabelString(fp, position);
-    }
-  else 
-    {   
-      (*position)--;
-     
-      if ((n = treeFindTipNameString(fp, tr, position)) <= 0)          
-	return PLL_FALSE;
-      q = tr->nodep[n];
-      
-      if (tr->start->number > n)  
-	tr->start = q;
-      (tr->ntips)++;
-    }
-  
-     
-  fres = treeFlushLenString(fp, position);
-  if(!fres) 
-    return PLL_FALSE;
-  
-  hookupDefault(p, q);
-
-  return PLL_TRUE;          
-}
-
-
-
-void treeReadTopologyString(char *treeString, pllInstance *tr)
-{ 
-  char 
-    *fp = treeString;
-
-  nodeptr  
-    p;
-  
-  int
-    position = 0, 
-    i;
-  
-  char 
-    ch;   
-    
-
-  for(i = 1; i <= tr->mxtips; i++)    
-    tr->nodep[i]->back = (node *)NULL;      
-  
-  for(i = tr->mxtips + 1; i < 2 * tr->mxtips; i++)
-    {
-      tr->nodep[i]->back = (nodeptr)NULL;
-      tr->nodep[i]->next->back = (nodeptr)NULL;
-      tr->nodep[i]->next->next->back = (nodeptr)NULL;
-      tr->nodep[i]->number = i;
-      tr->nodep[i]->next->number = i;
-      tr->nodep[i]->next->next->number = i;           
-    }
-      
-  tr->start       = tr->nodep[1];
-  tr->ntips       = 0;
-  tr->nextnode    = tr->mxtips + 1;    
-  tr->rooted      = PLL_FALSE;      
-  
-  p = tr->nodep[(tr->nextnode)++]; 
-   
-  assert(fp[position++] == '(');  
-    
-  if (! addElementLenString(fp, tr, p, &position))                 
-    assert(0);
-  
-  if (! treeNeedString(fp, ',', &position))                
-    assert(0);
-   
-  if (! addElementLenString(fp, tr, p->next, &position))           
-    assert(0);
-
-  if(!tr->rooted) 
-    {
-      if ((ch = fp[position++]) == ',') 
-	{ 
-	  if (! addElementLenString(fp, tr, p->next->next, &position)) 
-	    assert(0);	 
-	}
-      else 
-	assert(0);     
-    }
-  else
-    assert(0);
-        
-  if (! treeNeedString(fp, ')', &position))                
-    assert(0);
-
-  treeFlushLabelString(fp, &position);
-  
-  if (!treeFlushLenString(fp, &position))                         
-    assert(0);
-  
-  if (!treeNeedString(fp, ';', &position))       
-    assert(0);
-    
-  if(tr->rooted)     
-    assert(0);           
-  else           
-    tr->start = tr->nodep[1];   
-
-  printf("Tree parsed\n");
-
-} 
-*/
diff --git a/pllrepo/src/sched.h b/pllrepo/src/sched.h
deleted file mode 100644
index f36a97a..0000000
--- a/pllrepo/src/sched.h
+++ /dev/null
@@ -1,183 +0,0 @@
-/*
- * Module: sched.h
- *
- * Purpose:
- *      Provides an implementation of POSIX realtime extensions
- *      as defined in 
- *
- *              POSIX 1003.1b-1993      (POSIX.1b)
- *
- * --------------------------------------------------------------------------
- *
- *      Pthreads-win32 - POSIX Threads Library for Win32
- *      Copyright(C) 1998 John E. Bossom
- *      Copyright(C) 1999,2005 Pthreads-win32 contributors
- * 
- *      Contact Email: rpj at callisto.canberra.edu.au
- * 
- *      The current list of contributors is contained
- *      in the file CONTRIBUTORS included with the source
- *      code distribution. The list can also be seen at the
- *      following World Wide Web location:
- *      http://sources.redhat.com/pthreads-win32/contributors.html
- * 
- *      This library is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU Lesser General Public
- *      License as published by the Free Software Foundation; either
- *      version 2 of the License, or (at your option) any later version.
- * 
- *      This library is distributed in the hope that it will be useful,
- *      but WITHOUT ANY WARRANTY; without even the implied warranty of
- *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *      Lesser General Public License for more details.
- * 
- *      You should have received a copy of the GNU Lesser General Public
- *      License along with this library in the file COPYING.LIB;
- *      if not, write to the Free Software Foundation, Inc.,
- *      59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
- */
-#if !defined(_SCHED_H)
-#define _SCHED_H
-
-#undef PTW32_SCHED_LEVEL
-
-#if defined(_POSIX_SOURCE)
-#define PTW32_SCHED_LEVEL 0
-/* Early POSIX */
-#endif
-
-#if defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 199309
-#undef PTW32_SCHED_LEVEL
-#define PTW32_SCHED_LEVEL 1
-/* Include 1b, 1c and 1d */
-#endif
-
-#if defined(INCLUDE_NP)
-#undef PTW32_SCHED_LEVEL
-#define PTW32_SCHED_LEVEL 2
-/* Include Non-Portable extensions */
-#endif
-
-#define PTW32_SCHED_LEVEL_MAX 3
-
-#if ( defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 200112 )  || !defined(PTW32_SCHED_LEVEL)
-#define PTW32_SCHED_LEVEL PTW32_SCHED_LEVEL_MAX
-/* Include everything */
-#endif
-
-
-#if defined(__GNUC__) && !defined(__declspec)
-# error Please upgrade your GNU compiler to one that supports __declspec.
-#endif
-
-/*
- * When building the library, you should define PTW32_BUILD so that
- * the variables/functions are exported correctly. When using the library,
- * do NOT define PTW32_BUILD, and then the variables/functions will
- * be imported correctly.
- */
-#if !defined(PTW32_STATIC_LIB)
-#  if defined(PTW32_BUILD)
-#    define PTW32_DLLPORT __declspec (dllexport)
-#  else
-#    define PTW32_DLLPORT __declspec (dllimport)
-#  endif
-#else
-#  define PTW32_DLLPORT
-#endif
-
-/*
- * This is a duplicate of what is in the autoconf config.h,
- * which is only used when building the pthread-win32 libraries.
- */
-
-#if !defined(PTW32_CONFIG_H)
-#  if defined(WINCE)
-#    define NEED_ERRNO
-#    define NEED_SEM
-#  endif
-#  if defined(__MINGW64__)
-#    define HAVE_STRUCT_TIMESPEC
-#    define HAVE_MODE_T
-#  elif defined(_UWIN) || defined(__MINGW32__)
-#    define HAVE_MODE_T
-#  endif
-#endif
-
-/*
- *
- */
-
-#if PTW32_SCHED_LEVEL >= PTW32_SCHED_LEVEL_MAX
-#if defined(NEED_ERRNO)
-#include "need_errno.h"
-#else
-#include <errno.h>
-#endif
-#endif /* PTW32_SCHED_LEVEL >= PTW32_SCHED_LEVEL_MAX */
-
-#if (defined(__MINGW64__) || defined(__MINGW32__)) || defined(_UWIN)
-# if PTW32_SCHED_LEVEL >= PTW32_SCHED_LEVEL_MAX
-/* For pid_t */
-#  include <sys/types.h>
-/* Required by Unix 98 */
-#  include <time.h>
-# else
-   typedef int pid_t;
-# endif
-#else
- typedef int pid_t;
-#endif
-
-/* Thread scheduling policies */
-
-enum {
-  SCHED_OTHER = 0,
-  SCHED_FIFO,
-  SCHED_RR,
-  SCHED_MIN   = SCHED_OTHER,
-  SCHED_MAX   = SCHED_RR
-};
-
-struct sched_param {
-  int sched_priority;
-};
-
-#if defined(__cplusplus)
-extern "C"
-{
-#endif                          /* __cplusplus */
-
-PTW32_DLLPORT int __cdecl sched_yield (void);
-
-PTW32_DLLPORT int __cdecl sched_get_priority_min (int policy);
-
-PTW32_DLLPORT int __cdecl sched_get_priority_max (int policy);
-
-PTW32_DLLPORT int __cdecl sched_setscheduler (pid_t pid, int policy);
-
-PTW32_DLLPORT int __cdecl sched_getscheduler (pid_t pid);
-
-/*
- * Note that this macro returns ENOTSUP rather than
- * ENOSYS as might be expected. However, returning ENOSYS
- * should mean that sched_get_priority_{min,max} are
- * not implemented as well as sched_rr_get_interval.
- * This is not the case, since we just don't support
- * round-robin scheduling. Therefore I have chosen to
- * return the same value as sched_setscheduler when
- * SCHED_RR is passed to it.
- */
-#define sched_rr_get_interval(_pid, _interval) \
-  ( errno = ENOTSUP, (int) -1 )
-
-
-#if defined(__cplusplus)
-}                               /* End of extern "C" */
-#endif                          /* __cplusplus */
-
-#undef PTW32_SCHED_LEVEL
-#undef PTW32_SCHED_LEVEL_MAX
-
-#endif                          /* !_SCHED_H */
-
diff --git a/pllrepo/src/searchAlgo.c b/pllrepo/src/searchAlgo.c
deleted file mode 100644
index c638d48..0000000
--- a/pllrepo/src/searchAlgo.c
+++ /dev/null
@@ -1,3310 +0,0 @@
-/** 
- * PLL (version 1.0.0) a software library for phylogenetic inference
- * Copyright (C) 2013 Tomas Flouri and Alexandros Stamatakis
- *
- * Derived from 
- * RAxML-HPC, a program for sequential and parallel estimation of phylogenetic
- * trees by Alexandros Stamatakis
- *
- * This program is free software: you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation, either version 3 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- * 
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * For any other enquiries send an Email to Tomas Flouri
- * Tomas.Flouri at h-its.org
- *
- * When publishing work that uses PLL please cite PLL
- * 
- * @file searchAlgo.c
- * @brief Collection of routines for performing likelihood computation and branch optimization.
- *
- * Detailed description to appear soon.
- */
-#include "mem_alloc.h"
-
-#ifndef WIN32
-#include <sys/times.h>
-#include <sys/types.h>
-#include <sys/time.h>
-#include <unistd.h> 
-#endif
-
-#include <math.h>
-#include <time.h> 
-#include <stdlib.h>
-#include <stdio.h>
-#include <ctype.h>
-#include <string.h>
-#include <assert.h>
-#include <errno.h>
-
-#include "pll.h"
-#include "pllInternal.h"
-
-typedef struct bInf {
-  double likelihood;
-  nodeptr node;
-} bestInfo;
-
-typedef struct iL {
-  bestInfo *list;
-  int n;
-  int valid;
-} infoList;
-
-double treeOptimizeRapid(pllInstance *tr, partitionList *pr, int mintrav, int maxtrav, bestlist *bt, infoList *iList);
-nniMove getBestNNIForBran(pllInstance* tr, partitionList *pr, nodeptr p, double curLH);
-void evalNNIForSubtree(pllInstance* tr, partitionList *pr, nodeptr p, nniMove* nniList, int* cnt, int* cnt_nni, double curLH);
-
-
-static int cmp_nni(const void* nni1, const void* nni2);
-static void pllTraverseUpdate (pllInstance *tr, partitionList *pr, nodeptr p, nodeptr q, int mintrav, int maxtrav, pllRearrangeList * bestList);
-static int pllStoreRearrangement (pllRearrangeList * bestList, pllRearrangeInfo * rearr);
-static int pllTestInsertBIG (pllInstance * tr, partitionList * pr, nodeptr p, nodeptr q, pllRearrangeList * bestList);
-static int pllTestSPR (pllInstance * tr, partitionList * pr, nodeptr p, int mintrav, int maxtrav, pllRearrangeList * bestList);
-static void pllCreateSprInfoRollback (pllInstance * tr, pllRearrangeInfo * rearr, int numBranches);
-static void pllCreateNniInfoRollback (pllInstance * tr, pllRearrangeInfo * rearr);
-static void pllCreateRollbackInfo (pllInstance * tr, pllRearrangeInfo * rearr, int numBranches);
-static void pllRollbackNNI (pllInstance * tr, partitionList * pr, pllRollbackInfo * ri);
-static void pllRollbackSPR (partitionList * pr, pllRollbackInfo * ri);
-
-extern partitionLengths pLengths[PLL_MAX_MODEL];
-
-pllBoolean initrav (pllInstance *tr, partitionList *pr, nodeptr p)
-{ 
-  nodeptr  q;
-
-  if (!isTip(p->number, tr->mxtips)) 
-  {      
-    q = p->next;
-
-    do 
-    {	   
-      if (! initrav(tr, pr, q->back))  return PLL_FALSE;
-      q = q->next;	
-    } 
-    while (q != p);  
-
-    pllUpdatePartials(tr, pr, p, PLL_FALSE);
-  }
-
-  return PLL_TRUE;
-} 
-
-
-/** @brief Optimize the length of a specific branch
-
-    Optimize the length of the branch connecting \a p and \a p->back
-    for each partition (\a tr->numBranches) in library instance \a tr.
- 
-    @param tr
-      The library instance
-
-    @param pr
-      Partition list
- 
-    @param p
-      Endpoints of branch to be optimized 
-*/
-void update(pllInstance *tr, partitionList *pr, nodeptr p)
-{       
-  nodeptr  q; 
-  int i;
-  double   z[PLL_NUM_BRANCHES], z0[PLL_NUM_BRANCHES];
-  int numBranches = pr->perGeneBranchLengths ? pr->numberOfPartitions : 1;
-
-  #ifdef _DEBUG_UPDATE
-    double 
-      startLH;
-  
-    pllEvaluateLikelihood (tr, p);
-  
-    startLH = tr->likelihood;
-  #endif
-
-  q = p->back;   
-
-  for(i = 0; i < numBranches; i++)
-    z0[i] = q->z[i];    
-
-  if(numBranches > 1)
-    makenewzGeneric(tr, pr, p, q, z0, PLL_NEWZPERCYCLE, z, PLL_TRUE);
-  else
-    makenewzGeneric(tr, pr, p, q, z0, PLL_NEWZPERCYCLE, z, PLL_FALSE);
-
-  for(i = 0; i < numBranches; i++)
-  {         
-    if(!tr->partitionConverged[i])
-    {	  
-      if(PLL_ABS(z[i] - z0[i]) > PLL_DELTAZ)  
-      {	      
-        tr->partitionSmoothed[i] = PLL_FALSE;
-      }	 
-
-      p->z[i] = q->z[i] = z[i];	 
-    }
-  }
- 
-  #ifdef _DEBUG_UPDATE
-    pllEvaluateLikelihood (tr, p);
-  
-    if(tr->likelihood <= startLH)
-      {
-        if(fabs(tr->likelihood - startLH) > 0.01)
-  	{
-  	  printf("%f %f\n", startLH, tr->likelihood);
-  	  assert(0);      
-  	}
-      }
-  #endif
-}
-
-/** @brief Branch length optimization of subtree
-
-    Optimize the length of branch connected by \a p and \a p->back, and the
-    lengths of all branches in the subtrees rooted at \a p->next and \a p->next->next
-
-    @param tr
-      The library instance
-
-    @param pr
-      Partition list
-
-    @param p
-      Endpoint of branches to be optimized
-*/
-void smooth (pllInstance *tr, partitionList *pr, nodeptr p)
-{
-  nodeptr  q;
-  int numBranches = pr->perGeneBranchLengths?pr->numberOfPartitions:1;
-
-  update(tr, pr, p);    /*  Adjust branch */
-
-  if (! isTip(p->number, tr->mxtips)) 
-  {                                  /*  Adjust descendants */
-    q = p->next;
-    while (q != p) 
-    {
-      smooth(tr, pr, q->back);
-      q = q->next;
-    }	
-
-    if(numBranches > 1 && !tr->useRecom)
-      pllUpdatePartials(tr, pr,p, PLL_TRUE);
-    else
-      pllUpdatePartials(tr, pr,p, PLL_FALSE);
-  }
-} 
-
-/**  @brief Check whether the branches in all partitions have been optimized
- 
-     Check if all branches in all partitions have reached the threshold for
-     optimization. If at least one branch can be optimized further return \b PLL_FALSE.
-
-     @param tr
-       The library instance 
-
-     @return
-       If at least one branch can be further optimized return \b PLL_FALSE,
-       otherwise \b PLL_TRUE.
-             
-*/
-static pllBoolean allSmoothed(pllInstance *tr, int numBranches)
-{
-  int i;
-  pllBoolean result = PLL_TRUE;
-
-  for(i = 0; i < numBranches; i++)
-  {
-    if(tr->partitionSmoothed[i] == PLL_FALSE)
-      result = PLL_FALSE;
-    else
-      tr->partitionConverged[i] = PLL_TRUE;
-  }
-
-  return result;
-}
-
-
-/** @brief Optimize all branch lengths of a tree
-  
-    Perform \a maxtimes rounds of branch length optimization by running smooth()
-    on all neighbour nodes of node \a tr->start.
-
-    @param tr
-      The library instance
-
-    @param maxtimes
-      Number of optimization rounds to perform
-*/
-/* do maxtimes rounds of branch length optimization */
-void smoothTree (pllInstance *tr, partitionList *pr, int maxtimes)
-{
-	nodeptr  p, q;
-	int i, count = 0;
-    int numBranches = pr->perGeneBranchLengths?pr->numberOfPartitions:1;
-
-	p = tr->start;
-	for(i = 0; i < numBranches; i++)
-		tr->partitionConverged[i] = PLL_FALSE;
-
-	while (--maxtimes >= 0)
-	{
-		for(i = 0; i < numBranches; i++)
-			tr->partitionSmoothed[i] = PLL_TRUE;
-
-		smooth(tr, pr, p->back);
-		if (!isTip(p->number, tr->mxtips))
-		{
-			q = p->next;
-			while (q != p)
-			{
-				smooth(tr, pr, q->back);
-				q = q->next;
-			}
-		}
-		count++;
-
-		if (allSmoothed(tr, numBranches)) break;
-	}
-
-	for(i = 0; i < numBranches; i++)
-		tr->partitionConverged[i] = PLL_FALSE;
-} 
-
-
-/** @brief Optimize the branch length of edges around a specific node
-    
-    Optimize \a maxtimes the branch length of all (3) edges around a given node 
-    \a p of the tree of library instance \a tr.
-
-    @param tr
-      The library instance
-
-    @param p
-      The node around which to optimize the edges
-
-    @param maxtimes
-      Number of optimization rounds to perform
-*/
-void localSmooth (pllInstance *tr, partitionList *pr, nodeptr p, int maxtimes)
-{ 
-  nodeptr  q;
-  int i;
-  int numBranches = pr->perGeneBranchLengths ? pr->numberOfPartitions : 1;
-  if (isTip(p->number, tr->mxtips)) return;
-
-  for(i = 0; i < PLL_NUM_BRANCHES; i++)
-    tr->partitionConverged[i] = PLL_FALSE;	
-
-  while (--maxtimes >= 0) 
-  {     
-    for(i = 0; i < PLL_NUM_BRANCHES; i++)
-      tr->partitionSmoothed[i] = PLL_TRUE;
-
-    q = p;
-    do 
-    {
-      update(tr, pr, q);
-      q = q->next;
-    } 
-    while (q != p);
-
-    if (allSmoothed(tr, numBranches))
-      break;
-  }
-
-  for(i = 0; i < PLL_NUM_BRANCHES; i++)
-  {
-    tr->partitionSmoothed[i] = PLL_FALSE; 
-    tr->partitionConverged[i] = PLL_FALSE;
-  }
-}
-
-
-
-
-/** @brief Reset an \a infoList
-
-    Resets an \a infoList by setting elements \a node and \a likelihood
-    of each element of the \a bestInfo list structure to \b NULL and
-    \a PLL_UNLIKELY, respectively.
-
-    @param iList
-      The given \a infoList.
-*/
-static void resetInfoList(infoList *iList)
-{
-  int 
-    i;
-
-  iList->valid = 0;
-
-  for(i = 0; i < iList->n; i++)    
-  {
-    iList->list[i].node = (nodeptr)NULL;
-    iList->list[i].likelihood = PLL_UNLIKELY;
-  }    
-}
-
-/** @brief Initialize an \a infoList
-
-    Initialize an \a infoList by creating a \a bestInfo list structure
-    of \a n elements and setting the attributes \a node and \a likelihood
-    of each element of the \a bestInfo list structure to \b NULL and
-    \a PLL_UNLIKELY, respectively.
-
-    @param iList
-      The given \a infoList.
-
-    @param n
-      Number of elements to be created in the \a bestInfo list.
-*/
-static void initInfoList(infoList *iList, int n)
-{
-  int 
-    i;
-
-  iList->n = n;
-  iList->valid = 0;
-  iList->list = (bestInfo *)rax_malloc(sizeof(bestInfo) * (size_t)n);
-
-  for(i = 0; i < n; i++)
-  {
-    iList->list[i].node = (nodeptr)NULL;
-    iList->list[i].likelihood = PLL_UNLIKELY;
-  }
-}
-
-/** @brief Deallocate the contents of an \a infoList
-    
-    Deallocate the contents of a given \a infoList by freeing
-    the memory used by its \a bestInfo list structure.
-
-    @param iList
-      The \a infoList to be used.
-*/
-static void freeInfoList(infoList *iList)
-{ 
-  rax_free(iList->list);   
-}
-
-
-/** @brief Insert a record in an \a infoList
-
-    Insert the pair \a likelihood and \node into list \a iList 
-    \b only if there already exists a pair in \a iList 
-    whose \a likelihood attribute is smaller than the given \a 
-    likelihood. The insertion is done by replacing the smallest
-    likelihood pair with the new pair.
-
-    @param node
-      The given node
-
-    @param likelihood
-      The given likelihood
-
-    @param iList
-      The given \a infoList where the record will possibly be appended.
-*/
-static void insertInfoList(nodeptr node, double likelihood, infoList *iList)
-{
-  int 
-    i,
-    min = 0;
-
-  double 
-    min_l =  iList->list[0].likelihood;
-
-  for(i = 1; i < iList->n; i++)
-  {
-    if(iList->list[i].likelihood < min_l)
-    {
-      min = i;
-      min_l = iList->list[i].likelihood;
-    }
-  }
-
-  if(likelihood > min_l)
-  {
-    iList->list[min].likelihood = likelihood;
-    iList->list[min].node = node;
-    if(iList->valid < iList->n)
-      iList->valid += 1;
-  }
-}
-
-
-/** @brief  Optimize branch lengths of region
-
-    Optimize the branch lenghts of only a specific region. The branch optimization starts
-    at a node \a p and is carried out in all nodes with distance upto \a region edges from 
-    \a p.
-
-    @param tr
-      The library instance.
-    
-    @param p
-      Node to start branch optimization from.
-
-    @param region
-      The allowed node distance from \p for which to still perform branch optimization.
-*/
-void smoothRegion (pllInstance *tr, partitionList *pr, nodeptr p, int region)
-{ 
-  nodeptr  q;
-
-  update(tr, pr, p);   /* Adjust branch */
-
-  if (region > 0)
-  {
-    if (!isTip(p->number, tr->mxtips)) 
-    {                                 
-      q = p->next;
-      while (q != p) 
-      {
-        smoothRegion(tr, pr, q->back, --region);
-        q = q->next;
-      }	
-
-      pllUpdatePartials(tr, pr,p, PLL_FALSE);
-    }
-  }
-}
-
-/** @brief Wrapper function for optimizing the branch length of a region \a maxtimes times
-
-    Optimize the branch lengths of a specific region \a maxtimes times. The branch optimization
-    starts at a given node \a p and is carried out in all nodes with distance upto \a region
-    from \a p.
-
-    @param tr
-      The library instance.
-
-    @param p
-      Node to start branch optimization from.
-
-    @param maxtimes
-      Number of times to perform branch optimization.
-
-    @param region
-      The allwed node distance from \p for which to still perform branch optimization.
-
-    @todo
-      In the previous version (before the model-sep merge) the loops were controlled by tr->numBranches,
-      and now they are controlled by a constant PLL_NUM_BRANCHES. What is right?
-*/
-void regionalSmooth (pllInstance *tr, partitionList *pr, nodeptr p, int maxtimes, int region)
-{
-  nodeptr  q;
-  int i;
-  int numBranches = pr->perGeneBranchLengths?pr->numberOfPartitions:1;
-
-  if (isTip(p->number, tr->mxtips)) return;            /* Should be an error */
-
-  for(i = 0; i < PLL_NUM_BRANCHES; i++)
-    tr->partitionConverged[i] = PLL_FALSE;
-
-  while (--maxtimes >= 0) 
-  {	
-    for(i = 0; i < PLL_NUM_BRANCHES; i++)
-      tr->partitionSmoothed[i] = PLL_TRUE;
-
-    q = p;
-    do 
-    {
-      smoothRegion(tr, pr, q, region);
-      q = q->next;
-    } 
-    while (q != p);
-
-    if (allSmoothed(tr, numBranches))
-      break;
-  }
-
-  for(i = 0; i < PLL_NUM_BRANCHES; i++) {
-    tr->partitionSmoothed[i] = PLL_FALSE;
-    tr->partitionConverged[i] = PLL_FALSE;
-  }
-} 
-
-
-
-
-/** @brief Split the tree into two components and optimize new branch length
-
-   Split the tree into two components. The disconnection point is node \a p.
-   First, a branch length is computed for the newly created branch between nodes
-   \a p->next->back and \a p->next->next->back and then the two nodes are
-   connected (hookup). Disconnection is done by setting \a p->next->next->back
-   and \a p->next->back to \b NULL.
-
-   @param tr
-     The library instance
-
-   @param p
-     The node at which the tree should be decomposed into two components.
-
-   @param numBranches
-     Number of branches per partition
-
-   @return
-     Node from the disconnected component
-
-   @todo
-     Why do we return this node?
-
-   @image html removeBIG.png "The diagram shows in blue color the new edge that is created and in red the edges that are removed"
-*/
-nodeptr  removeNodeBIG (pllInstance *tr, partitionList *pr, nodeptr p, int numBranches)
-{  
-//  double   zqr[numBranches], result[numBranches];
-  double*   zqr = rax_malloc(numBranches*sizeof(double)), *result = rax_malloc(numBranches*sizeof(double));
-  nodeptr  q, r;
-  int i;
-
-  q = p->next->back;
-  r = p->next->next->back;
-
-  for(i = 0; i < numBranches; i++)
-    zqr[i] = q->z[i] * r->z[i];        
-
-  makenewzGeneric(tr, pr, q, r, zqr, PLL_ITERATIONS, result, PLL_FALSE);
-
-  for(i = 0; i < numBranches; i++)        
-    tr->zqr[i] = result[i];
-
-  hookup(q, r, result, numBranches); 
-
-  p->next->next->back = p->next->back = (node *) NULL;
-
-  rax_free(result);
-  rax_free(zqr);
-  return  q; 
-}
-
-/** @brief Split the tree into two components and recompute likelihood
-
-    Split the tree into two component. The disconnection point is node \a p.
-    Set the branch length of the new node between \a p->next->back and
-    \a p->next->next->back to \a tr->currentZQR and then decompose the tree
-    into two components by setting \a p->next->back and \a p->next->next->back
-    to \b NULL.
-
-    @param tr
-      The library instance
-
-    @param p
-      The node at which the tree should be decomposed into two components.
-
-    @return q
-      the node after \a p
-
-    @todo
-      Why do we return this node? Why do we set to tr->currentZQR and not compute
-      new optimized length? What is tr->currentZQR? 
-*/
-nodeptr  removeNodeRestoreBIG (pllInstance *tr, partitionList *pr, nodeptr p)
-{
-  nodeptr  q, r;
-
-  q = p->next->back;
-  r = p->next->next->back;  
-
-  pllUpdatePartials(tr, pr,q, PLL_FALSE);
-  pllUpdatePartials(tr, pr,r, PLL_FALSE);
-
-  hookup(q, r, tr->currentZQR, pr->perGeneBranchLengths?pr->numberOfPartitions:1);
-
-  p->next->next->back = p->next->back = (node *) NULL;
-
-  return  q;
-}
-
-/** @brief Connect two disconnected tree components
-   
-   Connect two disconnected components by specifying an internal edge from one
-   component and a leaf from the other component. The internal edge \a e is the
-   edge between \a q and \a q->back. The leaf is specified by \a p.
-   Edge \a e is removed and two new edges are created. The first one is an edge
-   between \a p->next and \a q, and the second one is between \a p->next->next
-   and \a q->back. The new likelihood vector for node \a p is computed.
-
-   @note The function makes use of the \a thoroughInsertion flag
-
-   @todo
-     What is tr->lzi ? What is thorough insertion? Why do we optimize branch lengths
-     that will be removed? Add explanation
-
-   @image html pll.png "The diagram shows in blue colors the new edges that are created and in red the edge that is removed" 
-*/
-pllBoolean insertBIG (pllInstance *tr, partitionList *pr, nodeptr p, nodeptr q)
-{
-  nodeptr  r, s;
-  int i;
-  int numBranches = pr->perGeneBranchLengths?pr->numberOfPartitions:1;
-
-  r = q->back;
-  s = p->back;
-
-  for(i = 0; i < numBranches; i++)
-    tr->lzi[i] = q->z[i];
-
-  if(tr->thoroughInsertion)
-  { 
-	  double * zqr = rax_malloc(numBranches*sizeof(double)), 
-		  *zqs = rax_malloc(numBranches*sizeof(double)), 
-		  *zrs = rax_malloc(numBranches*sizeof(double));
-	  double lzqr, lzqs, lzrs, lzsum, lzq, lzr, lzs, lzmax;
-    double *defaultArray=rax_malloc(numBranches*sizeof(double));
-	double *e1 = rax_malloc(numBranches*sizeof(double)),
-		*e2 = rax_malloc(numBranches*sizeof(double)),
-		*e3 = rax_malloc(numBranches*sizeof(double));
-    double *qz;
-
-    qz = q->z;
-
-    for(i = 0; i < numBranches; i++)
-      defaultArray[i] = PLL_DEFAULTZ;
-
-    makenewzGeneric(tr, pr, q, r, qz, PLL_ITERATIONS, zqr, PLL_FALSE);
-    /* the branch lengths values will be estimated using q, r and s
-     * q-s are not connected, but both q and s have a valid LH vector , so we can call makenewzGeneric  to get a value for
-     * lzsum, which is then use to generate reasonable starting values e1, e2, e3 for the new branches we create after the       insertion
-     */
-
-    makenewzGeneric(tr, pr, q, s, defaultArray, PLL_ITERATIONS, zqs, PLL_FALSE);
-    makenewzGeneric(tr, pr, r, s, defaultArray, PLL_ITERATIONS, zrs, PLL_FALSE);
-
-
-    for(i = 0; i < numBranches; i++)
-    {
-      lzqr = (zqr[i] > PLL_ZMIN) ? log(zqr[i]) : log(PLL_ZMIN); 
-      lzqs = (zqs[i] > PLL_ZMIN) ? log(zqs[i]) : log(PLL_ZMIN);
-      lzrs = (zrs[i] > PLL_ZMIN) ? log(zrs[i]) : log(PLL_ZMIN);
-      lzsum = 0.5 * (lzqr + lzqs + lzrs);
-
-      lzq = lzsum - lzrs;
-      lzr = lzsum - lzqs;
-      lzs = lzsum - lzqr;
-      lzmax = log(PLL_ZMAX);
-
-      if      (lzq > lzmax) {lzq = lzmax; lzr = lzqr; lzs = lzqs;} 
-      else if (lzr > lzmax) {lzr = lzmax; lzq = lzqr; lzs = lzrs;}
-      else if (lzs > lzmax) {lzs = lzmax; lzq = lzqs; lzr = lzrs;}          
-
-      e1[i] = exp(lzq);
-      e2[i] = exp(lzr);
-      e3[i] = exp(lzs);
-    }
-    hookup(p->next,       q, e1, numBranches);
-    hookup(p->next->next, r, e2, numBranches);
-    hookup(p,             s, e3, numBranches);      		  
-	rax_free(e3);
-	rax_free(e2);
-	rax_free(e1);
-	rax_free(defaultArray);
-	rax_free(zrs);
-	rax_free(zqs);
-	rax_free(zqr);
-
-  }
-  else
-  {       
-	  double  *z = rax_malloc(numBranches*sizeof(double));
-
-    for(i = 0; i < numBranches; i++)
-    {
-      z[i] = sqrt(q->z[i]);      
-
-      if(z[i] < PLL_ZMIN) 
-        z[i] = PLL_ZMIN;
-      if(z[i] > PLL_ZMAX)
-        z[i] = PLL_ZMAX;
-    }
-
-    hookup(p->next,       q, z, numBranches);
-    hookup(p->next->next, r, z, numBranches);
-	rax_free(z);
-  }
-
-  pllUpdatePartials(tr, pr,p, PLL_FALSE);
-
-  if(tr->thoroughInsertion)
-  {     
-    localSmooth(tr, pr, p, PLL_MAX_LOCAL_SMOOTHING_ITERATIONS);
-    for(i = 0; i < numBranches; i++)
-    {
-      tr->lzq[i] = p->next->z[i];
-      tr->lzr[i] = p->next->next->z[i];
-      tr->lzs[i] = p->z[i];            
-    }
-  }           
-
-  return  PLL_TRUE;
-}
-
-/** @brief Connect two disconnected tree components without optimizing branch lengths
-   
-   Connect two disconnected components by specifying an internal edge from one
-   component and a leaf from the other component. The internal edge \a e is the
-   edge between \a q and \a q->back. The leaf is specified by \a p.
-   Edge \a e is removed and two new edges are created. The first one is an edge
-   between \a p->next and \a q, and the second one is between \a p->next->next
-   and \a q->back. The new likelihood vector for node \a p is computed.
-
-   @note The function makes use of the \a thoroughInsertion flag
-
-   @todo
-     What is the difference between this and insertBIG? 
-*/
-pllBoolean insertRestoreBIG (pllInstance *tr, partitionList *pr, nodeptr p, nodeptr q)
-{
-  nodeptr  r, s;
-
-  r = q->back;
-  s = p->back;
-
-  int numBranches = pr->perGeneBranchLengths?pr->numberOfPartitions:1;
-
-  if(tr->thoroughInsertion)
-  {                        
-    hookup(p->next,       q, tr->currentLZQ, numBranches);
-    hookup(p->next->next, r, tr->currentLZR, numBranches);
-    hookup(p,             s, tr->currentLZS, numBranches);
-  }
-  else
-  {       
-    double  z[PLL_NUM_BRANCHES];
-    int i;
-
-    for(i = 0; i < numBranches; i++)
-    {
-      double zz;
-      zz = sqrt(q->z[i]);     
-      if(zz < PLL_ZMIN) 
-        zz = PLL_ZMIN;
-      if(zz > PLL_ZMAX)
-        zz = PLL_ZMAX;
-      z[i] = zz;
-    }
-
-    hookup(p->next,       q, z, numBranches);
-    hookup(p->next->next, r, z, numBranches);
-  }   
-
-  pllUpdatePartials(tr, pr,p, PLL_FALSE);
-
-  return  PLL_TRUE;
-}
-
-
-static void restoreTopologyOnly(pllInstance *tr, bestlist *bt, int numBranches)
-{ 
-  nodeptr p = tr->removeNode;
-  nodeptr q = tr->insertNode;
-  double qz[PLL_NUM_BRANCHES], pz[PLL_NUM_BRANCHES], p1z[PLL_NUM_BRANCHES], p2z[PLL_NUM_BRANCHES];
-  nodeptr p1, p2, r, s;
-  double currentLH = tr->likelihood;
-  int i;
-
-  p1 = p->next->back;
-  p2 = p->next->next->back;
-
-  //memcpy(p1z, p1->z, numBranches*sizeof(double));
-  //memcpy(p2z, p2->z, numBranches*sizeof(double));
-  //memcpy(qz, q->z, numBranches*sizeof(double));
-  //memcpy(pz, p->z, numBranches*sizeof(double));
-  for(i = 0; i < numBranches; i++)
-  {
-    p1z[i] = p1->z[i];
-    p2z[i] = p2->z[i];
-  }
-
-  hookup(p1, p2, tr->currentZQR, numBranches);
-
-  p->next->next->back = p->next->back = (node *) NULL;             
-  for(i = 0; i < numBranches; i++)
-  {
-    qz[i] = q->z[i];
-    pz[i] = p->z[i];
-  }
-
-  r = q->back;
-  s = p->back;
-
-  if(tr->thoroughInsertion)
-  {                        
-    hookup(p->next,       q, tr->currentLZQ, numBranches);
-    hookup(p->next->next, r, tr->currentLZR, numBranches);
-    hookup(p,             s, tr->currentLZS, numBranches);
-  }
-  else
-  { 	
-    double  z[PLL_NUM_BRANCHES];	
-    for(i = 0; i < numBranches; i++)
-    {
-      z[i] = sqrt(q->z[i]);      
-      if(z[i] < PLL_ZMIN)
-        z[i] = PLL_ZMIN;
-      if(z[i] > PLL_ZMAX)
-        z[i] = PLL_ZMAX;
-    }
-    hookup(p->next,       q, z, numBranches);
-    hookup(p->next->next, r, z, numBranches);
-  }     
-
-  tr->likelihood = tr->bestOfNode;
-
-  saveBestTree(bt, tr, numBranches);
-
-  tr->likelihood = currentLH;
-
-  hookup(q, r, qz, numBranches);
-
-  p->next->next->back = p->next->back = (nodeptr) NULL;
-
-  if(tr->thoroughInsertion)    
-    hookup(p, s, pz, numBranches);
-
-  hookup(p->next,       p1, p1z, numBranches);
-  hookup(p->next->next, p2, p2z, numBranches);
-}
-
-/** @brief Test the 
-*/
-pllBoolean testInsertBIG (pllInstance *tr, partitionList *pr, nodeptr p, nodeptr q)
-{
-
-  int numBranches = pr->perGeneBranchLengths?pr->numberOfPartitions:1;
-
-  double  qz[PLL_NUM_BRANCHES], pz[PLL_NUM_BRANCHES];
-  nodeptr  r;
-  double startLH = tr->endLH;
-  int i;
-
-  r = q->back; 
-  for(i = 0; i < numBranches; i++)
-  {
-    qz[i] = q->z[i];
-    pz[i] = p->z[i];
-  }
-
-  if (! insertBIG(tr, pr, p, q))       return PLL_FALSE;
-
-  pllEvaluateLikelihood (tr, pr, p->next->next, PLL_FALSE, PLL_FALSE);
-
-  if(tr->likelihood > tr->bestOfNode)
-  {
-    tr->bestOfNode = tr->likelihood;
-    tr->insertNode = q;
-    tr->removeNode = p;   
-    for(i = 0; i < numBranches; i++)
-    {
-      tr->currentZQR[i] = tr->zqr[i];           
-      tr->currentLZR[i] = tr->lzr[i];
-      tr->currentLZQ[i] = tr->lzq[i];
-      tr->currentLZS[i] = tr->lzs[i];      
-    }
-  }
-
-  if(tr->likelihood > tr->endLH)
-  {			  
-    tr->insertNode = q;
-    tr->removeNode = p;   
-    for(i = 0; i < numBranches; i++)
-      tr->currentZQR[i] = tr->zqr[i];      
-    tr->endLH = tr->likelihood;                      
-  }        
-
-  /* reset the topology so that it is the same as it was before calling insertBIG */
-  hookup(q, r, qz, numBranches);
-
-  p->next->next->back = p->next->back = (nodeptr) NULL;
-
-  if(tr->thoroughInsertion)
-  {
-    nodeptr s = p->back;
-    hookup(p, s, pz, numBranches);
-  } 
-
-  if((tr->doCutoff) && (tr->likelihood < startLH))
-  {
-    tr->lhAVG += (startLH - tr->likelihood);
-    tr->lhDEC++;
-    if((startLH - tr->likelihood) >= tr->lhCutoff)
-      return PLL_FALSE;	    
-    else
-      return PLL_TRUE;
-  }
-  else
-    return PLL_TRUE;
-}
-
-
-/** @brief Recursively traverse tree and test insertion
-
-    Recursively traverses the tree structure starting from node \a q and
-    tests the insertion of the component specified by leaf \a p at the edge
-    between \a q and \a q->back.
-
-    @param tr
-      PLL instance
-
-    @param pr
-      List of partitions
-    @param p
-      Leaf node of one tree component
-
-    @param q
-      Endpoint node of the edge to test the insertion
-
-    @param mintrav
-      Minimum radius around \a q to test the insertion
-
-    @param maxtrav
-      Maximum radius around \a q to test the insertion\
-*/
-void addTraverseBIG(pllInstance *tr, partitionList *pr, nodeptr p, nodeptr q, int mintrav, int maxtrav)
-{  
-  if (--mintrav <= 0) 
-  {              
-    if (! testInsertBIG(tr, pr, p, q))  return;
-
-  }
-
-  if ((!isTip(q->number, tr->mxtips)) && (--maxtrav > 0)) 
-  {    
-    addTraverseBIG(tr, pr, p, q->next->back, mintrav, maxtrav);
-    addTraverseBIG(tr, pr, p, q->next->next->back, mintrav, maxtrav);
-  }
-} 
-
-
-
-
-/** @brief  Compute the  best SPR movement
-
-    Compute all SPR moves starting from \a p in the space defined by \a mintrav and
-    \a maxtrav and store the best in the \a tr structure.
-
-    @param tr
-      PLL instancve
-
-    @param pr
-      List of partitions
-
-    @param p
-      Node from which to start the SPR moves testing
-
-    @param mintrav
-      Minimum distance from \a p where to start testing SPRs
-
-    @param maxtrav
-      Maximum distance from \a p where to test SPRs
-
-    @return
-       0,1 or \b PLL_BADREAR
-
-    @todo
-      fix the return value
-*/
-int rearrangeBIG(pllInstance *tr, partitionList *pr, nodeptr p, int mintrav, int maxtrav)
-{  
-  double   p1z[PLL_NUM_BRANCHES], p2z[PLL_NUM_BRANCHES], q1z[PLL_NUM_BRANCHES], q2z[PLL_NUM_BRANCHES];
-  nodeptr  p1, p2, q, q1, q2;
-  int      mintrav2, i;  
-  pllBoolean doP = PLL_TRUE, doQ = PLL_TRUE;
-  int numBranches = pr->perGeneBranchLengths ? pr->numberOfPartitions : 1;
-
-  if (maxtrav < 1 || mintrav > maxtrav)  return (0);
-  q = p->back;
-
-
-
-
-  if (!isTip(p->number, tr->mxtips) && doP) 
-  {     
-    p1 = p->next->back;
-    p2 = p->next->next->back;
-
-
-    if(!isTip(p1->number, tr->mxtips) || !isTip(p2->number, tr->mxtips))
-    {
-      for(i = 0; i < numBranches; i++)
-      {
-        p1z[i] = p1->z[i];
-        p2z[i] = p2->z[i];	   	   
-      }
-
-      if (! removeNodeBIG(tr, pr, p,  numBranches)) return PLL_BADREAR;
-
-      if (!isTip(p1->number, tr->mxtips)) 
-      {
-        addTraverseBIG(tr, pr, p, p1->next->back,
-            mintrav, maxtrav);         
-
-        addTraverseBIG(tr, pr, p, p1->next->next->back,
-            mintrav, maxtrav);          
-      }
-
-      if (!isTip(p2->number, tr->mxtips)) 
-      {
-        addTraverseBIG(tr, pr, p, p2->next->back,
-            mintrav, maxtrav);
-        addTraverseBIG(tr, pr, p, p2->next->next->back,
-            mintrav, maxtrav);          
-      }
-
-      hookup(p->next,       p1, p1z, numBranches);
-      hookup(p->next->next, p2, p2z, numBranches);
-      pllUpdatePartials(tr, pr,p, PLL_FALSE);
-    }
-  }  
-
-  if (!isTip(q->number, tr->mxtips) && maxtrav > 0 && doQ) 
-  {
-    q1 = q->next->back;
-    q2 = q->next->next->back;
-
-    /*if (((!q1->tip) && (!q1->next->back->tip || !q1->next->next->back->tip)) ||
-      ((!q2->tip) && (!q2->next->back->tip || !q2->next->next->back->tip))) */
-    if (
-        (
-         ! isTip(q1->number, tr->mxtips) && 
-         (! isTip(q1->next->back->number, tr->mxtips) || ! isTip(q1->next->next->back->number, tr->mxtips))
-        )
-        ||
-        (
-         ! isTip(q2->number, tr->mxtips) && 
-         (! isTip(q2->next->back->number, tr->mxtips) || ! isTip(q2->next->next->back->number, tr->mxtips))
-        )
-       )
-    {
-
-      for(i = 0; i < numBranches; i++)
-      {
-        q1z[i] = q1->z[i];
-        q2z[i] = q2->z[i];
-      }
-
-      if (! removeNodeBIG(tr, pr, q, numBranches)) return PLL_BADREAR;
-
-      mintrav2 = mintrav > 2 ? mintrav : 2;
-
-      if (/*! q1->tip*/ !isTip(q1->number, tr->mxtips)) 
-      {
-        addTraverseBIG(tr, pr, q, q1->next->back,
-            mintrav2 , maxtrav);
-        addTraverseBIG(tr, pr, q, q1->next->next->back,
-            mintrav2 , maxtrav);         
-      }
-
-      if (/*! q2->tip*/ ! isTip(q2->number, tr->mxtips)) 
-      {
-        addTraverseBIG(tr, pr, q, q2->next->back,
-            mintrav2 , maxtrav);
-        addTraverseBIG(tr, pr, q, q2->next->next->back,
-            mintrav2 , maxtrav);          
-      }	   
-
-      hookup(q->next,       q1, q1z, numBranches);
-      hookup(q->next->next, q2, q2z, numBranches);
-
-      pllUpdatePartials(tr, pr,q, PLL_FALSE);
-    }
-  } 
-
-  return  1;
-} 
-
-
-
-
-/** @brief Perform an SPR move?
-
-    @param tr
-      PLL instance
-
-    @param pr
-      List of partitions
-
-    @param mintrav
-
-    @param maxtrav
-
-    @param adef
-
-    @param bt
-
-    @param iList
-
-*/
-double treeOptimizeRapid(pllInstance *tr, partitionList *pr, int mintrav, int maxtrav, bestlist *bt, infoList *iList)
-{
-  int i, index,
-      *perm = (int*)NULL;   
-
-  nodeRectifier(tr);
-
-
-
-  if (maxtrav > tr->mxtips - 3)  
-    maxtrav = tr->mxtips - 3;  
-
-
-
-  resetInfoList(iList);
-
-  resetBestTree(bt);
-
-  tr->startLH = tr->endLH = tr->likelihood;
-
-  if(tr->doCutoff)
-  {
-    if(tr->bigCutoff)
-    {	  
-      if(tr->itCount == 0)    
-        tr->lhCutoff = 0.5 * (tr->likelihood / -1000.0);    
-      else    		 
-        tr->lhCutoff = 0.5 * ((tr->lhAVG) / ((double)(tr->lhDEC))); 	  
-    }
-    else
-    {
-      if(tr->itCount == 0)    
-        tr->lhCutoff = tr->likelihood / -1000.0;    
-      else    		 
-        tr->lhCutoff = (tr->lhAVG) / ((double)(tr->lhDEC));   
-    }    
-
-    tr->itCount = tr->itCount + 1;
-    tr->lhAVG = 0;
-    tr->lhDEC = 0;
-  }
-
-  /*
-     printf("DoCutoff: %d\n", tr->doCutoff);
-     printf("%d %f %f %f\n", tr->itCount, tr->lhAVG, tr->lhDEC, tr->lhCutoff);
-
-     printf("%d %d\n", mintrav, maxtrav);
-     */
-
-  for(i = 1; i <= tr->mxtips + tr->mxtips - 2; i++)
-  {           
-    tr->bestOfNode = PLL_UNLIKELY;          
-
-    if(tr->permuteTreeoptimize)
-      index = perm[i];
-    else
-      index = i;     
-
-    if(rearrangeBIG(tr, pr, tr->nodep[index], mintrav, maxtrav))
-    {    
-      if(tr->thoroughInsertion)
-      {
-        if(tr->endLH > tr->startLH)                 	
-        {			   
-          /* commit the best SPR found by rearrangeBIG */
-          restoreTreeFast(tr, pr);    
-          tr->startLH = tr->endLH = tr->likelihood;	 
-          saveBestTree(bt, tr, pr->perGeneBranchLengths?pr->numberOfPartitions:1);
-        }
-        else
-        { 		  
-          if(tr->bestOfNode != PLL_UNLIKELY)
-            restoreTopologyOnly(tr, bt, pr->perGeneBranchLengths?pr->numberOfPartitions:1);
-        }	   
-      }
-      else
-      {
-        insertInfoList(tr->nodep[index], tr->bestOfNode, iList);	    
-        if(tr->endLH > tr->startLH)                 	
-        {		      
-          restoreTreeFast(tr, pr);
-          tr->startLH = tr->endLH = tr->likelihood;	  	 	  	  	  	  	  	  
-        }	    	  
-      }
-    }     
-  }     
-
-  if(!tr->thoroughInsertion)
-  {           
-    tr->thoroughInsertion = PLL_TRUE;  
-
-    for(i = 0; i < iList->valid; i++)
-    { 	  
-      tr->bestOfNode = PLL_UNLIKELY;
-
-      if(rearrangeBIG(tr, pr, iList->list[i].node, mintrav, maxtrav))
-      {	  
-        if(tr->endLH > tr->startLH)                 	
-        {	 	     
-          restoreTreeFast(tr, pr);
-          tr->startLH = tr->endLH = tr->likelihood;	 
-          saveBestTree(bt, tr, pr->perGeneBranchLengths?pr->numberOfPartitions:1);
-        }
-        else
-        { 
-
-          if(tr->bestOfNode != PLL_UNLIKELY)
-          {	     
-            restoreTopologyOnly(tr, bt, pr->perGeneBranchLengths?pr->numberOfPartitions:1);
-          }	
-        }      
-      }
-    }       
-
-    tr->thoroughInsertion = PLL_FALSE;
-  }
-
-  if(tr->permuteTreeoptimize)
-    rax_free(perm);
-
-  return tr->startLH;     
-}
-
-
-
-
-pllBoolean testInsertRestoreBIG (pllInstance *tr, partitionList *pr, nodeptr p, nodeptr q)
-{    
-  if(tr->thoroughInsertion)
-  {
-    if (! insertBIG(tr, pr, p, q))       return PLL_FALSE;
-
-    pllEvaluateLikelihood (tr, pr, p->next->next, PLL_FALSE, PLL_FALSE);
-  }
-  else
-  {
-    if (! insertRestoreBIG(tr, pr, p, q))       return PLL_FALSE;
-
-    {
-      nodeptr x, y;
-      x = p->next->next;
-      y = p->back;
-
-      if(! isTip(x->number, tr->mxtips) && isTip(y->number, tr->mxtips))
-      {
-        while ((! x->x)) 
-        {
-          if (! (x->x))
-            pllUpdatePartials(tr, pr,x, PLL_FALSE);
-        }
-      }
-
-      if(isTip(x->number, tr->mxtips) && !isTip(y->number, tr->mxtips))
-      {
-        while ((! y->x)) 
-        {		  
-          if (! (y->x))
-            pllUpdatePartials(tr, pr,y, PLL_FALSE);
-        }
-      }
-
-      if(!isTip(x->number, tr->mxtips) && !isTip(y->number, tr->mxtips))
-      {
-        while ((! x->x) || (! y->x)) 
-        {
-          if (! (x->x))
-            pllUpdatePartials(tr, pr,x, PLL_FALSE);
-          if (! (y->x))
-            pllUpdatePartials(tr, pr,y, PLL_FALSE);
-        }
-      }				      	
-
-    }
-
-    tr->likelihood = tr->endLH;
-  }
-
-  return PLL_TRUE;
-} 
-
-void restoreTreeFast(pllInstance *tr, partitionList *pr)
-{
-  removeNodeRestoreBIG(tr, pr, tr->removeNode);
-  testInsertRestoreBIG(tr, pr, tr->removeNode, tr->insertNode);
-}
-
-/*
-static void myfwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream)
-{
-  size_t  
-    bytes_written = fwrite(ptr, size, nmemb, stream);
-
-  assert(bytes_written == nmemb);
-}
-
-static void myfread(void *ptr, size_t size, size_t nmemb, FILE *stream)
-{
-  size_t
-    bytes_read;
-
-  bytes_read = fread(ptr, size, nmemb, stream);
-
-  assert(bytes_read == nmemb);
-}
-
-static void readTree(pllInstance *tr, partitionList *pr, FILE *f)
-{
-  int 
-    nodeNumber,   
-    x = tr->mxtips + 3 * (tr->mxtips - 1);
-
-  nodeptr
-    startAddress;
-
-  myfread(&nodeNumber, sizeof(int), 1, f);
-
-  tr->start = tr->nodep[nodeNumber];
-
-
-  myfread(&startAddress, sizeof(nodeptr), 1, f);
-
-  myfread(tr->nodeBaseAddress, sizeof(node), x, f);
-
-  {
-    int i;    
-
-    size_t         
-      offset;
-
-    pllBoolean 
-      addIt;
-
-    if(startAddress > tr->nodeBaseAddress)
-    {
-      addIt = PLL_FALSE;
-      offset = (size_t)startAddress - (size_t)tr->nodeBaseAddress;
-    }
-    else
-    {
-      addIt = PLL_TRUE;
-      offset = (size_t)tr->nodeBaseAddress - (size_t)startAddress;
-    }       
-
-    for(i = 0; i < x; i++)
-    {      	
-      if(addIt)
-      {	    
-        tr->nodeBaseAddress[i].next = (nodeptr)((size_t)tr->nodeBaseAddress[i].next + offset);	
-        tr->nodeBaseAddress[i].back = (nodeptr)((size_t)tr->nodeBaseAddress[i].back + offset);
-      }
-      else
-      {
-
-        tr->nodeBaseAddress[i].next = (nodeptr)((size_t)tr->nodeBaseAddress[i].next - offset);	
-        tr->nodeBaseAddress[i].back = (nodeptr)((size_t)tr->nodeBaseAddress[i].back - offset);	   
-      } 
-    }
-
-  }
-
-  pllEvaluateLikelihood (tr, pr, tr->start, PLL_TRUE, PLL_FALSE);
-
-  printBothOpen("RAxML Restart with likelihood: %1.50f\n", tr->likelihood);
-}
-
-static void readCheckpoint(pllInstance *tr, partitionList *pr)
-{
-  int  
-    restartErrors = 0,
-                  model; 
-
-  FILE 
-    *f = myfopen(binaryCheckpointInputName, "r");
-*/
-  /* cdta */   
-/*
-  myfread(&(tr->ckp), sizeof(checkPointState), 1, f);
-
-
-
-  if(tr->ckp.searchConvergenceCriterion != tr->searchConvergenceCriterion)
-  {
-    printf("restart error, you are trying to re-start a run where the ML search criterion was turned %s\n", (tr->ckp.searchConvergenceCriterion)?"ON":"OFF");
-    restartErrors++;
-  }  
-
-  if(tr->ckp.rateHetModel !=  tr->rateHetModel)
-  {
-    printf("restart error, you are trying to re-start a run with a different model of rate heterogeneity, the checkpoint was obtained under: %s\n", (tr->ckp.rateHetModel == PLL_GAMMA)?"GAMMA":"PSR");
-    restartErrors++;
-  }  
-
-  if(tr->ckp.maxCategories !=  tr->maxCategories)
-  {
-    printf("restart error, you are trying to re-start a run with %d per-site rate categories, the checkpoint was obtained with: %d\n", tr->maxCategories, tr->ckp.maxCategories);
-    restartErrors++;
-  }
-
-  if(tr->ckp.NumberOfModels != pr->numberOfPartitions)
-  {
-    printf("restart error, you are trying to re-start a run with %d partitions, the checkpoint was obtained with: %d partitions\n", (int)pr->numberOfPartitions, tr->ckp.NumberOfModels);
-    restartErrors++;      
-  }
-
-  if(tr->ckp.numBranches != pr->perGeneBranchLengths?pr->numberOfPartitions:1)
-  {
-    printf("restart error, you are trying to re-start a run where independent per-site branch length estimates were turned %s\n", (tr->ckp.numBranches > 1)?"ON":"OFF");
-    restartErrors++;
-  }
-
-  if(tr->ckp.originalCrunchedLength != tr->originalCrunchedLength)
-  {
-    printf("restart error, you are trying to re-start a run with %d site patterns, the checkpoint was obtained with: %d site patterns\n", tr->ckp.originalCrunchedLength, tr->originalCrunchedLength);
-    restartErrors++; 
-  }
-
-  if(tr->ckp.mxtips != tr->mxtips)
-  {
-    printf("restart error, you are trying to re-start a run with %d taxa, the checkpoint was obtained with: %d taxa\n", tr->mxtips, tr->ckp.mxtips);
-    restartErrors++; 
-  }
-
-  if(strcmp(tr->ckp.seq_file, seq_file) != 0)
-  {
-    printf("restart error, you are trying to re-start from alignemnt file %s, the checkpoint was obtained with file: %s\n", tr->ckp.seq_file, seq_file);
-    restartErrors++; 
-  }
-
-  printf("REstart errors: %d\n", restartErrors);
-
-  if(restartErrors > 0)
-  {
-    printf("User induced errors with the restart from checkpoint, exiting ...\n");
-
-    if(restartErrors > 4)
-      printf(" ... maybe you should do field work instead of trying to use a computer ...\n");
-    if(restartErrors > 6)
-      printf(" ... kala eisai telios ilithios;\n");
-
-    exit(-1);
-  }
-
-  tr->ntips = tr->mxtips;
-
-  tr->startLH    = tr->ckp.tr_startLH;
-  tr->endLH      = tr->ckp.tr_endLH;
-  tr->likelihood = tr->ckp.tr_likelihood;
-  tr->bestOfNode = tr->ckp.tr_bestOfNode;
-
-  tr->lhCutoff   = tr->ckp.tr_lhCutoff;
-  tr->lhAVG      = tr->ckp.tr_lhAVG;
-  tr->lhDEC      = tr->ckp.tr_lhDEC;
-  tr->itCount    = tr->ckp.tr_itCount;
-  tr->thoroughInsertion       = tr->ckp.tr_thoroughInsertion;
-
-
-
-  accumulatedTime = tr->ckp.accumulatedTime;
-*/
-  /* printf("Accumulated time so far: %f\n", accumulatedTime); */
-/*
-  tr->optimizeRateCategoryInvocations = tr->ckp.tr_optimizeRateCategoryInvocations;
-
-
-  myfread(tr->tree0, sizeof(char), tr->treeStringLength, f);
-  myfread(tr->tree1, sizeof(char), tr->treeStringLength, f);
-
-  if(tr->searchConvergenceCriterion)
-  {
-    int bCounter = 0;
-
-    if((tr->ckp.state == PLL_FAST_SPRS && tr->ckp.fastIterations > 0) ||
-        (tr->ckp.state == PLL_SLOW_SPRS && tr->ckp.thoroughIterations > 0))
-    { 
-
-#ifdef _DEBUG_CHECKPOINTING    
-      printf("parsing Tree 0\n");
-#endif
-
-      treeReadTopologyString(tr->tree0, tr);   
-
-      bitVectorInitravSpecial(tr->bitVectors, tr->nodep[1]->back, tr->mxtips, tr->vLength, tr->h, 0, PLL_BIPARTITIONS_RF, (branchInfo *)NULL,
-          &bCounter, 1, PLL_FALSE, PLL_FALSE, tr->threadID);
-
-      assert(bCounter == tr->mxtips - 3);
-    }
-
-    bCounter = 0;
-
-    if((tr->ckp.state == PLL_FAST_SPRS && tr->ckp.fastIterations > 1) ||
-        (tr->ckp.state == PLL_SLOW_SPRS && tr->ckp.thoroughIterations > 1))
-    {
-
-#ifdef _DEBUG_CHECKPOINTING
-      printf("parsing Tree 1\n");
-#endif
-
-      treeReadTopologyString(tr->tree1, tr); 
-
-      bitVectorInitravSpecial(tr->bitVectors, tr->nodep[1]->back, tr->mxtips, tr->vLength, tr->h, 1, PLL_BIPARTITIONS_RF, (branchInfo *)NULL,
-          &bCounter, 1, PLL_FALSE, PLL_FALSE, tr->threadID);
-
-      assert(bCounter == tr->mxtips - 3);
-    }
-  }
-
-  myfread(tr->rateCategory, sizeof(int), tr->originalCrunchedLength, f);
-  myfread(tr->patrat, sizeof(double), tr->originalCrunchedLength, f);
-  myfread(tr->patratStored, sizeof(double), tr->originalCrunchedLength, f);
-
-*/
-  /* need to read this as well in checkpoints, otherwise the branch lengths 
-     in the output tree files will be wrong, not the internal branch lengths though */
-/*
-  //TODO: Same problem as writing the checkpoint
-  //myfread(tr->fracchanges,  sizeof(double), pr->numberOfPartitions, f);
-  myfread(&(tr->fracchange),   sizeof(double), 1, f);
-*/
-  /* pInfo */
-/*
-  for(model = 0; model < pr->numberOfPartitions; model++)
-  {
-    int 
-      dataType = pr->partitionData[model]->dataType;
-
-    myfread(&(pr->partitionData[model]->numberOfCategories), sizeof(int), 1, f);
-    myfread(pr->partitionData[model]->perSiteRates, sizeof(double), tr->maxCategories, f);
-    myfread(pr->partitionData[model]->EIGN, sizeof(double), pLengths[dataType].eignLength, f);
-    myfread(pr->partitionData[model]->EV, sizeof(double),  pLengths[dataType].evLength, f);
-    myfread(pr->partitionData[model]->EI, sizeof(double),  pLengths[dataType].eiLength, f);
-
-    myfread(pr->partitionData[model]->frequencies, sizeof(double),  pLengths[dataType].frequenciesLength, f);
-    myfread(pr->partitionData[model]->tipVector, sizeof(double),  pLengths[dataType].tipVectorLength, f);
-    myfread(pr->partitionData[model]->substRates, sizeof(double),  pLengths[dataType].substRatesLength, f);
-    myfread(&(pr->partitionData[model]->alpha), sizeof(double), 1, f);
-    
-    if(pr->partitionData[model]->protModels == PLL_LG4M || pr->partitionData[model]->protModels == PLL_LG4X)
-	{
-	  int 
-	    k;
-	  
-	  for(k = 0; k < 4; k++)
-	    {
-	      myfread(pr->partitionData[model]->EIGN_LG4[k], sizeof(double), pLengths[dataType].eignLength, f);
-	      myfread(pr->partitionData[model]->EV_LG4[k], sizeof(double),  pLengths[dataType].evLength, f);
-	      myfread(pr->partitionData[model]->EI_LG4[k], sizeof(double),  pLengths[dataType].eiLength, f);    
-	      myfread(pr->partitionData[model]->frequencies_LG4[k], sizeof(double),  pLengths[dataType].frequenciesLength, f);
-	      myfread(pr->partitionData[model]->tipVector_LG4[k], sizeof(double),  pLengths[dataType].tipVectorLength, f);  
-	      myfread(pr->partitionData[model]->substRates_LG4[k], sizeof(double),  pLengths[dataType].substRatesLength, f);    
-	    }
-	}
-
-    pllMakeGammaCats(pr->partitionData[model]->alpha, pr->partitionData[model]->gammaRates, 4, tr->useMedian);
-  }
-
-#if (defined(_FINE_GRAIN_MPI) || defined(_USE_PTHREADS))
-  pllMasterBarrier (tr, pr, PLL_THREAD_COPY_INIT_MODEL);
-#endif
-
-  updatePerSiteRates(tr, pr, PLL_FALSE);
-
-  readTree(tr, pr, f);
-
-  fclose(f); 
-
-}
-
-void restart(pllInstance *tr, partitionList *pr)
-{  
-  readCheckpoint(tr, pr);
-
-  switch(tr->ckp.state)
-  {
-    case PLL_REARR_SETTING:      
-      break;
-    case PLL_FAST_SPRS:
-      break;
-    case PLL_SLOW_SPRS:
-      break;
-    default:
-      assert(0);
-  }
-}
-*/
-
-/** @brief Optimize branch lengths and evaluate likelihood of topology
-
-    Smooths the branch lengths of the tree, using the explicitly given
-    iteration limit \a maxSmoothIterations, and afterwards evaluates the
-    likelihood of the tree starting at \a tr->start. The resulting
-    likelihood is placed in \a tr->likelihood
-
-    @param tr
-      The PLL instance
-
-    @param pr
-      List of partitions
-
-    @param maxSmoothIterations
-      Maximum number of smoothing iterations, handed on to \a smoothTree
-*/
-void pllOptimizeBranchLengths (pllInstance *tr, partitionList *pr, int maxSmoothIterations)
-{
-  /* the iteration limit used to be (32 * smoothFactor) */
-  smoothTree (tr, pr, maxSmoothIterations);
-
-  /* evaluate the (user) tree with the smoothed branch lengths */
-  pllEvaluateLikelihood (tr, pr, tr->start, PLL_FALSE, PLL_FALSE);
-}
-
-/** @brief Perform an NNI move
-
-    Changes the tree topology of instance \a tr by a single NNI (Nearest Neighbor
-    Interchange) move around the branch between \a p and \a q = \a p->back.
-    When \a swap equals \b PLL_NNI_P_NEXT the subtrees rooted at \a p->next->back and
-    \a q->next->back are exchanged. For every other value of \a swap (intended:
-    \b PLL_NNI_P_NEXTNEXT) the subtrees rooted at \a p->next->next->back and
-    \a q->next->back are exchanged. For clarity, see the illustration.
-
-    @param tr
-      PLL instance
-
-    @param p
-      Node to use as origin for performing NNI
-
-    @param swap
-      Which node to use for the NNI move. \b PLL_NNI_P_NEXT uses node p->next while \b PLL_NNI_P_NEXTNEXT uses p->next->next
-
-    @return
-      \b PLL_TRUE on success. If \a p->back is a tip, \a errno is set to \b PLL_NNI_Q_TIP
-      and \b PLL_FALSE is returned; if \a p is a tip, \a errno is set to \b PLL_NNI_P_TIP
-      and \b PLL_FALSE is returned. The topology is left untouched in both failure cases.
-
-    @todo
-      Started error checking here. Instead of checking the errors in the specified way, implement a variadic
-      function where we pass the results of each check and the error code we want to assign if there is at
-      least one negative result
-
-    @image html nni.png "In case \a swap is set to \b PLL_NNI_P_NEXT then the dashed red edge between \a p and \a r is removed and the blue edges are created. If \a swap is set to \b PLL_NNI_P_NEXTNEXT then the dashed red edge between \a p and \a s is removed and the green edges are created. In both cases the black dashed edge is removed"
-*/
-int pllTopologyPerformNNI(pllInstance * tr, nodeptr p, int swap)
-{
-  nodeptr q = p->back;
-  nodeptr pSide, moved;
-
-  /* an NNI is only defined on an inner branch: reject tips at either end */
-  if (isTip(q->number, tr->mxtips))
-   {
-     errno = PLL_NNI_Q_TIP;
-     return (PLL_FALSE);
-   }
-  if (isTip(p->number, tr->mxtips))
-   {
-     errno = PLL_NNI_P_TIP;
-     return (PLL_FALSE);
-   }
-  assert(!isTip(q->number, tr->mxtips));
-  assert(!isTip(p->number, tr->mxtips));
-
-  /* pick the slot on the p side whose subtree takes part in the exchange */
-  pSide = (swap == PLL_NNI_P_NEXT) ? p->next : p->next->next;
-
-  /* remember the subtree currently attached there, then cross-connect */
-  moved = pSide->back;
-  hookupFull(pSide,   q->next->back, q->next->z);
-  hookupFull(q->next, moved,         pSide->z);
-
-  return PLL_TRUE;
-}
-
-/** @brief Compares 2 NNI moves by their likelihood improvement
-
-    Comparison callback (usable with \c qsort) that orders \a nniMove
-    elements by ascending \a deltaLH.
-
-    @param nni1
-      Pointer to the first \a nniMove
-
-    @param nni2
-      Pointer to the second \a nniMove
-
-    @return
-      -1 if the first move has the smaller \a deltaLH, 1 if it has the larger
-      one and 0 if neither is greater than the other
-*/
-static int cmp_nni(const void* nni1, const void* nni2) {
-	const nniMove* myNNI1 = (const nniMove*) nni1;
-	const nniMove* myNNI2 = (const nniMove*) nni2;
-	/* Compare the values directly. Casting a scaled difference to int
-	   truncates differences below 1e-6 to "equal" (which makes the ordering
-	   inconsistent) and is undefined when the difference does not fit an int. */
-	return (myNNI1->deltaLH > myNNI2->deltaLH) - (myNNI1->deltaLH < myNNI2->deltaLH);
-}
-
-/** @brief Gets the best NNI move for a branch
-
-    Tries both NNI moves around the branch between \a p and \a p->back. For
-    each move the topology is changed, the partials at both ends of the branch
-    are updated, \a update is called on the central branch and the likelihood
-    is evaluated. After each trial the topology and the original central
-    branch length are restored.
-
-    @param tr
-      PLL instance
-
-    @param pr
-      List of partitions
-
-    @param p
-      Node to use as origin for performing NNI. Neither \a p nor \a p->back
-      may be a tip (asserted)
-
-    @param curLH
-      The current likelihood, used as the reference for \a deltaLH
-
-    @return
-      The NNI move with the larger positive \a deltaLH (the first move wins a
-      tie). If neither move improves on \a curLH, a move with \a nniType 0 and
-      \a deltaLH 0 is returned, meaning that no NNI should be done
-
-    @note
-      \a tr->likelihood is overwritten by the trial evaluations and is not
-      reset to \a curLH on return
-*/
-nniMove getBestNNIForBran(pllInstance* tr, partitionList *pr, nodeptr p,
-		double curLH) {
-	nodeptr q = p->back;
-	assert( ! isTip(p->number, tr->mxtips));
-	assert( ! isTip(q->number, tr->mxtips));
-	/* NOTE(review): the calls inside the _DEBUG_NNI sections below look older than
-	   the rest of the file (e.g. pllEvaluateLikelihood with three arguments) --
-	   confirm that they still build before enabling _DEBUG_NNI */
-#ifdef _DEBUG_NNI
-	pllTreeToNewick(tr->tree_string, tr, tr->start->back, TRUE, FALSE, 0, 0, 0, SUMMARIZE_LH, 0,0);
-	fprintf(stderr, "%s\n", tr->tree_string);
-#endif
-
-	/* Backup the current branch length */
-	/* NOTE(review): the loops here run over pr->numberOfPartitions while z0 holds
-	   PLL_NUM_BRANCHES entries -- confirm the former never exceeds the latter */
-	double z0[PLL_NUM_BRANCHES];
-	int i;
-	for (i = 0; i < pr->numberOfPartitions; i++) {
-		z0[i] = p->z[i];
-	}
-#ifdef _DEBUG_NNI
-	double lhOld = tr->likelihood;
-	printf("lhOld: %f \n", lhOld);
-#endif
-	double lh0 = curLH;
-
-
-#ifdef _DEBUG_NNI
-	printf("lh0: %f \n", lh0);
-#endif
-	nniMove nni0; // nni0 means no NNI move is done
-	nni0.p = p;
-	nni0.nniType = 0;
-	nni0.deltaLH = 0;
-	nni0.likelihood = lh0;
-	for (i = 0; i < pr->numberOfPartitions; i++) {
-		nni0.z[i] = p->z[i];
-		nni0.z0[i] = z0[i];
-	}
-
-	// Now try to do an NNI move of type 1
-	pllTopologyPerformNNI(tr, p, PLL_NNI_P_NEXT);
-	/* Update branch lengths */
-	pllUpdatePartials(tr, pr, p, PLL_FALSE);
-	pllUpdatePartials(tr, pr, q, PLL_FALSE);
-	update(tr, pr, p);
-	pllEvaluateLikelihood (tr, pr, p, PLL_FALSE, PLL_FALSE);
-	/* read the likelihood only now: before the evaluation tr->likelihood
-	   still holds the value of the previously evaluated topology */
-	double lh1 = tr->likelihood;
-
-	nniMove nni1;
-	nni1.p = p;
-	nni1.nniType = 1;
-	// Store the optimized and unoptimized central branch length
-	for (i = 0; i < pr->numberOfPartitions; i++) {
-		nni1.z[i] = p->z[i];
-		nni1.z0[i] = z0[i];
-	}
-	nni1.likelihood = lh1;
-	nni1.deltaLH = lh1 - lh0;
-#ifdef _DEBUG_NNI
-	printf("Delta likelihood of the 1.NNI move: %f\n", nni1.deltaLH);
-#endif
-
-	/* Restore previous NNI move */
-	pllTopologyPerformNNI(tr, p, PLL_NNI_P_NEXT);
-	/* Restore the old branch length */
-	for (i = 0; i < pr->numberOfPartitions; i++) {
-		p->z[i] = z0[i];
-		p->back->z[i] = z0[i];
-	}
-
-#ifdef _DEBUG_NNI
-	printf("Restore topology\n");
-	pllTreeToNewick(tr->tree_string, tr, tr->start->back, TRUE, FALSE, 0, 0, 0, SUMMARIZE_LH, 0,0);
-	fprintf(stderr, "%s\n", tr->tree_string);
-	pllEvaluateLikelihood (tr, tr->start, TRUE);
-	printf("Likelihood after restoring from NNI 1: %f\n", tr->likelihood);
-#endif
-	/* Try to do an NNI move of type 2 */
-	pllTopologyPerformNNI(tr, p, PLL_NNI_P_NEXTNEXT);
-	/* Update branch lengths */
-	pllUpdatePartials(tr, pr, p, PLL_FALSE);
-	pllUpdatePartials(tr, pr, q, PLL_FALSE);
-	update(tr, pr, p);
-	pllEvaluateLikelihood (tr, pr, p, PLL_FALSE, PLL_FALSE);
-	/* as above: the likelihood of this move is only valid after the evaluation */
-	double lh2 = tr->likelihood;
-
-	// Create the nniMove struct to store this move
-	nniMove nni2;
-	nni2.p = p;
-	nni2.nniType = 2;
-
-	// Store the optimized and unoptimized central branch length
-	for (i = 0; i < pr->numberOfPartitions; i++) {
-		nni2.z[i] = p->z[i];
-		nni2.z0[i] = z0[i];
-	}
-	nni2.likelihood = lh2;
-	nni2.deltaLH = lh2 - lh0;
-#ifdef _DEBUG_NNI
-	printf("Delta likelihood of the 2.NNI move: %f\n", nni2.deltaLH);
-#endif
-
-	/* Restore previous NNI move */
-	pllTopologyPerformNNI(tr, p, PLL_NNI_P_NEXTNEXT);
-	pllUpdatePartials(tr, pr, p, PLL_FALSE);
-	pllUpdatePartials(tr, pr, p->back, PLL_FALSE);
-	/* Restore the old branch length */
-	for (i = 0; i < pr->numberOfPartitions; i++) {
-		p->z[i] = z0[i];
-		p->back->z[i] = z0[i];
-	}
-
-	/* Pick the improving move with the larger gain. The first move wins ties.
-	   A first move with a gain of exactly 0 no longer hides an improving
-	   second move. */
-	if (nni1.deltaLH > 0 && nni1.deltaLH >= nni2.deltaLH) {
-		return nni1;
-	}
-	if (nni2.deltaLH > 0) {
-		return nni2;
-	}
-	return nni0;
-}
-
-/** @brief Recursively collect the best NNI move of every branch in a subtree
-
-    Does nothing if \a p is a tip. Otherwise the result of ::getBestNNIForBran
-    for \a p is stored in \a nniList at index \a *cnt, \a *cnt_nni is incremented
-    when the stored move has a non-zero \a deltaLH, \a *cnt is incremented, and
-    the function recurses into the \a back node of every other node in the
-    \a next ring of \a p.
-
-    @param tr
-      PLL instance
-
-    @param pr
-      List of partitions
-
-    @param p
-      Node at which the traversal (re)starts
-
-    @param nniList
-      Output array of moves; the caller must provide enough room
-
-    @param cnt
-      In/out: number of entries written to \a nniList so far
-
-    @param cnt_nni
-      In/out: number of stored moves whose \a deltaLH is not zero
-
-    @param curLH
-      The current likelihood, passed on to ::getBestNNIForBran
-*/
-void evalNNIForSubtree(pllInstance* tr, partitionList *pr, nodeptr p,
-		nniMove* nniList, int* cnt, int* cnt_nni, double curLH) {
-	nodeptr ring;
-
-	/* tips have no inner branch to evaluate and nothing below them */
-	if (isTip(p->number, tr->mxtips)) {
-		return;
-	}
-
-	nniList[*cnt] = getBestNNIForBran(tr, pr, p, curLH);
-	if (nniList[*cnt].deltaLH != 0.0) {
-		(*cnt_nni)++;
-	}
-	(*cnt)++;
-
-	/* walk the remaining nodes of the ring and descend behind each of them */
-	for (ring = p->next; ring != p; ring = ring->next) {
-		evalNNIForSubtree(tr, pr, ring->back, nniList, cnt, cnt_nni, curLH);
-	}
-}
-
-/** @brief Perform an NNI search
-
-    Evaluates the NNI (Nearest Neighbor Interchange) moves of the branches
-    reached from \a tr->start->back, keeps the improving ones, filters out moves
-    that conflict with a better move and applies the remaining moves to the
-    topology of \a tr. Afterwards all branches are re-optimized and, if
-    requested, the model parameters as well. If the likelihood got worse, the
-    applied moves are undone and the search is retried with half as many moves.
-
-    @param tr
-      PLL instance
-
-    @param pr
-      List of partitions
-
-    @param estimateModel
-      Determine whether the model parameters should be optimized
-
-    @return
-      \b PLL_TRUE if improving NNI moves were found and processed. \b PLL_FALSE if
-      no improving NNI move exists or if memory could not be allocated
-
-*/
-int pllNniSearch(pllInstance * tr, partitionList *pr, int estimateModel) {
-
-	double curScore = tr->likelihood;
-	int i, j, k;
-
-	/* Initialize the NNI list */
-	nniMove* nniList = (nniMove*) malloc((tr->mxtips - 3) * sizeof(nniMove));
-	if (nniList == NULL)
-		return PLL_FALSE;
-
-	/* fill up the NNI list */
-	nodeptr p = tr->start->back;
-	nodeptr q = p->next;
-	int cnt = 0; // number of visited internal branches during NNI evaluation
-	int cnt_nni = 0; // number of positive NNI found
-	while (q != p) {
-		evalNNIForSubtree(tr, pr, q->back, nniList, &cnt, &cnt_nni, curScore);
-		q = q->next;
-	}
-	if (cnt_nni == 0) {
-		/* nothing to apply: release the list instead of leaking it */
-		free(nniList);
-		return PLL_FALSE;
-	}
-
-	nniMove* impNNIList = (nniMove*) malloc(cnt_nni * sizeof(nniMove));
-	// list of non-conflicting positive NNI
-	nniMove* nonConfNNIList = (nniMove*) calloc(cnt_nni, sizeof(nniMove));
-	if (impNNIList == NULL || nonConfNNIList == NULL) {
-		free(nniList);
-		free(impNNIList);
-		free(nonConfNNIList);
-		return PLL_FALSE;
-	}
-
-	/* Collect the improving moves. Only the cnt entries that were actually
-	   written are inspected, and never more than cnt_nni are copied. */
-	int numImpNNI = 0;
-	for (i = 0; i < cnt && numImpNNI < cnt_nni; i++) {
-		if (nniList[i].deltaLH > 0.0) {
-			impNNIList[numImpNNI] = nniList[i];
-			numImpNNI++;
-		}
-	}
-	if (numImpNNI == 0) {
-		free(nniList);
-		free(impNNIList);
-		free(nonConfNNIList);
-		return PLL_FALSE;
-	}
-	// sort impNNIList
-	qsort(impNNIList, numImpNNI, sizeof(nniMove), cmp_nni);
-
-	// the best NNI will always be taken
-	nonConfNNIList[0] = impNNIList[numImpNNI - 1];
-
-	// Filter out conflicting NNI
-	int numNonConflictNNI = 1; // size of the non-conflicting NNI list;
-	for (k = numImpNNI - 2; k >= 0; k--) {
-		int conflict = PLL_FALSE;
-		for (j = 0; j < numNonConflictNNI; j++) {
-			if (impNNIList[k].p->number == nonConfNNIList[j].p->number
-					|| impNNIList[k].p->number
-							== nonConfNNIList[j].p->back->number) {
-				conflict = PLL_TRUE;
-				break;
-			}
-		}
-		if (conflict) {
-			continue;
-		} else {
-			nonConfNNIList[numNonConflictNNI] = impNNIList[k];
-			numNonConflictNNI++;
-		}
-	}
-
-	// Applying non-conflicting NNI moves
-	double delta = 1.0; // portion of NNI moves to apply
-	int notImproved;
-	do {
-		notImproved = PLL_FALSE;
-		int numNNI2Apply = ceil(numNonConflictNNI * delta);
-		for (i = 0; i < numNNI2Apply; i++) {
-			// Just do the topological change
-			pllTopologyPerformNNI(tr, nonConfNNIList[i].p, nonConfNNIList[i].nniType);
-			pllUpdatePartials(tr, pr, nonConfNNIList[i].p, PLL_FALSE);
-			pllUpdatePartials(tr, pr, nonConfNNIList[i].p->back, PLL_FALSE);
-			// Apply the stored branch length
-			for (j = 0; j < pr->numberOfPartitions; j++) {
-				nonConfNNIList[i].p->z[j] = nonConfNNIList[i].z[j];
-				nonConfNNIList[i].p->back->z[j] = nonConfNNIList[i].z[j];
-			}
-		}
-		// Re-optimize all branches
-		smoothTree(tr, pr, 2);
-		pllEvaluateLikelihood (tr, pr, tr->start, PLL_FALSE, PLL_FALSE);
-		if (estimateModel) {
-			modOpt(tr, pr, 0.1);
-		}
-		pllEvaluateLikelihood (tr, pr, tr->start, PLL_FALSE, PLL_FALSE);
-		if (tr->likelihood < curScore) {
-#ifdef _DEBUG_NNI
-			printf("Tree likelihood gets worse after applying NNI\n");
-			printf("curScore = %30.20f\n", curScore);
-			printf("newScore = %30.20f\n", tr->likelihood);
-			printf("Rolling back the tree\n");
-#endif
-			/* performing the same NNI a second time undoes it */
-			for (i = 0; i < numNNI2Apply; i++) {
-				pllTopologyPerformNNI(tr, nonConfNNIList[i].p, nonConfNNIList[i].nniType);
-				// Restore the branch length
-				for (j = 0; j < pr->numberOfPartitions; j++) {
-					nonConfNNIList[i].p->z[j] = nonConfNNIList[i].z0[j];
-					nonConfNNIList[i].p->back->z[j] = nonConfNNIList[i].z0[j];
-				}
-			}
-			pllEvaluateLikelihood (tr, pr, tr->start, PLL_FALSE, PLL_FALSE);
-#ifdef _DEBUG_NNI
-			printf("Tree likelihood after rolling back = %f \n",
-					tr->likelihood);
-#endif
-			/* retry with half of the moves unless only a single one was applied */
-			notImproved = PLL_TRUE & (numNNI2Apply > 1);
-			delta = delta * 0.5;
-		}
-	} while (notImproved);
-	free(nniList);
-	free(impNNIList);
-	free(nonConfNNIList);
-
-	return PLL_TRUE;
-}
-
-
-/** @defgroup rearrangementGroup Topological rearrangements
-    
-    This set of functions handles the rearrangement of the tree topology
-*/
-
-
-/** @ingroup rearrangementGroup
-    @brief Create a list for storing topology rearrangements
-
-    Allocates space and initializes a structure that will hold information
-    of \a max topological rearrangements
-
-    @param max
-      Maximum number of elements that the structure should hold
-
-    @return
-      The newly created, empty list, or \b NULL if the memory could not be
-      allocated
-
-    @note This should be called for creating a storage space (list) for
-    routines such as ::pllRearrangeSearch which compute the best NNI/SPR/TBR rearrangements.
-*/
-pllRearrangeList * pllCreateRearrangeList (int max)
-{
-  pllRearrangeList * bl;
-
-  bl = (pllRearrangeList *) malloc (sizeof (pllRearrangeList));
-  if (!bl) return (NULL);
-
-  bl->max_entries = max;
-  bl->entries     = 0;
-  bl->rearr       = (pllRearrangeInfo *) malloc (max * sizeof (pllRearrangeInfo));
-
-  /* A NULL array is an error only if storage was actually requested:
-     malloc is allowed to return NULL for a zero-sized request. */
-  if (!bl->rearr && max > 0)
-   {
-     free (bl);
-     return (NULL);
-   }
-
-  return bl;
-}
-
-/** @ingroup rearrangementGroup
-    @brief Deallocator for topology rearrangements list
-
-    Call this to destroy (deallocate) the memory taken by the \a bestList which holds
-    topological rearrangements. On return \a *bestList is set to \b NULL.
-    Calling the function with a \b NULL pointer, or with a list that was
-    already destroyed, does nothing.
-
-    @param bestList
-      Pointer to the list to be deallocated
-*/
-void pllDestroyRearrangeList (pllRearrangeList ** bestList)
-{
-  pllRearrangeList * bl;
-
-  /* this function itself leaves *bestList at NULL, so a repeated call must be harmless */
-  if (!bestList || !*bestList) return;
-
-  bl = *bestList;
-
-  rax_free (bl->rearr);
-  rax_free (bl);
-
-  *bestList = NULL;
-}
-
-
-/** @ingroup rearrangementGroup
-    @brief Store a rearrangement move to the list of best rearrangement moves
-
-     \a bestList is kept sorted by descending likelihood. The move described by
-     \a rearr is inserted in front of the first entry it beats. If the list is
-     already full, the last (worst) entry is dropped to make room. A move that
-     beats no entry is appended only while there is free space.
-
-     @param bestList
-       The list of information about the best rearrangement moves
-
-     @param rearr
-       Info about the current rearrangement move
-
-     @return
-       Returns \b PLL_FALSE if the rearrangement move doesn't make it in the list, otherwise \b PLL_TRUE
-*/
-static int pllStoreRearrangement (pllRearrangeList * bestList, pllRearrangeInfo * rearr)
- {
-   int pos = 0;
-   int toShift;
-
-   /* find the first entry with a strictly smaller likelihood than the new move */
-   while (pos < bestList->entries && !(rearr->likelihood > bestList->rearr[pos].likelihood))
-    {
-      ++ pos;
-    }
-
-   if (pos == bestList->entries)
-    {
-      /* no entry is beaten: the move can only go to the end, if there is room */
-      if (bestList->entries >= bestList->max_entries) return (PLL_FALSE);
-
-      memcpy (&(bestList->rearr[bestList->entries]), rearr, sizeof (pllRearrangeInfo));
-      ++ bestList->entries;
-      return (PLL_TRUE);
-    }
-
-   /* number of entries that slide one slot to the right */
-   toShift = bestList->entries - pos;
-   if (bestList->entries < bestList->max_entries)
-    {
-      /* free space left: the list grows by one */
-      ++ bestList->entries;
-    }
-   else
-    {
-      /* list is full: the last entry falls off the end */
-      -- toShift;
-    }
-
-   memmove (&(bestList->rearr[pos + 1]), &(bestList->rearr[pos]), toShift * sizeof (pllRearrangeInfo));
-   memcpy (&(bestList->rearr[pos]), rearr, sizeof (pllRearrangeInfo));
-
-   return (PLL_TRUE);
- }
-
-/** @ingroup rearrangementGroup
-    @brief Internal function for testing and saving an SPR move
-    
-    Checks the likelihood of the placement of the pruned subtree specified by \a p
-    to node \a q. If the likelihood is better than some in the sorted list 
-    \a bestList, or if there is still free space in \a bestList, then the SPR 
-    move is recorded (in \a bestList). Afterwards the connection between \a q and
-    \a q->back is re-established with its saved branch lengths, so that the
-    topology is the same as before the insertion.
-
-    @param tr
-      PLL instance
-
-    @param pr
-      List of partitions
-
-    @param p
-      Root of the subtree that is to be pruned
-
-    @param q
-      Where to place the pruned subtree (between \a q and \a q->back)
-
-    @param bestList
-      Where to store the SPR move
-
-    @note Internal function which is not part of the PLL API and therefore should not be
-    called by the user
-
-    @return
-      \b PLL_FALSE if \a insertBIG fails (in that case nothing is evaluated or
-      stored), otherwise \b PLL_TRUE, regardless of whether the move made it
-      into \a bestList
-*/
-static int
-pllTestInsertBIG (pllInstance * tr, partitionList * pr, nodeptr p, nodeptr q, pllRearrangeList * bestList)
-{
-  int numBranches = pr->perGeneBranchLengths?pr->numberOfPartitions:1;
-  pllRearrangeInfo rearr;
-
-  double  qz[PLL_NUM_BRANCHES], pz[PLL_NUM_BRANCHES];
-  nodeptr  r;
-  //double startLH = tr->endLH;
-  int i;
-
-  /* remember the node behind q and the branch lengths at q and p for the restore below */
-  r = q->back; 
-  for(i = 0; i < numBranches; i++)
-  {
-    qz[i] = q->z[i];
-    pz[i] = p->z[i];
-  }
-
-  if (! insertBIG(tr, pr, p, q))       return PLL_FALSE;
-
-  pllEvaluateLikelihood (tr, pr, p->next->next, PLL_FALSE, PLL_FALSE);
-  
-  /* describe the tested SPR move together with the likelihood it yielded */
-  rearr.rearrangeType  = PLL_REARRANGE_SPR;
-  rearr.likelihood     = tr->likelihood;
-  rearr.SPR.removeNode = p;
-  rearr.SPR.insertNode = q;
-  for (i = 0; i < numBranches; ++ i)
-   {
-     rearr.SPR.zqr[i] = tr->zqr[i];
-   }
-
-  /* the return value (stored or not) is deliberately not inspected here */
-  pllStoreRearrangement (bestList, &rearr);
-
-/*
-  if(tr->likelihood > tr->bestOfNode)
-  {
-    pllStoreRearrangement (bestList, rearr)
-    tr->bestOfNode = tr->likelihood;
-    tr->insertNode = q;
-    tr->removeNode = p;   
-    for(i = 0; i < numBranches; i++)
-    {
-      tr->currentZQR[i] = tr->zqr[i];           
-      tr->currentLZR[i] = tr->lzr[i];
-      tr->currentLZQ[i] = tr->lzq[i];
-      tr->currentLZS[i] = tr->lzs[i];      
-    }
-  }
-
-  if(tr->likelihood > tr->endLH)
-  {			  
-    
-    tr->insertNode = q;
-    tr->removeNode = p;   
-    for(i = 0; i < numBranches; i++)
-      tr->currentZQR[i] = tr->zqr[i];      
-    tr->endLH = tr->likelihood;                      
-  }        
-*/
-  /* reset the topology so that it is the same as it was before calling insertBIG */
-  hookup(q, r, qz, numBranches);
-
-  /* detach the two remaining connections of the pruned subtree's root again */
-  p->next->next->back = p->next->back = (nodeptr) NULL;
-
-  /* with thorough insertion, p is re-hooked to its current back node using the
-     branch lengths saved on entry */
-  if(tr->thoroughInsertion)
-  {
-    nodeptr s = p->back;
-    hookup(p, s, pz, numBranches);
-  } 
-
-/*
-  if((tr->doCutoff) && (tr->likelihood < startLH))
-  {
-    tr->lhAVG += (startLH - tr->likelihood);
-    tr->lhDEC++;
-    if((startLH - tr->likelihood) >= tr->lhCutoff)
-      return PLL_FALSE;	    
-    else
-      return PLL_TRUE;
-  }
-  else
-    return PLL_TRUE;
-  */
-  return (PLL_TRUE);
-}
-
-/** @ingroup rearrangementGroup
-    @brief Internal function for recursively traversing a tree and testing a possible subtree insertion
-
-    Starting at \a q, the tree is walked recursively in the direction of \a q->next->back and
-    \a q->next->next->back. Once the remaining minimum distance \a mintrav has been used up, the
-    placement of the pruned subtree rooted at \a p is tested at each visited node by calling
-    \a pllTestInsertBIG, which in turn saves the computed SPR in \a bestList if a) there is still
-    space in the \a bestList or b) if the likelihood of the SPR is better than any of the ones in
-    \a bestList. The walk along a path ends at a tip, when \a maxtrav has been used up, or when
-    \a pllTestInsertBIG reports a failure.
-
-    @note This function is not part of the API and should not be called by the user.
-*/
-static void pllTraverseUpdate (pllInstance *tr, partitionList *pr, nodeptr p, nodeptr q, int mintrav, int maxtrav, pllRearrangeList * bestList)
-{
-  /* one step of the minimum distance is consumed by every visited node */
-  mintrav--;
-  if (mintrav <= 0 && ! pllTestInsertBIG(tr, pr, p, q, bestList))
-  {
-    return;
-  }
-
-  /* nothing to descend into below a tip */
-  if (isTip(q->number, tr->mxtips))
-  {
-    return;
-  }
-
-  /* stop as soon as the maximum distance has been reached */
-  maxtrav--;
-  if (maxtrav <= 0)
-  {
-    return;
-  }
-
-  pllTraverseUpdate(tr, pr, p, q->next->back, mintrav, maxtrav, bestList);
-  pllTraverseUpdate(tr, pr, p, q->next->next->back, mintrav, maxtrav, bestList);
-}
-
-
-
-/** @ingroup rearrangementGroup
-    @brief Internal function for computing SPR moves
-
-    Compute a list of at most \a max SPR moves that can be performed by pruning
-    the subtree rooted at node \a p and testing all possible placements in a
-    radius of at least \a mintrav nodes and at most \a maxtrav nodes from \a p.
-    The same is then done for the node \a p->back, using a minimum distance of
-    at least 2. Note that \a tr->thoroughInsertion affects the behaviour of the
-    function (see note).
-
-    @param tr
-      PLL instance
-
-    @param pr
-      List of partitions
-
-    @param p
-      Node specifying the root of the pruned subtree, i.e. where to prune.
-
-    @param mintrav
-      Minimum distance from \a p where to try inserting the pruned subtree
-
-    @param maxtrav
-      Maximum distance from \a p where to try inserting the pruned subtree
-
-    @param bestList
-      The list of best topological rearrangements
-
-    @return
-      \b PLL_FALSE if \a maxtrav is smaller than 1 or \a mintrav is larger than
-      \a maxtrav, \b PLL_BADREAR if \a removeNodeBIG fails, otherwise \b PLL_TRUE
-
-    @note This function is not part of the API and should not be called by the user
-    as it is called internally by the API function \a pllComputeSPR. 
-    Also, setting \a tr->thoroughInsertion affects this function. For each tested SPR
-    the new branch lengths will also be optimized. This computes better likelihoods
-    but also slows down the method considerably.
-*/
-static int pllTestSPR (pllInstance * tr, partitionList * pr, nodeptr p, int mintrav, int maxtrav, pllRearrangeList * bestList)
-{
-  nodeptr 
-    p1, p2, q, q1, q2;
-  double 
-    p1z[PLL_NUM_BRANCHES], p2z[PLL_NUM_BRANCHES], q1z[PLL_NUM_BRANCHES], q2z[PLL_NUM_BRANCHES];
-  int
-    mintrav2, i;
-  int numBranches = pr->perGeneBranchLengths ? pr->numberOfPartitions : 1;
-
-  /* reject an empty or inverted search radius */
-  if (maxtrav < 1 || mintrav > maxtrav) return (PLL_FALSE);
-  q = p->back;
-
-  /* first pass: prune at p and test insertions on the side of its neighbours p1 and p2 */
-  if (!isTip (p->number, tr->mxtips))
-   {
-     p1 = p->next->back;
-     p2 = p->next->next->back;
-
-     /* only worth doing if at least one of the two neighbours is an inner node */
-     if (!isTip (p1->number, tr->mxtips) || !isTip (p2->number, tr->mxtips))
-      {
-        /* save branch lengths before splitting the tree in two components */
-        for (i = 0; i < numBranches; ++ i)
-         {
-           p1z[i] = p1->z[i];
-           p2z[i] = p2->z[i];
-         }
-
-        /* split the tree in two components */
-        if (! removeNodeBIG (tr, pr, p, numBranches)) return PLL_BADREAR;
-
-        /* recursively traverse and perform SPR on the subtree rooted at p1 */
-        if (!isTip (p1->number, tr->mxtips))
-         {
-           pllTraverseUpdate (tr, pr, p, p1->next->back,       mintrav, maxtrav, bestList);
-           pllTraverseUpdate (tr, pr, p, p1->next->next->back, mintrav, maxtrav, bestList);
-         }
-
-        /* recursively traverse and perform SPR on the subtree rooted at p2 */
-        if (!isTip (p2->number, tr->mxtips))
-         {
-           pllTraverseUpdate (tr, pr, p, p2->next->back,       mintrav, maxtrav, bestList);
-           pllTraverseUpdate (tr, pr, p, p2->next->next->back, mintrav, maxtrav, bestList);
-         }
-
-        /* restore the topology as it was before the split */
-        hookup (p->next,       p1, p1z, numBranches);
-        hookup (p->next->next, p2, p2z, numBranches);
-        pllUpdatePartials (tr, pr, p, PLL_FALSE);
-      }
-   }
-
-  /* second pass: the same procedure for q = p->back (maxtrav > 0 always holds
-     here because of the check at the top) */
-  if (!isTip (q->number, tr->mxtips) && maxtrav > 0)
-   {
-     q1 = q->next->back;
-     q2 = q->next->next->back;
-
-    /* why so many conditions? Why is it not analogous to the previous if for node p? */
-    /* as written: q1 or q2 must be an inner node that itself has at least one
-       inner node among its two other neighbours */
-    if (
-        (
-         ! isTip(q1->number, tr->mxtips) && 
-         (! isTip(q1->next->back->number, tr->mxtips) || ! isTip(q1->next->next->back->number, tr->mxtips))
-        )
-        ||
-        (
-         ! isTip(q2->number, tr->mxtips) && 
-         (! isTip(q2->next->back->number, tr->mxtips) || ! isTip(q2->next->next->back->number, tr->mxtips))
-        )
-       )
-     {
-       /* save branch lengths before splitting the tree in two components */
-       for (i = 0; i < numBranches; ++ i)
-        {
-          q1z[i] = q1->z[i];
-          q2z[i] = q2->z[i];
-        }
-
-       if (! removeNodeBIG (tr, pr, q, numBranches)) return PLL_BADREAR;
-
-       /* on this side the minimum distance is never smaller than 2 */
-       mintrav2 = mintrav > 2 ? mintrav : 2;
-
-       if (!isTip (q1->number, tr->mxtips))
-        {
-          pllTraverseUpdate (tr, pr, q, q1->next->back,       mintrav2, maxtrav, bestList);
-          pllTraverseUpdate (tr, pr, q, q1->next->next->back, mintrav2, maxtrav, bestList);
-        }
-
-       if (!isTip (q2->number, tr->mxtips))
-        {
-          pllTraverseUpdate (tr, pr, q, q2->next->back,       mintrav2, maxtrav, bestList);
-          pllTraverseUpdate (tr, pr, q, q2->next->next->back, mintrav2, maxtrav, bestList);
-        }
-
-       /* restore the topology as it was before the split */
-       hookup (q->next,       q1, q1z, numBranches);
-       hookup (q->next->next, q2, q2z, numBranches);
-       pllUpdatePartials (tr, pr, q, PLL_FALSE);
-     }
-   }
-  return (PLL_TRUE);
-}
-
-/** @ingroup rearrangementGroup
-    @brief Compute a list of possible SPR moves
-
-    Resets the bookkeeping fields \a tr->startLH, \a tr->endLH and
-    \a tr->bestOfNode and then tests the SPR moves that can be performed by
-    pruning the subtree rooted at \a p and placing it in a radius of at least
-    \a mintrav nodes and at most \a maxtrav nodes from \a p. The tested moves
-    are collected in \a bestList. Note that \a tr->thoroughInsertion affects the
-    behaviour of the function (see note).
-
-    @param tr
-      PLL instance
-
-    @param pr
-      List of partitions
-
-    @param p
-      Node specifying the root of the pruned subtree, i.e. where to prune.
-
-    @param mintrav
-      Minimum distance from \a p where to try inserting the pruned subtree
-
-    @param maxtrav
-      Maximum distance from \a p where to try inserting the pruned subtree
-
-    @param bestList
-      The list of best topological rearrangements
-
-    @note
-      Setting \a tr->thoroughInsertion affects this function. For each tested SPR
-      the new branch lengths will also be optimized. This computes better likelihoods
-      but also slows down the method considerably.
-*/
-static void 
-pllComputeSPR (pllInstance * tr, partitionList * pr, nodeptr p, int mintrav, int maxtrav, pllRearrangeList * bestList)
-{
-  /* both likelihood markers start out at the current likelihood */
-  tr->endLH   = tr->likelihood;
-  tr->startLH = tr->endLH;
-
-  /* TODO: Add cutoff code */
-
-  tr->bestOfNode = PLL_UNLIKELY;
-
-  /* the status returned by pllTestSPR is not used by this function */
-  (void) pllTestSPR (tr, pr, p, mintrav, maxtrav, bestList);
-}
-
-/** @ingroup rearrangementGroup
-    @brief Return the yielded likelihood of an NNI move, without altering the topology
-
-    The NNI move of type \a swapType is performed at node \a p, the partials at both
-    ends of the branch between \a p and \a p->back are recomputed, \a update is called
-    on that branch and the resulting likelihood is evaluated. The same move is then
-    performed a second time to get back the original topology, and the branch lengths
-    saved on entry are written back to \a p and \a p->back.
-
-    @param tr
-      PLL instance
-
-    @param pr
-      List of partitions
-
-    @param p
-      Where to perform the NNI move
-
-    @param swapType
-      What type of NNI move to perform
-
-    @return
-      The likelihood yielded from the NNI
-*/
-static double 
-pllTestNNILikelihood (pllInstance * tr, partitionList * pr, nodeptr p, int swapType)
-{
-  double savedZ[PLL_NUM_BRANCHES];
-  double nniLikelihood;
-  int part;
-
-  /* Keep a copy of the original branch lengths. They could also be handed in by
-     the caller, which would avoid copying them once per tested NNI move. */
-  for (part = 0; part < pr->numberOfPartitions; part++)
-   {
-     savedZ[part] = p->z[part];
-   }
-
-  /* apply the move, refresh the likelihood vectors at both ends of the branch,
-     optimize the branch and evaluate */
-  pllTopologyPerformNNI(tr, p, swapType);
-  pllUpdatePartials (tr, pr, p,       PLL_FALSE);
-  pllUpdatePartials (tr, pr, p->back, PLL_FALSE);
-  update (tr, pr, p);
-  pllEvaluateLikelihood (tr, pr, p, PLL_FALSE, PLL_FALSE);
-  nniLikelihood = tr->likelihood;
-
-  /* undo the move by applying it once more (the branch is not re-optimized here) */
-  pllTopologyPerformNNI(tr, p, swapType);
-  pllUpdatePartials (tr, pr, p,       PLL_FALSE);
-  pllUpdatePartials (tr, pr, p->back, PLL_FALSE);
-  pllEvaluateLikelihood (tr, pr, p, PLL_FALSE, PLL_FALSE);
-
-  /* finally put the original branch lengths back on both ends */
-  for (part = 0; part < pr->numberOfPartitions; part++)
-   {
-     p->back->z[part] = savedZ[part];
-     p->z[part]       = savedZ[part];
-   }
-
-  return nniLikelihood;
-}
-/** @ingroup rearrangementGroup
-    @brief Compares NNI likelihoods at a node and store in the rearrangement list
-
-    Compares the two possible NNI moves that can be performed at node \a p, and
-    if the likelihood improves from the one of the original topology, then 
-    it picks the one that yields the highest likelihood and tries to insert it in
-    the list of best rearrangement moves
-
-    @param tr
-      PLL instance
-
-    @param pr
-      List of partitions
-
-    @param bestList
-      Rearrangement moves list
-*/
-static void pllTestNNI (pllInstance * tr, partitionList * pr, nodeptr p, pllRearrangeList * bestList)
-{
-  double lh0, lh1, lh2;
-  pllRearrangeInfo rearr;
-
-  /* store the original likelihood */
-  lh0 = tr->likelihood;
-
-  lh1 = pllTestNNILikelihood (tr, pr, p, PLL_NNI_P_NEXT);
-  lh2 = pllTestNNILikelihood (tr, pr, p, PLL_NNI_P_NEXTNEXT);
-
-  if (lh0 > lh1 && lh0 > lh2) return;
-
-  /* set the arrangement structure */
-  rearr.rearrangeType  = PLL_REARRANGE_NNI;
-  rearr.likelihood     = PLL_MAX (lh1, lh2);
-  rearr.NNI.originNode = p;
-  rearr.NNI.swapType   = (lh1 > lh2) ? PLL_NNI_P_NEXT : PLL_NNI_P_NEXTNEXT;
-
-  /* try to store it in the best list */
-  pllStoreRearrangement (bestList, &rearr);
-}
-
-/** @ingroup rearrangementGroup
-    @brief Recursive traversal of the tree structure for testing NNI moves
- 
-    Recursively traverses the tree structure and tests all allowed NNI
-    moves in the area specified by \a mintrav and \a maxtrav. For more
-    information and details on the function arguments check ::pllSearchNNI
-*/
-static void 
-pllTraverseNNI (pllInstance *tr, partitionList *pr, nodeptr p, int mintrav, int maxtrav, pllRearrangeList * bestList)
-{
-  if (isTip (p->number, tr->mxtips)) return;
-
-  /* if we are at the right radius then compute the NNIs for nodes p->next and p->next->next */
-  if (!mintrav)
-   {
-     pllTestNNI (tr, pr, p->next, bestList);
-     pllTestNNI (tr, pr, p->next->next, bestList);
-   }
-  
-  /* and then avoid computing the NNIs for nodes p->next->back and p->next->next->back as they are
-  the same to the ones computed in the above two lines. This way we do not need to resolve conflicts
-  later on as in the old code */
-  if (maxtrav)
-   {
-     if (!isTip (p->next->back->number, tr->mxtips))       
-       pllTraverseNNI (tr, pr, p->next->back,       mintrav ? mintrav - 1 : 0, maxtrav - 1, bestList);
-     if (!isTip (p->next->next->back->number, tr->mxtips)) 
-       pllTraverseNNI (tr, pr, p->next->next->back, mintrav ? mintrav - 1 : 0, maxtrav - 1, bestList);
-   }
-}
-
-/** @ingroup rearrangementGroup
-    @brief Compute a list of possible NNI moves
-    
-    Iteratively tries all possible NNI moves at each node that is at
-    least \a mintrav and at most \a maxtrav nodes far from node \a p.
-    At each NNI move, the likelihood is tested and if it is higher than
-    the likelihood of an element in the sorted (by likelihood) list 
-    \a bestList, or if there is still empty space in \a bestList, it is
-    inserted at the corresponding position.
-
-    @param tr
-      PLL instance
-
-    @param pr
-      List of partitions
-
-    @param p
-      Node specifying the point where the NNI will be performed.
-
-    @param mintrav
-      Minimum distance from \a p where the NNI can be tested 
-
-    @param maxtrav
-      Maximum distance from \a p where to try NNIs
-*/
-static void
-pllSearchNNI (pllInstance * tr, partitionList * pr, nodeptr p, int mintrav, int maxtrav, pllRearrangeList * bestList)
-{
-  /* avoid conflicts by precomputing the NNI of the first node */
-
-  if (mintrav == 0) 
-  pllTestNNI (tr, pr, p, bestList);
-  
-  pllTraverseNNI (tr, pr, p, mintrav, maxtrav, bestList);
-  if (maxtrav)
-    pllTraverseNNI (tr, pr, p->back, mintrav, maxtrav - 1, bestList);
-
-}
-
-/** @ingroup rearrangementGroup
-    @brief Create rollback information for an SPR move
-    
-    Creates a structure of type ::pllRollbackInfo and fills it with rollback
-    information about the SPR move described in \a rearr. The rollback info
-    is stored in the PLL instance in a LIFO manner.
-
-    @param tr
-      PLL instance
-
-    @param rearr
-      Description of the SPR move
-
-    @param numBranches
-      Number of partitions
-*/
-static void 
-pllCreateSprInfoRollback (pllInstance * tr, pllRearrangeInfo * rearr, int numBranches)
-{
-  pllRollbackInfo * sprRb;
-  nodeptr p, q;
-  int i;
-
-  p = rearr->SPR.removeNode;
-  q = rearr->SPR.insertNode;
-
-  sprRb = (pllRollbackInfo *) rax_malloc (sizeof (pllRollbackInfo) + 4 * numBranches * sizeof (double));
-  sprRb->SPR.zp   = (double *) ((char *)sprRb + sizeof (pllRollbackInfo));
-  sprRb->SPR.zpn  = (double *) ((char *)sprRb + sizeof (pllRollbackInfo) + numBranches * sizeof (double));
-  sprRb->SPR.zpnn = (double *) ((char *)sprRb + sizeof (pllRollbackInfo) + 2 * numBranches * sizeof (double));
-  sprRb->SPR.zqr  = (double *) ((char *)sprRb + sizeof (pllRollbackInfo) + 3 * numBranches * sizeof (double));
-
-  for (i = 0; i < numBranches; ++ i)
-   {
-     sprRb->SPR.zp[i]   = p->z[i];
-     sprRb->SPR.zpn[i]  = p->next->z[i];
-     sprRb->SPR.zpnn[i] = p->next->next->z[i];
-     sprRb->SPR.zqr[i]  = q->z[i];
-   }
-
-  sprRb->SPR.pn  = p->next->back;
-  sprRb->SPR.pnn = p->next->next->back;
-  sprRb->SPR.r   = q->back;
-  sprRb->SPR.q   = q;
-  sprRb->SPR.p   = p;
-
-  sprRb->rearrangeType = PLL_REARRANGE_SPR;
-
-  pllStackPush (&(tr->rearrangeHistory), (void *) sprRb);
-}
-
-/** @ingroup rearrangementGroup
-    @brief Create rollback information for an NNI move
-
-    Creates a structure of type ::pllRollbackInfo and fills it with rollback
-    information about the SPR move described in \a rearr. The rollback info
-    is stored in the PLL instance in a LIFO manner
-
-    @param tr
-      PLL instance
-
-    @param rearr
-      Description of the NNI move
-*/
-static void
-pllCreateNniInfoRollback (pllInstance * tr, pllRearrangeInfo * rearr)
-{
-  /*TODO: add the branches ? */
-  pllRollbackInfo * ri;
-
-  ri = (pllRollbackInfo *) rax_malloc (sizeof (pllRollbackInfo));
-
-  ri->rearrangeType = PLL_REARRANGE_NNI;
-
-  ri->NNI.origin   = rearr->NNI.originNode;
-  ri->NNI.swapType = rearr->NNI.swapType;
-
-  pllStackPush (&(tr->rearrangeHistory), (void *) ri);
-  
-}
-
-
-/** @ingroup rearrangementGroup
-    @brief Generic function for creating rollback information
-
-    Creates a structure of type ::pllRollbackInfo and fills it with rollback
-    information about the move described in \a rearr. The rollback info
-    is stored in the PLL instance in a LIFO manner
-
-    @param tr
-      PLL instance
-
-    @param rearr
-      Description of the NNI move
-
-    @param numBranches
-      Number of partitions
-*/
-static void
-pllCreateRollbackInfo (pllInstance * tr, pllRearrangeInfo * rearr, int numBranches)
-{
-  switch (rearr->rearrangeType)
-   {
-     case PLL_REARRANGE_NNI:
-       pllCreateNniInfoRollback (tr, rearr);
-       break;
-     case PLL_REARRANGE_SPR:
-       pllCreateSprInfoRollback (tr, rearr, numBranches);
-       break;
-     default:
-       break;
-   }
-
-}
-
-
-/** @ingroup rearrangementGroup
-    @brief Rollback an SPR move
-
-    Perform a rollback (undo) on the last SPR move.
-    
-    @param tr
-      PLL instance
-
-    @param pr
-      List of partitions
-
-    @param ri
-      Rollback information
-*/
-static void
-pllRollbackSPR (partitionList * pr, pllRollbackInfo * ri)
-{
-  int numBranches;
-
-  numBranches = pr->perGeneBranchLengths ? pr->numberOfPartitions : 1;
-
-  hookup (ri->SPR.p->next,       ri->SPR.pn,      ri->SPR.zpn,  numBranches);
-  hookup (ri->SPR.p->next->next, ri->SPR.pnn,     ri->SPR.zpnn, numBranches); 
-  hookup (ri->SPR.p,             ri->SPR.p->back, ri->SPR.zp,   numBranches);
-  hookup (ri->SPR.q,             ri->SPR.r,       ri->SPR.zqr,  numBranches);
-
-  rax_free (ri);
-}
-
-/** @ingroup rearrangementGroup
-    @brief Rollback an NNI move
-
-    Perform a rollback (undo) on the last NNI move.
-    
-    @param tr
-      PLL instance
-
-    @param pr
-      List of partitions
-
-    @param ri
-      Rollback information
-*/
-static void
-pllRollbackNNI (pllInstance * tr, partitionList * pr, pllRollbackInfo * ri)
-{
-  nodeptr p = ri->NNI.origin;
-
-  pllTopologyPerformNNI(tr, p, ri->NNI.swapType);
-  pllUpdatePartials (tr, pr, p,       PLL_FALSE);
-  pllUpdatePartials (tr, pr, p->back, PLL_FALSE);
-  update (tr, pr, p);
-  pllEvaluateLikelihood (tr, pr, p, PLL_FALSE, PLL_FALSE);
-  
-  rax_free (ri);
-}
-
-/** @ingroup rearrangementGroup
-    @brief Rollback the last committed rearrangement move
-    
-    Perform a rollback (undo) on the last committed rearrangement move.
-
-    @param tr
-      PLL instance
-
-    @param pr
-      List of partitions
-
-    @return
-      Returns \b PLL_TRUE is the rollback was successful, otherwise \b PLL_FALSE
-      (if no rollback was done)
-*/
-int 
-pllRearrangeRollback (pllInstance * tr, partitionList * pr)
-{
-  pllRollbackInfo * ri;
-  
-  ri = (pllRollbackInfo *) pllStackPop (&(tr->rearrangeHistory));
-  if (!ri) return (PLL_FALSE);
-
-  switch (ri->rearrangeType)
-   {
-     case PLL_REARRANGE_NNI:
-       pllRollbackNNI (tr, pr, ri);
-       break;
-     case PLL_REARRANGE_SPR:
-       pllRollbackSPR (pr, ri);
-       break;
-     default:
-       rax_free (ri);
-       return (PLL_FALSE);
-   }
-
-  return (PLL_TRUE);
-  
-}
-
-
-/** @ingroup rearrangementGroup
-    @brief Commit a rearrangement move
-
-    Applies the rearrangement move specified in \a rearr to the tree topology in \a tr. 
-    In case of SPR moves, if
-    \a tr->thoroughInsertion is set to \b PLL_TRUE, the new branch lengths are also optimized. 
-    The function stores rollback information in pllInstance::rearrangeHistory if \a saveRollbackInfo
-    is set to \b PLL_TRUE. This way, the rearrangement move can be rolled back (undone) by calling
-    ::pllRearrangeRollback
-
-    @param tr
-      PLL instance
-
-    @param pr
-      List of partitions
-
-    @param rearr
-      An element of a \a pllRearrangeInfo structure that contains information about the rearrangement move
-
-    @param saveRollbackInfo
-      If set to \b PLL_TRUE, rollback info will be kept for undoing the rearrangement move
-*/
-void
-pllRearrangeCommit (pllInstance * tr, partitionList * pr, pllRearrangeInfo * rearr, int saveRollbackInfo)
-{
-  int numBranches;
-
-  numBranches = pr->perGeneBranchLengths ? pr->numberOfPartitions : 1;
-
-  if (saveRollbackInfo)
-    pllCreateRollbackInfo (tr, rearr, numBranches);
-
-  switch (rearr->rearrangeType)
-   {
-     case PLL_REARRANGE_NNI:
-       pllTopologyPerformNNI(tr, rearr->NNI.originNode, rearr->NNI.swapType);
-       pllUpdatePartials (tr, pr, rearr->NNI.originNode, PLL_FALSE);
-       pllUpdatePartials (tr, pr, rearr->NNI.originNode->back, PLL_FALSE);
-       update (tr, pr, rearr->NNI.originNode);
-       pllEvaluateLikelihood (tr, pr, rearr->NNI.originNode, PLL_FALSE, PLL_FALSE);
-       break;
-     case PLL_REARRANGE_SPR:
-       removeNodeBIG (tr, pr, rearr->SPR.removeNode, numBranches);
-       insertBIG     (tr, pr, rearr->SPR.removeNode, rearr->SPR.insertNode);
-       break;
-     default:
-       break;
-   }
-}
-
-
-/******** new rearrangement functions ****************/
-
-/* change this to return the number of new elements in the list */
-/** @ingroup rearrangementGroup
-    @brief Search for rearrangement topologies
-    
-    Search for possible rearrangement moves of type \a rearrangeType in the
-    annular area defined by the minimal resp. maximal radii \a mintrav resp.
-    \a maxtrav. If the resulting likelihood is better than the current, try
-    to insert the move specification in \a bestList, which is a sorted list
-    that holds the rearrange info of the best moves sorted by likelihood
-    (desccending order).
-
-    @param tr
-      PLL instance
-
-    @param pr
-      List of partitions
-
-    @param rearrangeType
-      Type of rearrangement. Can be \b PLL_REARRANGE_SPR or \b PLL_REARRANGE_NNI
-
-    @param p
-      Point of origin, i.e. where to start searching from
-
-    @param mintrav
-      The minimal radius of the annulus
-
-    @param maxtrav
-      The maximal radius of the annulus
-
-    @param bestList
-      List that holds the details of the best rearrangement moves found
-
-    @note
-      If \a bestList is not empty, the existing entries will not be altered unless
-      better rearrangement moves (that means yielding better likelihood) are found
-      and the list is full, in which case the entries with the worst likelihood will be
-      thrown away.
-*/
-void
-pllRearrangeSearch (pllInstance * tr, partitionList * pr, int rearrangeType, nodeptr p, int mintrav, int maxtrav, pllRearrangeList * bestList)
-{
-  switch (rearrangeType)
-   {
-     case PLL_REARRANGE_SPR:
-       pllComputeSPR (tr, pr, p, mintrav, maxtrav, bestList);
-       break;
-
-     case PLL_REARRANGE_NNI:
-       pllSearchNNI (tr, pr, p, mintrav, maxtrav, bestList);
-       break;
-
-     case PLL_REARRANGE_TBR:
-       break;
-     default:
-       break;
-   }
-}
-
-
-static int
-determineRearrangementSetting(pllInstance *tr, partitionList *pr,
-    bestlist *bestT, bestlist *bt)
-{
-  int i, mintrav, maxtrav, bestTrav, impr, index, MaxFast, *perm = (int*) NULL;
-  double startLH;
-  pllBoolean cutoff;
-
-  MaxFast = 26;
-
-  startLH = tr->likelihood;
-
-  cutoff = tr->doCutoff;
-  tr->doCutoff = PLL_FALSE;
-
-  mintrav = 1;
-  maxtrav = 5;
-
-  bestTrav = maxtrav = 5;
-
-  impr = 1;
-
-  resetBestTree(bt);
-
-  if (tr->permuteTreeoptimize)
-    {
-      int n = tr->mxtips + tr->mxtips - 2;
-      perm = (int *) rax_malloc(sizeof(int) * (n + 1));
-      makePermutation(perm, n, tr);
-    }
-
-  while (impr && maxtrav < MaxFast)
-    {
-      recallBestTree(bestT, 1, tr, pr);
-      nodeRectifier(tr);
-
-      if (maxtrav > tr->ntips - 3)
-        maxtrav = tr->ntips - 3;
-
-      tr->startLH = tr->endLH = tr->likelihood;
-
-      for (i = 1; i <= tr->mxtips + tr->mxtips - 2; i++)
-        {
-
-          if (tr->permuteTreeoptimize)
-            index = perm[i];
-          else
-            index = i;
-
-          tr->bestOfNode = PLL_UNLIKELY;
-          if (rearrangeBIG(tr, pr, tr->nodep[index], mintrav, maxtrav))
-            {
-              if (tr->endLH > tr->startLH)
-                {
-                  restoreTreeFast(tr, pr);
-                  tr->startLH = tr->endLH = tr->likelihood;
-                }
-            }
-        }
-
-      pllOptimizeBranchLengths(tr, pr, 8);
-      saveBestTree(bt, tr,
-          pr->perGeneBranchLengths ? pr->numberOfPartitions : 1);
-
-      if (tr->likelihood > startLH)
-        {
-          startLH = tr->likelihood;
-          bestTrav = maxtrav;
-          impr = 1;
-        }
-      else
-        {
-          impr = 0;
-        }
-      maxtrav += 5;
-
-      if (tr->doCutoff)
-        {
-          tr->lhCutoff = (tr->lhAVG) / ((double) (tr->lhDEC));
-
-          tr->itCount = tr->itCount + 1;
-          tr->lhAVG = 0;
-          tr->lhDEC = 0;
-        }
-    }
-
-  recallBestTree(bt, 1, tr, pr);
-  tr->doCutoff = cutoff;
-
-  if (tr->permuteTreeoptimize)
-    rax_free(perm);
-
-  return bestTrav;
-}
-
-
-static void hash_dealloc_bipentry (void * entry)
-{
-  pllBipartitionEntry * e = (pllBipartitionEntry *)entry;
-
-  if(e->bitVector)     rax_free(e->bitVector);
-  if(e->treeVector)    rax_free(e->treeVector);
-  if(e->supportVector) rax_free(e->supportVector);
-
-}
-
-/** @ingroup rearrangementGroup
-    @brief RAxML algorithm for ML search
-
-    RAxML algorithm for searching the Maximum Likelihood tree and model.
-
-    @param tr
-      PLL instance
-
-    @param pr
-      List of partitions
-
-    @param estimateModel
-      If true, model parameters are optimized in a ML framework.
-
-    @note
-      For datasets with a large number of taxa, setting tr->searchConvergenceCriterion to
-    PLL_TRUE can improve the execution time in up to 50% looking for topology convergence.
-*/
-int
-pllRaxmlSearchAlgorithm(pllInstance * tr, partitionList * pr,
-    pllBoolean estimateModel)
-{
-  pllEvaluateLikelihood(tr, pr, tr->start, PLL_TRUE, PLL_FALSE);
-  pllOptimizeBranchLengths(tr, pr, 32);
-
-  unsigned int vLength = 0;
-  int i, impr, bestTrav, rearrangementsMax = 0, rearrangementsMin = 0,
-      thoroughIterations = 0, fastIterations = 0;
-
-  double lh, previousLh, difference, epsilon;
-  bestlist *bestT, *bt;
-  infoList iList;
-  pllOptimizeBranchLengths(tr, pr, 32);
-
-  pllHashTable *h = NULL;
-  //hashtable *h = NULL;
-  unsigned int **bitVectors = (unsigned int**) NULL;
-
-  /* Security check... These variables might have not been initialized! */
-  if (tr->stepwidth == 0) tr->stepwidth = 5;
-  if (tr->max_rearrange == 0) tr->max_rearrange = 21;
-
-  if (tr->searchConvergenceCriterion)
-    {
-      bitVectors = initBitVector(tr->mxtips, &vLength);
-      //h = initHashTable(tr->mxtips * 4);
-      h = pllHashInit (tr->mxtips * 4);
-    }
-
-  bestT = (bestlist *) rax_malloc(sizeof(bestlist));
-  bestT->ninit = 0;
-  initBestTree(bestT, 1, tr->mxtips);
-
-  bt = (bestlist *) rax_malloc(sizeof(bestlist));
-  bt->ninit = 0;
-  initBestTree(bt, 20, tr->mxtips);
-
-  initInfoList(&iList, 50);
-
-  difference = 10.0;
-  epsilon = tr->likelihoodEpsilon;
-
-  tr->thoroughInsertion = 0;
-
-  if (estimateModel)
-    {
-      pllEvaluateLikelihood(tr, pr, tr->start, PLL_TRUE, PLL_FALSE);
-      pllOptimizeModelParameters(tr, pr, 10.0);
-    }
-  else
-    pllOptimizeBranchLengths(tr, pr, 64);
-
-  saveBestTree(bestT, tr,
-      pr->perGeneBranchLengths ? pr->numberOfPartitions : 1);
-
-  if (!tr->initialSet)
-    bestTrav = tr->bestTrav = determineRearrangementSetting(tr, pr, bestT, bt);
-  else
-    bestTrav = tr->bestTrav = tr->initial;
-
-  if (estimateModel)
-    {
-      pllEvaluateLikelihood(tr, pr, tr->start, PLL_TRUE, PLL_FALSE);
-      pllOptimizeModelParameters(tr, pr, 5.0);
-    }
-  else
-    pllOptimizeBranchLengths(tr, pr, 32);
-
-  saveBestTree(bestT, tr,
-      pr->perGeneBranchLengths ? pr->numberOfPartitions : 1);
-  impr = 1;
-  if (tr->doCutoff)
-    tr->itCount = 0;
-
-  while (impr)
-    {
-      recallBestTree(bestT, 1, tr, pr);
-
-      if (tr->searchConvergenceCriterion)
-        {
-          int bCounter = 0;
-
-          if (fastIterations > 1)
-            cleanupHashTable(h, (fastIterations % 2));
-
-          bitVectorInitravSpecial(bitVectors, tr->nodep[1]->back, tr->mxtips,
-              vLength, h, fastIterations % 2, PLL_BIPARTITIONS_RF,
-              (branchInfo *) NULL, &bCounter, 1, PLL_FALSE, PLL_FALSE, 0);
-
-          assert(bCounter == tr->mxtips - 3);
-
-          if (fastIterations > 0)
-            {
-              double rrf = convergenceCriterion(h, tr->mxtips);
-
-              if (rrf <= 0.01) /* 1% cutoff */
-                {
-                  cleanupHashTable(h, 0);
-                  cleanupHashTable(h, 1);
-                  goto cleanup_fast;
-                }
-            }
-        }
-
-      fastIterations++;
-
-      pllOptimizeBranchLengths(tr, pr, 32);
-
-      saveBestTree(bestT, tr,
-          pr->perGeneBranchLengths ? pr->numberOfPartitions : 1);
-
-      lh = previousLh = tr->likelihood;
-
-      treeOptimizeRapid(tr, pr, 1, bestTrav, bt, &iList);
-
-      impr = 0;
-
-      for (i = 1; i <= bt->nvalid; i++)
-        {
-          recallBestTree(bt, i, tr, pr);
-
-          pllOptimizeBranchLengths(tr, pr, 8);
-
-          difference = (
-              (tr->likelihood > previousLh) ?
-                  tr->likelihood - previousLh : previousLh - tr->likelihood);
-          if (tr->likelihood > lh && difference > epsilon)
-            {
-              impr = 1;
-              lh = tr->likelihood;
-              saveBestTree(bestT, tr,
-                  pr->perGeneBranchLengths ? pr->numberOfPartitions : 1);
-            }
-        }
-    }
-
-  if (tr->searchConvergenceCriterion)
-    {
-      cleanupHashTable(h, 0);
-      cleanupHashTable(h, 1);
-    }
-
-  cleanup_fast:
-
-  tr->thoroughInsertion = 1;
-  impr = 1;
-
-  recallBestTree(bestT, 1, tr, pr);
-  if (estimateModel)
-    {
-      pllEvaluateLikelihood(tr, pr, tr->start, PLL_TRUE, PLL_FALSE);
-      pllOptimizeModelParameters(tr, pr, 1.0);
-    }
-  else
-    pllOptimizeBranchLengths(tr, pr, 32);
-
-  while (1)
-    {
-      recallBestTree(bestT, 1, tr, pr);
-      if (impr)
-        {
-          rearrangementsMin = 1;
-          rearrangementsMax = tr->stepwidth;
-
-          if (tr->searchConvergenceCriterion)
-            {
-              int bCounter = 0;
-
-              if (thoroughIterations > 1)
-                cleanupHashTable(h, (thoroughIterations % 2));
-
-              bitVectorInitravSpecial(bitVectors, tr->nodep[1]->back,
-                  tr->mxtips, vLength, h, thoroughIterations % 2,
-                  PLL_BIPARTITIONS_RF, (branchInfo *) NULL, &bCounter, 1,
-                  PLL_FALSE, PLL_FALSE, 0);
-
-              assert(bCounter == tr->mxtips - 3);
-
-              if (thoroughIterations > 0)
-                {
-                  double rrf = convergenceCriterion(h, tr->mxtips);
-
-                  if (rrf <= 0.01) /* 1% cutoff */
-                    {
-                      goto cleanup;
-                    }
-                }
-            }
-
-          thoroughIterations++;
-        }
-      else
-        {
-          rearrangementsMax += tr->stepwidth;
-          rearrangementsMin += tr->stepwidth;
-          if (rearrangementsMax > tr->max_rearrange)
-            goto cleanup;
-        }
-      pllOptimizeBranchLengths(tr, pr, 32);
-
-      previousLh = lh = tr->likelihood;
-      saveBestTree(bestT, tr,
-          pr->perGeneBranchLengths ? pr->numberOfPartitions : 1);
-
-      treeOptimizeRapid(tr, pr, rearrangementsMin, rearrangementsMax, bt,
-          &iList);
-
-      impr = 0;
-
-      for (i = 1; i <= bt->nvalid; i++)
-        {
-          recallBestTree(bt, i, tr, pr);
-
-          pllOptimizeBranchLengths(tr, pr, 8);
-
-          difference = (
-              (tr->likelihood > previousLh) ?
-                  tr->likelihood - previousLh : previousLh - tr->likelihood);
-          if (tr->likelihood > lh && difference > epsilon)
-            {
-              impr = 1;
-              lh = tr->likelihood;
-              saveBestTree(bestT, tr,
-                  pr->perGeneBranchLengths ? pr->numberOfPartitions : 1);
-            }
-        }
-
-    }
-
-  cleanup:
-  if (tr->searchConvergenceCriterion)
-    {
-      freeBitVectors(bitVectors, 2 * tr->mxtips);
-      rax_free(bitVectors);
-      //freeHashTable(h);
-      //rax_free(h);
-      pllHashDestroy(&h, hash_dealloc_bipentry);
-    }
-
-  freeBestTree(bestT);
-  rax_free(bestT);
-  freeBestTree(bt);
-  rax_free(bt);
-
-  freeInfoList(&iList);
-
-  if (estimateModel) {
-      pllOptimizeModelParameters(tr, pr, epsilon);
-  }
-  pllOptimizeBranchLengths(tr, pr, 64);
-
-  return 0;
-}
-
diff --git a/pllrepo/src/semaphore.h b/pllrepo/src/semaphore.h
deleted file mode 100644
index c6e9407..0000000
--- a/pllrepo/src/semaphore.h
+++ /dev/null
@@ -1,169 +0,0 @@
-/*
- * Module: semaphore.h
- *
- * Purpose:
- *	Semaphores aren't actually part of the PThreads standard.
- *	They are defined by the POSIX Standard:
- *
- *		POSIX 1003.1b-1993	(POSIX.1b)
- *
- * --------------------------------------------------------------------------
- *
- *      Pthreads-win32 - POSIX Threads Library for Win32
- *      Copyright(C) 1998 John E. Bossom
- *      Copyright(C) 1999,2005 Pthreads-win32 contributors
- * 
- *      Contact Email: rpj at callisto.canberra.edu.au
- * 
- *      The current list of contributors is contained
- *      in the file CONTRIBUTORS included with the source
- *      code distribution. The list can also be seen at the
- *      following World Wide Web location:
- *      http://sources.redhat.com/pthreads-win32/contributors.html
- * 
- *      This library is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU Lesser General Public
- *      License as published by the Free Software Foundation; either
- *      version 2 of the License, or (at your option) any later version.
- * 
- *      This library is distributed in the hope that it will be useful,
- *      but WITHOUT ANY WARRANTY; without even the implied warranty of
- *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *      Lesser General Public License for more details.
- * 
- *      You should have received a copy of the GNU Lesser General Public
- *      License along with this library in the file COPYING.LIB;
- *      if not, write to the Free Software Foundation, Inc.,
- *      59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
- */
-#if !defined( SEMAPHORE_H )
-#define SEMAPHORE_H
-
-#undef PTW32_SEMAPHORE_LEVEL
-
-#if defined(_POSIX_SOURCE)
-#define PTW32_SEMAPHORE_LEVEL 0
-/* Early POSIX */
-#endif
-
-#if defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 199309
-#undef PTW32_SEMAPHORE_LEVEL
-#define PTW32_SEMAPHORE_LEVEL 1
-/* Include 1b, 1c and 1d */
-#endif
-
-#if defined(INCLUDE_NP)
-#undef PTW32_SEMAPHORE_LEVEL
-#define PTW32_SEMAPHORE_LEVEL 2
-/* Include Non-Portable extensions */
-#endif
-
-#define PTW32_SEMAPHORE_LEVEL_MAX 3
-
-#if !defined(PTW32_SEMAPHORE_LEVEL)
-#define PTW32_SEMAPHORE_LEVEL PTW32_SEMAPHORE_LEVEL_MAX
-/* Include everything */
-#endif
-
-#if defined(__GNUC__) && ! defined (__declspec)
-# error Please upgrade your GNU compiler to one that supports __declspec.
-#endif
-
-/*
- * When building the library, you should define PTW32_BUILD so that
- * the variables/functions are exported correctly. When using the library,
- * do NOT define PTW32_BUILD, and then the variables/functions will
- * be imported correctly.
- */
-#if !defined(PTW32_STATIC_LIB)
-#  if defined(PTW32_BUILD)
-#    define PTW32_DLLPORT __declspec (dllexport)
-#  else
-#    define PTW32_DLLPORT __declspec (dllimport)
-#  endif
-#else
-#  define PTW32_DLLPORT
-#endif
-
-/*
- * This is a duplicate of what is in the autoconf config.h,
- * which is only used when building the pthread-win32 libraries.
- */
-
-#if !defined(PTW32_CONFIG_H)
-#  if defined(WINCE)
-#    define NEED_ERRNO
-#    define NEED_SEM
-#  endif
-#  if defined(__MINGW64__)
-#    define HAVE_STRUCT_TIMESPEC
-#    define HAVE_MODE_T
-#  elif defined(_UWIN) || defined(__MINGW32__)
-#    define HAVE_MODE_T
-#  endif
-#endif
-
-/*
- *
- */
-
-#if PTW32_SEMAPHORE_LEVEL >= PTW32_SEMAPHORE_LEVEL_MAX
-#if defined(NEED_ERRNO)
-#include "need_errno.h"
-#else
-#include <errno.h>
-#endif
-#endif /* PTW32_SEMAPHORE_LEVEL >= PTW32_SEMAPHORE_LEVEL_MAX */
-
-#define _POSIX_SEMAPHORES
-
-#if defined(__cplusplus)
-extern "C"
-{
-#endif				/* __cplusplus */
-
-#if !defined(HAVE_MODE_T)
-typedef unsigned int mode_t;
-#endif
-
-
-typedef struct sem_t_ * sem_t;
-
-PTW32_DLLPORT int __cdecl sem_init (sem_t * sem,
-			    int pshared,
-			    unsigned int value);
-
-PTW32_DLLPORT int __cdecl sem_destroy (sem_t * sem);
-
-PTW32_DLLPORT int __cdecl sem_trywait (sem_t * sem);
-
-PTW32_DLLPORT int __cdecl sem_wait (sem_t * sem);
-
-PTW32_DLLPORT int __cdecl sem_timedwait (sem_t * sem,
-				 const struct timespec * abstime);
-
-PTW32_DLLPORT int __cdecl sem_post (sem_t * sem);
-
-PTW32_DLLPORT int __cdecl sem_post_multiple (sem_t * sem,
-				     int count);
-
-PTW32_DLLPORT int __cdecl sem_open (const char * name,
-			    int oflag,
-			    mode_t mode,
-			    unsigned int value);
-
-PTW32_DLLPORT int __cdecl sem_close (sem_t * sem);
-
-PTW32_DLLPORT int __cdecl sem_unlink (const char * name);
-
-PTW32_DLLPORT int __cdecl sem_getvalue (sem_t * sem,
-				int * sval);
-
-#if defined(__cplusplus)
-}				/* End of extern "C" */
-#endif				/* __cplusplus */
-
-#undef PTW32_SEMAPHORE_LEVEL
-#undef PTW32_SEMAPHORE_LEVEL_MAX
-
-#endif				/* !SEMAPHORE_H */
diff --git a/pllrepo/src/ssort.c b/pllrepo/src/ssort.c
deleted file mode 100644
index b08cbe7..0000000
--- a/pllrepo/src/ssort.c
+++ /dev/null
@@ -1,121 +0,0 @@
-/** 
- * PLL (version 1.0.0) a software library for phylogenetic inference
- * Copyright (C) 2013 Tomas Flouri and Alexandros Stamatakis
- *
- * Derived from 
- * RAxML-HPC, a program for sequential and parallel estimation of phylogenetic
- * trees by Alexandros Stamatakis
- *
- * This program is free software: you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation, either version 3 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- * 
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * For any other enquiries send an Email to Tomas Flouri
- * Tomas.Flouri at h-its.org
- *
- * When publishing work that uses PLL please cite PLL
- * 
- * @file ssort.c
- * Detailed description to appear soon.
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include "mem_alloc.h"
-
-/*  string sorting implementation from:
- *  Bentley J. L., Sedgewick R.: Fast Algorithms for Sorting and Searching 
- *  Strings. In Proceedings of ACM-SIAM Symposium on Discrete Algorithms 
- *  (SODA) 1997.
- */
-
-static void 
-vecswap (int i, int j, int n, char ** x, int * oi)
-{
-  while (n-- > 0)
-   {
-     PLL_SWAP_PTR (x[i], x[j]);
-     PLL_SWAP_INT (oi[i], oi[j]);
-
-     ++ i; ++ j;
-   }
-}
-
-static void 
-ssort1 (char ** x, int n, int depth, int * oi)
-{
-  int           a, b, c, d, r, v;
-
-  if (n <= 1) return;
-
-  a = rand() % n;
-
-  PLL_SWAP_PTR (x[0], x[a]);
-  PLL_SWAP_INT (oi[0], oi[a]);
-
-  v = x[0][depth];
-
-  a = b = 1;
-  c = d = n - 1;
-
-  for (;;)
-   {
-     while (b <= c && (r = x[b][depth] - v) <= 0)
-      {
-        if (r == 0)
-         {
-           PLL_SWAP_PTR (x[a], x[b]);
-           PLL_SWAP_INT (oi[a], oi[b]);
-           ++ a;
-         }
-        ++ b;
-      }
-     while (b <= c && (r = x[c][depth] - v) >= 0)
-      {
-        if (r == 0)
-         {
-           PLL_SWAP_PTR (x[c], x[d]);
-           PLL_SWAP_INT (oi[c], oi[d]);
-           -- d;
-         }
-        -- c;
-      }
-     if (b > c) break;
-     PLL_SWAP_PTR (x[b], x[c]);
-     PLL_SWAP_INT (oi[b], oi[c]);
-     ++ b; -- c;
-   }
-  r = PLL_MIN (a,     b - a);      vecswap (0, b - r, r, x, oi);
-  r = PLL_MIN (d - c, n - d - 1);  vecswap (b, n - r, r, x, oi);
-  r = b - a; ssort1 (x, r, depth, oi);
-  if (x[r][depth] != 0)
-   {
-     ssort1 (x + r, a + n - d - 1, depth + 1, oi + r);
-   }
-  r = d - c; ssort1 (x + n - r, r, depth, oi + n - r);
-}
-
-int * 
-pllssort1main (char ** x, int n)
-{
-  int * oi;
-  int i;
-
-  oi = (int *) rax_malloc (n * sizeof (int));
-  for (i = 0; i < n; ++ i)
-   {
-     oi[i] = i;
-   }
-  ssort1 (x, n, 0, oi);
-  
-  return (oi);
-}
-
diff --git a/pllrepo/src/stack.c b/pllrepo/src/stack.c
deleted file mode 100644
index 062cf2e..0000000
--- a/pllrepo/src/stack.c
+++ /dev/null
@@ -1,85 +0,0 @@
-/** 
- * PLL (version 1.0.0) a software library for phylogenetic inference
- * Copyright (C) 2013 Tomas Flouri and Alexandros Stamatakis
- *
- * Derived from 
- * RAxML-HPC, a program for sequential and parallel estimation of phylogenetic
- * trees by Alexandros Stamatakis
- *
- * This program is free software: you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation, either version 3 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- * 
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * For any other enquiries send an Email to Tomas Flouri
- * Tomas.Flouri at h-its.org
- *
- * When publishing work that uses PLL please cite PLL
- * 
- * @file stack.c
- * @brief Generic stack implementation
- *
- * Detailed description to appear soon.
- */
-#include <stdio.h>
-#include "stack.h"
-#include "mem_alloc.h"
-
-int pllStackSize (pllStack ** stack)
-{
-  pllStack * top;
-  int size = 0;
-  top = *stack;
- 
-  while (top)
-  {
-    ++ size;
-    top = top->next;
-  }
-  
-  return (size);
-}
-
-int 
-pllStackPush (pllStack ** head, void * item)
-{
-  pllStack * new;
- 
-  new = (pllStack *) rax_malloc (sizeof (pllStack));
-  if (!new) return (0);
- 
-  new->item = item;
-  new->next = *head;
-  *head     = new;
- 
-  return (1);
-}
-
-void * pllStackPop (pllStack ** head)
-{
-  void * item;
-  pllStack * tmp;
-  if (!*head) return (NULL);
- 
-  tmp     = (*head);
-  item    = (*head)->item;
-  (*head) = (*head)->next;
-  rax_free (tmp);
- 
-  return (item);
-}
- 
-void 
-pllStackClear (pllStack ** stack)
-{
-  while (*stack) pllStackPop (stack);
-}
-
diff --git a/pllrepo/src/stack.h b/pllrepo/src/stack.h
deleted file mode 100644
index 2ec64bd..0000000
--- a/pllrepo/src/stack.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/** 
- * PLL (version 1.0.0) a software library for phylogenetic inference
- * Copyright (C) 2013 Tomas Flouri and Alexandros Stamatakis
- *
- * Derived from 
- * RAxML-HPC, a program for sequential and parallel estimation of phylogenetic
- * trees by Alexandros Stamatakis
- *
- * This program is free software: you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation, either version 3 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- * 
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * For any other enquiries send an Email to Tomas Flouri
- * Tomas.Flouri at h-its.org
- *
- * When publishing work that uses PLL please cite PLL
- * 
- * @file stack.h
- * @brief Generic stack implementation
- *
- * Detailed description to appear soon.
- */
-#ifndef __pll_STACK__
-#define __pll_STACK__
-
-struct pllStack
-{
-  void * item;
-  struct pllStack * next;
-};
-
-typedef struct pllStack pllStack;
-
-void  pllStackClear (pllStack ** stack);
-void * pllStackPop (pllStack ** head);
-int pllStackPush (pllStack ** head, void * item);
-int pllStackSize (pllStack ** stack);
-
-#endif
diff --git a/pllrepo/src/topologies.c b/pllrepo/src/topologies.c
deleted file mode 100644
index f19bf3d..0000000
--- a/pllrepo/src/topologies.c
+++ /dev/null
@@ -1,778 +0,0 @@
-/** 
- * PLL (version 1.0.0) a software library for phylogenetic inference
- * Copyright (C) 2013 Tomas Flouri and Alexandros Stamatakis
- *
- * Derived from 
- * RAxML-HPC, a program for sequential and parallel estimation of phylogenetic
- * trees by Alexandros Stamatakis
- *
- * This program is free software: you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation, either version 3 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- * 
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * For any other enquiries send an Email to Tomas Flouri
- * Tomas.Flouri at h-its.org
- *
- * When publishing work that uses PLL please cite PLL
- * 
- * @file topologies.c
- * @brief Miscellanous functions working with tree topology
-*/
-#include "mem_alloc.h"
-
-#ifndef WIN32
-#include <sys/times.h>
-#include <sys/types.h>
-#include <sys/time.h>
-#include <unistd.h> 
-#endif
-
-#include <math.h>
-#include <time.h> 
-#include <stdlib.h>
-#include <stdio.h>
-#include <ctype.h>
-#include <string.h>
-#include <assert.h>
-
-#include "pll.h"
-#include "pllInternal.h"
-
-static void saveTopolRELLRec(pllInstance *tr, nodeptr p, topolRELL *tpl, int *i, int numsp)
-{
-  int k;
-  if(isTip(p->number, numsp))
-    return;
-  else
-    {
-      nodeptr q = p->next;      
-      while(q != p)
-	{	  
-	  tpl->connect[*i].p = q;
-	  tpl->connect[*i].q = q->back; 
-	  
-	  if(tr->grouped ||  tr->constrained)
-	    {
-	      tpl->connect[*i].cp = tr->constraintVector[q->number];
-	      tpl->connect[*i].cq = tr->constraintVector[q->back->number]; 
-	    }
-	  
-	  for(k = 0; k < PLL_NUM_BRANCHES; k++)
-	    tpl->connect[*i].z[k] = q->z[k];
-	  *i = *i + 1;
-
-	  saveTopolRELLRec(tr, q->back, tpl, i, numsp);
-	  q = q->next;
-	}
-    }
-}
-
-static void saveTopolRELL(pllInstance *tr, topolRELL *tpl)
-{
-  nodeptr p = tr->start;
-  int k, i = 0;
-      
-  tpl->likelihood = tr->likelihood;
-  tpl->start      = 1;
-      
-  tpl->connect[i].p = p;
-  tpl->connect[i].q = p->back;
-  
-  if(tr->grouped ||  tr->constrained)
-    {
-      tpl->connect[i].cp = tr->constraintVector[p->number];
-      tpl->connect[i].cq = tr->constraintVector[p->back->number]; 
-    }
-
-  for(k = 0; k < PLL_NUM_BRANCHES; k++)
-    tpl->connect[i].z[k] = p->z[k];
-  i++;
-      
-  saveTopolRELLRec(tr, p->back, tpl, &i, tr->mxtips);
-
-  assert(i == 2 * tr->mxtips - 3);
-}
-
-
-static void restoreTopolRELL(pllInstance *tr, topolRELL *tpl, int numBranches)
-{
-  int i;
-  
-  for (i = 0; i < 2 * tr->mxtips - 3; i++) 
-    {
-      hookup(tpl->connect[i].p, tpl->connect[i].q, tpl->connect[i].z,  numBranches);
-      tr->constraintVector[tpl->connect[i].p->number] = tpl->connect[i].cp;
-      tr->constraintVector[tpl->connect[i].q->number] = tpl->connect[i].cq;
-    }
-  
-
-  tr->likelihood = tpl->likelihood;
-  tr->start      = tr->nodep[tpl->start];
-  /* TODO */
-}
-
-
-
-/** @brief Initializes space as large as the tree
-  *
-  * @param rl
-  *   RELL 
-  *
-  * @param tr
-  *   PLL instance
-  *
-  * @param n
-  *   Number of
-  *
-  * @todo
-  *   Don't know what is this used for. Something with RELL?
-  *
-  */
-void initTL(topolRELL_LIST *rl, pllInstance *tr, int n)
-{
-  int i;
-
-  rl->max = n; 
-  rl->t = (topolRELL **)rax_malloc(sizeof(topolRELL *) * n);
-
-  for(i = 0; i < n; i++)
-    {
-      rl->t[i] = (topolRELL *)rax_malloc(sizeof(topolRELL));
-      rl->t[i]->connect = (connectRELL *)rax_malloc((2 * tr->mxtips - 3) * sizeof(connectRELL));
-      rl->t[i]->likelihood = PLL_UNLIKELY;     
-    }
-}
-
-/** @brief Deallocate the space associated with this structure
-  *
-  * @paral rl
-  *   This structure
-  *
-  * @todo
-  *   fill the description
-  */
-void freeTL(topolRELL_LIST *rl)
-{
-  int i;
-  for(i = 0; i < rl->max; i++)    
-    {
-      rax_free(rl->t[i]->connect);          
-      rax_free(rl->t[i]);
-    }
-  rax_free(rl->t);
-}
-
-
-void restoreTL(topolRELL_LIST *rl, pllInstance *tr, int n, int numBranches)
-{
-  assert(n >= 0 && n < rl->max);    
-
-  restoreTopolRELL(tr, rl->t[n], numBranches);
-}
-
-
-
-/** @brief Reset this structure
-  *
-  * Reset the likelihoods in this structure
-  *
-  * @param rl
-  *   This structure
-  *
-  * @todo
-  *   Complete this
-  */
-void resetTL(topolRELL_LIST *rl)
-{
-  int i;
-
-  for(i = 0; i < rl->max; i++)    
-    rl->t[i]->likelihood = PLL_UNLIKELY;          
-}
-
-
-
-/** @brief Save 
-  *
-  * Save this topology?
-  *
-  * @todo 
-  *  Complete this
-  */
-void saveTL(topolRELL_LIST *rl, pllInstance *tr, int index)
-{ 
-  assert(index >= 0 && index < rl->max);    
-    
-  if(tr->likelihood > rl->t[index]->likelihood)        
-    saveTopolRELL(tr, rl->t[index]); 
-}
-
-
-static void  *tipValPtr (nodeptr p)
-{ 
-  return  (void *) & p->number;
-}
-
-
-static int  cmpTipVal (void *v1, void *v2)
-{
-  int  i1, i2;
-  
-  i1 = *((int *) v1);
-  i2 = *((int *) v2);
-  return  (i1 < i2) ? -1 : ((i1 == i2) ? 0 : 1);
-}
-
-
-/*  These are the only routines that need to UNDERSTAND topologies */
-
-/** @brief Allocate and initialize space for a tree topology
-    
-    Allocate and initialize a \a topol structure for a tree topology of
-    \a maxtips tips
-
-    @param
-      Number of tips of topology
-
-    @return
-      Pointer to the allocated \a topol structure
-*/
-topol  *setupTopol (int maxtips)
-{
-  topol   *tpl;
-
-  if (! (tpl = (topol *) rax_malloc(sizeof(topol))) || 
-      ! (tpl->links = (connptr) rax_malloc((2*maxtips-3) * sizeof(pllConnect))))
-    {
-      printf("ERROR: Unable to get topology memory");
-      tpl = (topol *) NULL;
-    }
-  else 
-    {
-      tpl->likelihood  = PLL_UNLIKELY;
-      tpl->start       = (node *) NULL;
-      tpl->nextlink    = 0;
-      tpl->ntips       = 0;
-      tpl->nextnode    = 0;    
-      tpl->scrNum      = 0;     /* position in sorted list of scores */
-      tpl->tplNum      = 0;     /* position in sorted list of trees */	      
-    }
-  
-  return  tpl;
-} 
-
-
-/** @brief Deallocate the space occupied by a \a topol structure
-    
-    Deallocate the space occupied by a \a topol structure
-
-    @param tpl
-      The \a topol structure that is to be deallocated
-*/
-void freeTopol (topol *tpl)
-{
-  rax_free(tpl->links);
-  rax_free(tpl);
-} 
-
-
-static int saveSubtree (nodeptr p, topol *tpl, int numsp, int numBranches)  
-{
-  connptr  r, r0;
-  nodeptr  q, s;
-  int      t, t0, t1, k;
-
-  r0 = tpl->links;
-  r = r0 + (tpl->nextlink)++;
-  r->p = p;
-  r->q = q = p->back;
-
-  for(k = 0; k < numBranches; k++)
-    r->z[k] = p->z[k];
-
-  r->descend = 0;                     /* No children (yet) */
-
-  if (isTip(q->number, numsp)) 
-    {
-      r->valptr = tipValPtr(q);         /* Assign value */
-    }
-  else 
-    {                              /* Internal node, look at children */
-      s = q->next;                      /* First child */
-      do 
-	{
-	  t = saveSubtree(s, tpl, numsp, numBranches);        /* Generate child's subtree */
-
-	  t0 = 0;                         /* Merge child into list */
-	  t1 = r->descend;
-	  while (t1 && (cmpTipVal(r0[t1].valptr, r0[t].valptr) < 0)) {
-	    t0 = t1;
-	    t1 = r0[t1].sibling;
-          }
-	  if (t0) r0[t0].sibling = t;  else  r->descend = t;
-	  r0[t].sibling = t1;
-
-	  s = s->next;                    /* Next child */
-        } while (s != q);
-
-      r->valptr = r0[r->descend].valptr;   /* Inherit first child's value */
-      }                                 /* End of internal node processing */
-
-  return  (r - r0);
-}
-
-/** @brief Get the node with the smallest tip value
-    
-    Recursively finds and returns the tip with the smallest value around a node
-    \a p0, or returns \a p0 if it is a tip.
-
-    @param p0
-      Node around which to at which the recursion starts
-
-    @param numsp
-      Number of species (tips) in the tree
-
-    @todo
-      Why do we return p0 immediately if it is a tip? Perhaps one of the two other nodes,
-      i.e. p0->next and p0->next->next, is a tip as well with a smaller number than p0.
-*/
-static nodeptr minSubtreeTip (nodeptr  p0, int numsp)
-{ 
-  nodeptr  minTip, p, testTip;
-
-  if (isTip(p0->number, numsp)) 
-    return p0;
-
-  p = p0->next;
-
-  minTip = minSubtreeTip(p->back, numsp);
-
-  while ((p = p->next) != p0) 
-    {
-      testTip = minSubtreeTip(p->back, numsp);
-      if (cmpTipVal(tipValPtr(testTip), tipValPtr(minTip)) < 0)
-        minTip = testTip;
-    }
-  return minTip;
-} 
-
-
-/** @brief
-*/
-static nodeptr  minTreeTip (nodeptr  p, int numsp)
-{
-  nodeptr  minp, minpb;
-
-  minp  = minSubtreeTip(p, numsp);
-  minpb = minSubtreeTip(p->back, numsp);
-  return (cmpTipVal(tipValPtr(minp), tipValPtr(minpb)) < 0 ? minp : minpb);
-}
-
-/** @brief Save the tree topology in a \a topol structure
-    
-    Save the current tree topology in \a topol structure \a tpl.
-
-*/
-void saveTree (pllInstance *tr, topol *tpl, int numBranches)
-/*  Save a tree topology in a standard order so that first branches
- *  from a node contain lower value tips than do second branches from
- *  the node.  The root tip should have the lowest value of all.
- */
-{
-  connptr  r;  
-  
-  tpl->nextlink = 0;                             /* Reset link pointer */
-  r = tpl->links + saveSubtree(minTreeTip(tr->start, tr->mxtips), tpl, tr->mxtips, numBranches);  /* Save tree */
-  r->sibling = 0;
-  
-  tpl->likelihood = tr->likelihood;
-  tpl->start      = tr->start;
-  tpl->ntips      = tr->ntips;
-  tpl->nextnode   = tr->nextnode;    
-  
-} /* saveTree */
-
-
-/* @brief Transform tree to a given topology and evaluate likelihood
-
-   Transform our current tree topology to the one stored in \a tpl and
-   evaluates the likelihood
-
-   @param tr
-     PLL instance
-
-   @param pr
-     List of partitions
-
-   @return
-     \b PLL_TRUE
-
-   @todo
-     Remove the return value, unnecessary
-
-*/
-pllBoolean restoreTree (topol *tpl, pllInstance *tr, partitionList *pr)
-{ 
-  connptr  r;
-  nodeptr  p, p0;    
-  int  i;
-
-  /* first of all set all backs to NULL so that tips do not point anywhere */
-  for (i = 1; i <= 2*(tr->mxtips) - 2; i++) 
-    {  
-      /* Uses p = p->next at tip */
-      p0 = p = tr->nodep[i];
-      do 
-	{
-	  p->back = (nodeptr) NULL;
-	  p = p->next;
-	} 
-      while (p != p0);
-    }
-
-  /*  Copy connections from topology */
-
-  /* then connect the nodes together */
-  for (r = tpl->links, i = 0; i < tpl->nextlink; r++, i++)     
-    hookup(r->p, r->q, r->z, pr->perGeneBranchLengths?pr->numberOfPartitions:1);
-
-  tr->likelihood = tpl->likelihood;
-  tr->start      = tpl->start;
-  tr->ntips      = tpl->ntips;
-  
-  tr->nextnode   = tpl->nextnode;    
-
-  pllEvaluateLikelihood (tr, pr, tr->start, PLL_TRUE, PLL_FALSE);
-  return PLL_TRUE;
-}
-
-
-
-/** @brief Initialize a list of best trees
-    
-    Initialize a list that will contain the best \a newkeep tree topologies,
-    i.e. the ones that yield the best likelihood. Inside the list initialize
-    space for \a newkeep + 1 topologies of \a numsp tips. The additional
-    topology is the starting one
-
-    @param bt
-      Pointer to \a bestlist to be initialized
-
-    @param newkeep
-      Number of new topologies to keep
-
-    @param numsp
-      Number of species (tips)
-
-    @return
-      number of tree topology slots in the list (minus the starting one)
-
-    @todo
-      Is there a reason that this function is so complex? Many of the checks
-      are unnecessary as the function is called only at two places in the
-      code with newkeep=1 and newkeep=20
-*/
-int initBestTree (bestlist *bt, int newkeep, int numsp)
-{ /* initBestTree */
-  int  i;
-
-  bt->nkeep = 0;
-
-  if (bt->ninit <= 0) 
-    {
-      if (! (bt->start = setupTopol(numsp)))  return  0;
-      bt->ninit    = -1;
-      bt->nvalid   = 0;
-      bt->numtrees = 0;
-      bt->best     = PLL_UNLIKELY;
-      bt->improved = PLL_FALSE;
-      bt->byScore  = (topol **) rax_malloc((newkeep + 1) * sizeof(topol *));
-      bt->byTopol  = (topol **) rax_malloc((newkeep + 1) * sizeof(topol *));
-      if (! bt->byScore || ! bt->byTopol) {
-        printf( "initBestTree: malloc failure\n");
-        return 0;
-      }
-    }
-  else if (PLL_ABS(newkeep) > bt->ninit) {
-    if (newkeep <  0) newkeep = -(bt->ninit);
-    else newkeep = bt->ninit;
-  }
-
-  if (newkeep < 1) {    /*  Use negative newkeep to clear list  */
-    newkeep = -newkeep;
-    if (newkeep < 1) newkeep = 1;
-    bt->nvalid = 0;
-    bt->best = PLL_UNLIKELY;
-  }
-  
-  if (bt->nvalid >= newkeep) {
-    bt->nvalid = newkeep;
-    bt->worst = bt->byScore[newkeep]->likelihood;
-  }
-  else 
-    {
-      bt->worst = PLL_UNLIKELY;
-    }
-  
-  for (i = bt->ninit + 1; i <= newkeep; i++) 
-    {    
-      if (! (bt->byScore[i] = setupTopol(numsp)))  break;
-      bt->byTopol[i] = bt->byScore[i];
-      bt->ninit = i;
-    }
-  
-  return  (bt->nkeep = PLL_MIN(newkeep, bt->ninit));
-} /* initBestTree */
-
-
-
-void resetBestTree (bestlist *bt)
-{ /* resetBestTree */
-  bt->best     = PLL_UNLIKELY;
-  bt->worst    = PLL_UNLIKELY;
-  bt->nvalid   = 0;
-  bt->improved = PLL_FALSE;
-} /* resetBestTree */
-
-
-pllBoolean  freeBestTree(bestlist *bt)
-{ /* freeBestTree */
-  while (bt->ninit >= 0)  freeTopol(bt->byScore[(bt->ninit)--]);
-    
-  /* VALGRIND */
-
-  rax_free(bt->byScore);
-  rax_free(bt->byTopol);
-
-  /* VALGRIND END */
-
-  freeTopol(bt->start);
-  return PLL_TRUE;
-} /* freeBestTree */
-
-
-/*  Compare two trees, assuming that each is in standard order.  Return
- *  -1 if first preceeds second, 0 if they are identical, or +1 if first
- *  follows second in standard order.  Lower number tips preceed higher
- *  number tips.  A tip preceeds a corresponding internal node.  Internal
- *  nodes are ranked by their lowest number tip.
- */
-
-static int  cmpSubtopol (connptr p10, connptr p1, connptr p20, connptr p2)
-{
-  connptr  p1d, p2d;
-  int  cmp;
-  
-  if (! p1->descend && ! p2->descend)          /* Two tips */
-    return cmpTipVal(p1->valptr, p2->valptr);
-  
-  if (! p1->descend) return -1;                /* p1 = tip, p2 = node */
-  if (! p2->descend) return  1;                /* p2 = tip, p1 = node */
-  
-  p1d = p10 + p1->descend;
-  p2d = p20 + p2->descend;
-  while (1) {                                  /* Two nodes */
-    if ((cmp = cmpSubtopol(p10, p1d, p20, p2d)))  return cmp; /* Subtrees */
-    if (! p1d->sibling && ! p2d->sibling)  return  0; /* Lists done */
-    if (! p1d->sibling) return -1;             /* One done, other not */
-    if (! p2d->sibling) return  1;             /* One done, other not */
-    p1d = p10 + p1d->sibling;                  /* Neither done */
-    p2d = p20 + p2d->sibling;
-  }
-}
-
-
-
-static int  cmpTopol (void *tpl1, void *tpl2)
-{ 
-  connptr  r1, r2;
-  int      cmp;    
-  
-  r1 = ((topol *) tpl1)->links;
-  r2 = ((topol *) tpl2)->links;
-  cmp = cmpTipVal(tipValPtr(r1->p), tipValPtr(r2->p));
-  if (cmp)      	
-    return cmp;     
-  return  cmpSubtopol(r1, r1, r2, r2);
-} 
-
-
-
-static int  cmpTplScore (void *tpl1, void *tpl2)
-{ 
-  double  l1, l2;
-  
-  l1 = ((topol *) tpl1)->likelihood;
-  l2 = ((topol *) tpl2)->likelihood;
-  return  (l1 > l2) ? -1 : ((l1 == l2) ? 0 : 1);
-}
-
-
-
-/*  Find an item in a sorted list of n items.  If the item is in the list,
- *  return its index.  If it is not in the list, return the negative of the
- *  position into which it should be inserted.
- */
-
-static int  findInList (void *item, void *list[], int n, int (* cmpFunc)(void *, void *))
-{
-  int  mid, hi, lo, cmp = 0;
-  
-  if (n < 1) return  -1;                    /*  No match; first index  */
-  
-  lo = 1;
-  mid = 0;
-  hi = n;
-  while (lo < hi) {
-    mid = (lo + hi) >> 1;
-    cmp = (* cmpFunc)(item, list[mid-1]);
-    if (cmp) {
-      if (cmp < 0) hi = mid;
-      else lo = mid + 1;
-    }
-    else  return  mid;                        /*  Exact match  */
-  }
-  
-  if (lo != mid) {
-    cmp = (* cmpFunc)(item, list[lo-1]);
-    if (cmp == 0) return lo;
-  }
-  if (cmp > 0) lo++;                         /*  Result of step = 0 test  */
-  return  -lo;
-} 
-
-
-
-static int  findTreeInList (bestlist *bt, pllInstance *tr, int numBranches)
-{
-  topol  *tpl;
-  
-  tpl = bt->byScore[0];
-  saveTree(tr, tpl, numBranches);
-  return  findInList((void *) tpl, (void **) (& (bt->byTopol[1])),
-		     bt->nvalid, cmpTopol);
-} 
-
-
-/** @brief Save the current tree in the \a bestlist structure
-    
-    Save the current tree topology in \a bestlist structure \a bt.
-
-    @param tr
-      The PLL instance
-    
-    @param bt
-      The \a bestlist structure
-    
-    @param numBranches
-      Number of branches u
-
-    @return
-      it is never used
-
-    @todo
-      What to do with the return value? Should we simplify the code?
-*/
-int  saveBestTree (bestlist *bt, pllInstance *tr, int numBranches)
-{    
-  topol  *tpl, *reuse;
-  int  tplNum, scrNum, reuseScrNum, reuseTplNum, i, oldValid, newValid;
-  
-  tplNum = findTreeInList(bt, tr, numBranches);
-  tpl = bt->byScore[0];
-  oldValid = newValid = bt->nvalid;
-  
-  if (tplNum > 0) {                      /* Topology is in list  */
-    reuse = bt->byTopol[tplNum];         /* Matching topol  */
-    reuseScrNum = reuse->scrNum;
-    reuseTplNum = reuse->tplNum;
-  }
-  /* Good enough to keep? */
-  else if (tr->likelihood < bt->worst)  return 0;
-  
-  else {                                 /* Topology is not in list */
-    tplNum = -tplNum;                    /* Add to list (not replace) */
-    if (newValid < bt->nkeep) bt->nvalid = ++newValid;
-    reuseScrNum = newValid;              /* Take worst tree */
-    reuse = bt->byScore[reuseScrNum];
-    reuseTplNum = (newValid > oldValid) ? newValid : reuse->tplNum;
-    if (tr->likelihood > bt->start->likelihood) bt->improved = PLL_TRUE;
-  }
-  
-  scrNum = findInList((void *) tpl, (void **) (& (bt->byScore[1])),
-		      oldValid, cmpTplScore);
-  scrNum = PLL_ABS(scrNum);
-  
-  if (scrNum < reuseScrNum)
-    for (i = reuseScrNum; i > scrNum; i--)
-      (bt->byScore[i] = bt->byScore[i-1])->scrNum = i;
-  
-  else if (scrNum > reuseScrNum) {
-    scrNum--;
-    for (i = reuseScrNum; i < scrNum; i++)
-      (bt->byScore[i] = bt->byScore[i+1])->scrNum = i;
-  }
-  
-  if (tplNum < reuseTplNum)
-    for (i = reuseTplNum; i > tplNum; i--)
-      (bt->byTopol[i] = bt->byTopol[i-1])->tplNum = i;
-  
-  else if (tplNum > reuseTplNum) {
-    tplNum--;
-    for (i = reuseTplNum; i < tplNum; i++)
-      (bt->byTopol[i] = bt->byTopol[i+1])->tplNum = i;
-  }
-  
-  
-  
-  tpl->scrNum = scrNum;
-  tpl->tplNum = tplNum;
-  bt->byTopol[tplNum] = bt->byScore[scrNum] = tpl;
-  bt->byScore[0] = reuse;
-  
-  if (scrNum == 1)  bt->best = tr->likelihood;
-  if (newValid == bt->nkeep) bt->worst = bt->byScore[newValid]->likelihood;
-  
-  return  scrNum;
-} 
-
-
-/** @brief Restore the best tree from \a bestlist structure
-    
-    Restore the \a rank-th best tree from the \a bestlist structure \a bt.
-
-    @param bt
-      The \a bestlist structure containing the stored best trees
-
-    @param rank
-      The rank (by score) of the tree we want to retrieve
-
-    @param tr
-      PLL instance
-
-    @param pr
-      List of partitions
-
-    @return
-      Index (rank) of restored topology in \a bestlist
-*/
-int  recallBestTree (bestlist *bt, int rank, pllInstance *tr, partitionList *pr)
-{ 
-  if (rank < 1)  rank = 1;
-  if (rank > bt->nvalid)  rank = bt->nvalid;
-  if (rank > 0)  if (! restoreTree(bt->byScore[rank], tr, pr)) return PLL_FALSE;
-  return  rank;
-}
-
-
-
-
diff --git a/pllrepo/src/trash.c b/pllrepo/src/trash.c
deleted file mode 100644
index 5247c25..0000000
--- a/pllrepo/src/trash.c
+++ /dev/null
@@ -1,129 +0,0 @@
-/** 
- * PLL (version 1.0.0) a software library for phylogenetic inference
- * Copyright (C) 2013 Tomas Flouri and Alexandros Stamatakis
- *
- * Derived from 
- * RAxML-HPC, a program for sequential and parallel estimation of phylogenetic
- * trees by Alexandros Stamatakis
- *
- * This program is free software: you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation, either version 3 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- * 
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * For any other enquiries send an Email to Tomas Flouri
- * Tomas.Flouri at h-its.org
- *
- * When publishing work that uses PLL please cite PLL
- * 
- * @file trash.c
- */
-#include "mem_alloc.h"
-
-#ifndef WIN32
-#include <sys/times.h>
-#include <sys/types.h>
-#include <sys/time.h>
-#include <unistd.h>  
-#endif
-
-#include <limits.h>
-#include <math.h>
-#include <time.h> 
-#include <stdlib.h>
-#include <stdio.h>
-#include <ctype.h>
-#include <string.h>
-#include <assert.h>
-
-#include "pll.h"
-#include "pllInternal.h"
- 
-  
-/** @brief Reorder nodes in PLL tree
-
-    Re-order the internal nodes of the tree of PLL instance \a tr in a preorder
-    traversal such that they start from \a p
-    
-    @param tr
-      PLL instance
-
-    @param np
-      Array of node pointers
-
-    @param p
-      Node from where the preorder traversal should start
-
-    @param count
-
-    @todo
-      why not insert a break in the for loop when the node is found?
-
-*/
-static void reorderNodes(pllInstance *tr, nodeptr *np, nodeptr p, int *count)
-{
-  int i, found = 0;
-
-  if(isTip(p->number, tr->mxtips))    
-    return;
-  else
-    {              
-      for(i = tr->mxtips + 1; (i <= (tr->mxtips + tr->mxtips - 1)) && (found == 0); i++)
-	{
-	  if (p == np[i] || p == np[i]->next || p == np[i]->next->next)
-	    {
-	      if(p == np[i])			       
-		tr->nodep[*count + tr->mxtips + 1] = np[i];		 		
-	      else
-		{
-		  if(p == np[i]->next)		  
-		    tr->nodep[*count + tr->mxtips + 1] = np[i]->next;		     	   
-		  else		   
-		    tr->nodep[*count + tr->mxtips + 1] = np[i]->next->next;		    		    
-		}
-
-	      found = 1;	      	     
-	      *count = *count + 1;
-	    }
-	} 
-      
-      assert(found != 0);
-     
-      reorderNodes(tr, np, p->next->back, count);     
-      reorderNodes(tr, np, p->next->next->back, count);                
-    }
-}
-
-void nodeRectifier(pllInstance *tr)
-{
-  nodeptr *np = (nodeptr *)rax_malloc(2 * tr->mxtips * sizeof(nodeptr));
-  int i;
-  int count = 0;
-  
-  tr->start       = tr->nodep[1];
-  tr->rooted      = PLL_FALSE;
-
-  /* TODO why is tr->rooted set to PLL_FALSE here ?*/
-  
-  for(i = tr->mxtips + 1; i <= (tr->mxtips + tr->mxtips - 1); i++)
-    np[i] = tr->nodep[i];           
-  
-  reorderNodes(tr, np, tr->start->back, &count); 
-
- 
-  rax_free(np);
-}
-
-nodeptr findAnyTip(nodeptr p, int numsp)
-{ 
-  return  isTip(p->number, numsp) ? p : findAnyTip(p->next->back, numsp);
-} 
-
diff --git a/pllrepo/src/treeIO.c b/pllrepo/src/treeIO.c
deleted file mode 100644
index 0a63b40..0000000
--- a/pllrepo/src/treeIO.c
+++ /dev/null
@@ -1,236 +0,0 @@
-/** 
- * PLL (version 1.0.0) a software library for phylogenetic inference
- * Copyright (C) 2013 Tomas Flouri and Alexandros Stamatakis
- *
- * Derived from 
- * RAxML-HPC, a program for sequential and parallel estimation of phylogenetic
- * trees by Alexandros Stamatakis
- *
- * This program is free software: you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation, either version 3 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- * 
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * For any other enquiries send an Email to Tomas Flouri
- * Tomas.Flouri at h-its.org
- *
- * When publishing work that uses PLL please cite PLL
- * 
- * @file treeIO.c
- */
-#include "mem_alloc.h"
-
-#include "mem_alloc.h"
-
-#ifndef WIN32
-#include <sys/times.h>
-#include <sys/types.h>
-#include <sys/time.h>
-#include <unistd.h> 
-#endif
-
-#include <math.h>
-#include <time.h> 
-#include <stdlib.h>
-#include <stdio.h>
-#include <ctype.h>
-#include <string.h>
-#include <assert.h>
-
-#include "pll.h"
-#include "pllInternal.h"
-
-extern char *likelihood_key;
-extern char *ntaxa_key;
-extern char *smoothed_key;
-extern int partCount;
-
-int countTips(nodeptr p, int numsp)
-{
-  if(isTip(p->number, numsp))  
-    return 1;    
-  {
-    nodeptr q;
-    int tips = 0;
-
-    q = p->next;
-    while(q != p)
-      { 
-	tips += countTips(q->back, numsp);
-	q = q->next;
-      } 
-    
-    return tips;
-  }
-}
-
-
-static double getBranchLength(pllInstance *tr, partitionList *pr, int perGene, nodeptr p)
-{
-  double 
-    z = 0.0,
-    x = 0.0;
-  int numBranches = pr->perGeneBranchLengths?pr->numberOfPartitions:1;
-
-  assert(perGene != PLL_NO_BRANCHES);
-	      
-  if(numBranches == 1)
-    {
-      assert(tr->fracchange != -1.0);
-      z = p->z[0];
-      if (z < PLL_ZMIN) 
-	z = PLL_ZMIN;      	 
-      
-      x = -log(z) * tr->fracchange;           
-    }
-  else
-    {
-      if(perGene == PLL_SUMMARIZE_LH)
-	{
-	  int 
-	    i;
-	  
-	  double 
-	    avgX = 0.0;
-		      
-	  for(i = 0; i < numBranches; i++)
-	    {
-	      assert(pr->partitionData[i]->partitionContribution != -1.0);
-	      assert(pr->partitionData[i]->fracchange != -1.0);
-	      z = p->z[i];
-	      if(z < PLL_ZMIN) 
-		z = PLL_ZMIN;      	 
-	      x = -log(z) * pr->partitionData[i]->fracchange;
-	      avgX += x * pr->partitionData[i]->partitionContribution;
-	    }
-
-	  x = avgX;
-	}
-      else
-	{	
-	  assert(pr->partitionData[perGene]->fracchange != -1.0);
-	  assert(perGene >= 0 && perGene < numBranches);
-	  
-	  z = p->z[perGene];
-	  
-	  if(z < PLL_ZMIN) 
-	    z = PLL_ZMIN;      	 
-	  
-	  x = -log(z) * pr->partitionData[perGene]->fracchange;
-	}
-    }
-
-  return x;
-}
-
-static char *pllTreeToNewickREC(char *treestr, pllInstance *tr, partitionList *pr, nodeptr p, pllBoolean printBranchLengths, pllBoolean printNames,
-			    pllBoolean printLikelihood, pllBoolean rellTree, pllBoolean finalPrint, int perGene, pllBoolean branchLabelSupport, pllBoolean printSHSupport)
-{
-  char  *nameptr;            
-      
-  if(isTip(p->number, tr->mxtips)) 
-    {	       	  
-      if(printNames)
-	{
-	  nameptr = tr->nameList[p->number];     
-	  sprintf(treestr, "%s", nameptr);
-	}
-      else
-	sprintf(treestr, "%d", p->number);    
-	
-      while (*treestr) treestr++;
-    }
-  else 
-    {                 	 
-      *treestr++ = '(';
-      treestr = pllTreeToNewickREC(treestr, tr, pr, p->next->back, printBranchLengths, printNames, printLikelihood, rellTree,
-			       finalPrint, perGene, branchLabelSupport, printSHSupport);
-      *treestr++ = ',';
-      treestr = pllTreeToNewickREC(treestr, tr, pr, p->next->next->back, printBranchLengths, printNames, printLikelihood, rellTree,
-			       finalPrint, perGene, branchLabelSupport, printSHSupport);
-      if(p == tr->start->back) 
-	{
-	  *treestr++ = ',';
-	  treestr = pllTreeToNewickREC(treestr, tr, pr, p->back, printBranchLengths, printNames, printLikelihood, rellTree,
-				   finalPrint, perGene, branchLabelSupport, printSHSupport);
-	}
-      *treestr++ = ')';                    
-    }
-
-  if(p == tr->start->back) 
-    {	      	 
-      if(printBranchLengths && !rellTree)
-	sprintf(treestr, ":0.0;\n");
-      else
-	sprintf(treestr, ";\n");	 	  	
-    }
-  else 
-    {                   
-      if(rellTree || branchLabelSupport || printSHSupport)
-	{	 	 
-	  if(( !isTip(p->number, tr->mxtips)) && 
-	     ( !isTip(p->back->number, tr->mxtips)))
-	    {			      
-	      assert(p->bInf != (branchInfo *)NULL);
-	      
-	      if(rellTree)
-		sprintf(treestr, "%d:%8.20f", p->bInf->support, p->z[0]);
-	      if(branchLabelSupport)
-		sprintf(treestr, ":%8.20f[%d]", p->z[0], p->bInf->support);
-	      if(printSHSupport)
-		sprintf(treestr, ":%8.20f[%d]", getBranchLength(tr, pr, perGene, p), p->bInf->support);
-	      
-	    }
-	  else		
-	    {
-	      if(rellTree || branchLabelSupport)
-		sprintf(treestr, ":%8.20f", p->z[0]);	
-	      if(printSHSupport)
-		sprintf(treestr, ":%8.20f", getBranchLength(tr, pr, perGene, p));
-	    }
-	}
-      else
-	{
-	  if(printBranchLengths)	    
-	    sprintf(treestr, ":%8.20f", getBranchLength(tr, pr, perGene, p));
-	  else	    
-	    sprintf(treestr, "%s", "\0");	    
-	}      
-    }
-  
-  while (*treestr) treestr++;
-  return  treestr;
-}
-
-
-char *pllTreeToNewick(char *treestr, pllInstance *tr, partitionList *pr, nodeptr p, pllBoolean printBranchLengths, pllBoolean printNames, pllBoolean printLikelihood,
-		  pllBoolean rellTree, pllBoolean finalPrint, int perGene, pllBoolean branchLabelSupport, pllBoolean printSHSupport)
-{ 
-
-  if(rellTree)
-    assert(!branchLabelSupport && !printSHSupport);
-
-  if(branchLabelSupport)
-    assert(!rellTree && !printSHSupport);
-
-  if(printSHSupport)
-    assert(!branchLabelSupport && !rellTree);
-
- 
-  pllTreeToNewickREC(treestr, tr, pr, p, printBranchLengths, printNames, printLikelihood, rellTree,
-		 finalPrint, perGene, branchLabelSupport, printSHSupport);  
-    
-  
-  while (*treestr) treestr++;
-  
-  return treestr;
-}
-
diff --git a/pllrepo/src/treeIO.h b/pllrepo/src/treeIO.h
deleted file mode 100644
index c91a1ab..0000000
--- a/pllrepo/src/treeIO.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * treeIO.h
- *
- *  Created on: Nov 22, 2012
- *      Author: tung
- */
-
-/*
-I just put some declarations of the functions that I need here.
-Please extend this file. It's important to have a header file.
-It make things much easier for the integration with other software.
-*/
-
-#ifndef TREEIO_H_
-#define TREEIO_H_
-
-#include "pll.h"
-
-char *pllTreeToNewick(char *treestr, tree *tr, nodeptr p, pllBoolean printBranchLengths, pllBoolean printNames, pllBoolean printLikelihood,
-		  pllBoolean rellTree, pllBoolean finalPrint, int perGene, pllBoolean branchLabelSupport, pllBoolean printSHSupport);
-double getBranchLength(pllInstance *tr, partitionList *pr, int perGene, nodeptr p);
-
-#endif /* TREEIO_H_ */
diff --git a/pllrepo/src/utils.c b/pllrepo/src/utils.c
deleted file mode 100644
index e7d0c42..0000000
--- a/pllrepo/src/utils.c
+++ /dev/null
@@ -1,3734 +0,0 @@
-/** 
- * PLL (version 1.0.0) a software library for phylogenetic inference
- * Copyright (C) 2013 Tomas Flouri and Alexandros Stamatakis
- *
- * Derived from 
- * RAxML-HPC, a program for sequential and parallel estimation of phylogenetic
- * trees by Alexandros Stamatakis
- *
- * This program is free software: you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation, either version 3 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- * 
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * For any other enquiries send an Email to Tomas Flouri
- * Tomas.Flouri at h-its.org
- *
- * When publishing work that uses PLL please cite PLL
- * 
- * @file utils.c
- *  
- * @brief Miscellaneous general utility and helper functions
- */
-#ifdef WIN32
-#include <direct.h>
-#endif
-
-#ifndef WIN32
-#include <sys/times.h>
-#include <sys/types.h>
-#include <sys/time.h>
-#include <unistd.h>
-#endif
-
-#include <math.h>
-#include <time.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <ctype.h>
-#include <string.h>
-#include <stdarg.h>
-#include <limits.h>
-#include <assert.h>
-#include <errno.h>
-#include "cycle.h"
-
-
-#if ! (defined(__ppc) || defined(__powerpc__) || defined(PPC))
-#if (defined(__AVX) || defined(__SSE3))
-#include <xmmintrin.h>
-#endif
-/*
-   special bug fix, enforces denormalized numbers to be flushed to zero,
-   without this program is a tiny bit faster though.
-#include <emmintrin.h> 
-#define MM_DAZ_MASK    0x0040
-#define MM_DAZ_ON    0x0040
-#define MM_DAZ_OFF    0x0000
-*/
-#endif
-
-#include "pll.h"
-#include "pllInternal.h"
-
-#define GLOBAL_VARIABLES_DEFINITION
-
-#include "globalVariables.h"
-
-/* mappings of BIN/DNA/AA alphabet to numbers */
-
-static const char PLL_MAP_BIN[256] =
- {
-   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  3, -1, -1,
-    1,  2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  3,
-   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
-  };
-
-static const char PLL_MAP_NT[256] =
- {
-   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 15, -1, -1,
-   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 15,
-   -1,  1, 14,  2, 13, -1, -1,  4, 11, -1, -1, 12, -1,  3, 15, 15,
-   -1, -1,  5,  6,  8,  8,  7,  9, 15, 10, -1, -1, -1, -1, -1, -1,
-   -1,  1, 14,  2, 13, -1, -1,  4, 11, -1, -1, 12, -1,  3, 15, 15,
-   -1, -1,  5,  6,  8,  8,  7,  9, 15, 10, -1, -1, -1, -1, -1, -1,
-   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
- };
-
-static const char PLL_MAP_AA[256] =
- {
-   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 22, -1, -1, 22, -1, -1,
-   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 22,
-   -1,  0, 20,  4,  3,  6, 13,  7,  8,  9, -1, 11, 10, 12,  2, -1,
-   14,  5,  1, 15, 16, -1, 19, 17, 22, 18, 21, -1, -1, -1, -1, -1,
-   -1,  0, 20,  4,  3,  6, 13,  7,  8,  9, -1, 11, 10, 12,  2, -1,
-   14,  5,  1, 15, 16, -1, 19, 17, 22, 18, 21, -1, -1, -1, -1, -1,
-   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
- };
-
-
-
-
-
-static void pllTreeInitDefaults (pllInstance * tr, int tips);
-static void getInnerBranchEndPointsRecursive (nodeptr p, int tips, int * i, node **nodes);
-#if (!defined(_FINE_GRAIN_MPI) && !defined(_USE_PTHREADS))
-static void initializePartitionsSequential(pllInstance *tr, partitionList *pr);
-#endif
-
-/** @defgroup instanceLinkingGroup Linking topology, partition scheme and alignment to the PLL instance
-    
-    This set of functions handles the linking of topology, partition scheme and multiple sequence alignment
-    with the PLL instance
-*/
-/***************** UTILITY FUNCTIONS **************************/
-
-#if (!defined(_SVID_SOURCE) && !defined(_BSD_SOURCE) && !defined(_POSIX_C_SOURCE) && !defined(_XOPEN_SOURCE) && !defined(_POSIX_SOURCE))
-static char *
-my_strtok_r (char * s, const char * delim, char **save_ptr)
-{  
-  char *token;
-   
-  /* Scan leading delimiters */
-  if (s == NULL)
-    s = *save_ptr;
-   
-  s += strspn (s, delim);
-  if (*s == '\0')
-   {
-     *save_ptr = s;
-     return NULL;
-   }
-   
-  /* Find the end of the token. */
-  token = s;
-  s = strpbrk (token, delim);
-  if (!s)
-    *save_ptr = strchr (token, '\0');
-  else
-   {
-     /* Terminate the token and make *SAVE_PTR point past it */
-     *s = '\0';
-     *save_ptr = s + 1;
-   }
-   
-  return token;
-}
-#endif
-
-#if (defined(_SVID_SOURCE) || defined(_BSD_SOURCE) || defined(_POSIX_C_SOURCE) || defined(_XOPEN_SOURCE) || defined(_POSIX_SOURCE))
-#define STRTOK_R strtok_r
-#else
-#define STRTOK_R my_strtok_r
-#endif
-
-
-
-
-void storeExecuteMaskInTraversalDescriptor(pllInstance *tr, partitionList *pr)
-{
-  int model;
-
-  for(model = 0; model < pr->numberOfPartitions; model++)
-    tr->td[0].executeModel[model] = pr->partitionData[model]->executeModel;
-
-}
-
-void storeValuesInTraversalDescriptor(pllInstance *tr, partitionList *pr, double *value)
-{
-  int model;
-
-  for(model = 0; model < pr->numberOfPartitions; model++)
-    tr->td[0].parameterValues[model] = value[model];
-}
-
-const unsigned int *getBitVector(int dataType)
-{
-  assert(PLL_MIN_MODEL < dataType && dataType < PLL_MAX_MODEL);
-
-  return pLengths[dataType].bitVector;
-}
-
-/*
-int getStates(int dataType)
-{
-  assert(PLL_MIN_MODEL < dataType && dataType < PLL_MAX_MODEL);
-
-  return pLengths[dataType].states;
-}
-*/
-
-int getUndetermined(int dataType)
-{
-  assert(PLL_MIN_MODEL < dataType && dataType < PLL_MAX_MODEL);
-
-  return pLengths[dataType].undetermined;
-}
-
-const partitionLengths *getPartitionLengths(pInfo *p)
-{
-  int 
-    dataType  = p->dataType,
-              states    = p->states,
-              tipLength = p->maxTipStates;
-
-  assert(states != -1 && tipLength != -1);
-
-  assert(PLL_MIN_MODEL < dataType && dataType < PLL_MAX_MODEL);
-
-  /*pLength.leftLength = pLength.rightLength = states * states;
-    pLength.eignLength = states;
-    pLength.evLength   = states * states;
-    pLength.eiLength   = states * states;
-    pLength.substRatesLength = (states * states - states) / 2;
-    pLength.frequenciesLength = states;
-    pLength.tipVectorLength   = tipLength * states;
-    pLength.symmetryVectorLength = (states * states - states) / 2;
-    pLength.frequencyGroupingLength = states;
-    pLength.nonGTR = PLL_FALSE;*/
-  return (&pLengths[dataType]); 
-}
-
-size_t discreteRateCategories(int rateHetModel)
-{
-  size_t 
-    result;
-
-  switch(rateHetModel)
-  {
-    case PLL_CAT:
-      result = 1;
-      break;
-    case PLL_GAMMA:
-      result = 4;
-      break;
-    default:
-      assert(0);
-  }
-
-  return result;
-}
-
-
-
-double gettime(void)
-{
-#ifdef WIN32
-  time_t tp;
-  struct tm localtm;
-  tp = time(NULL);
-  localtm = *localtime(&tp);
-  return 60.0*localtm.tm_min + localtm.tm_sec;
-#else
-  struct timeval ttime;
-  gettimeofday(&ttime , NULL);
-  return ttime.tv_sec + ttime.tv_usec * 0.000001;
-#endif
-}
-
-int gettimeSrand(void)
-{
-#ifdef WIN32
-  time_t tp;
-  struct tm localtm;
-  tp = time(NULL);
-  localtm = *localtime(&tp);
-  return 24*60*60*localtm.tm_yday + 60*60*localtm.tm_hour + 60*localtm.tm_min  + localtm.tm_sec;
-#else
-  struct timeval ttime;
-  gettimeofday(&ttime , NULL);
-  return ttime.tv_sec + ttime.tv_usec;
-#endif
-}
-
-double randum (long  *seed)
-{
-  long  sum, mult0, mult1, seed0, seed1, seed2, newseed0, newseed1, newseed2;
-  double res;
-
-  mult0 = 1549;
-  seed0 = *seed & 4095;
-  sum  = mult0 * seed0;
-  newseed0 = sum & 4095;
-  sum >>= 12;
-  seed1 = (*seed >> 12) & 4095;
-  mult1 =  406;
-  sum += mult0 * seed1 + mult1 * seed0;
-  newseed1 = sum & 4095;
-  sum >>= 12;
-  seed2 = (*seed >> 24) & 255;
-  sum += mult0 * seed2 + mult1 * seed1;
-  newseed2 = sum & 255;
-
-  *seed = newseed2 << 24 | newseed1 << 12 | newseed0;
-  res = 0.00390625 * (newseed2 + 0.000244140625 * (newseed1 + 0.000244140625 * newseed0));
-
-  return res;
-}
-
-
-/********************* END UTILITY FUNCTIONS ********************/
-
-
-/******************************some functions for the likelihood computation ****************************/
-
-
-/** @brief Check whether a node is a tip.
-    
-    Checks whether the node with number \a number is a tip.
-    
-    @param number
-     Node number to be checked
-   
-    @param maxTips
-     Number of tips in the tree
-   
-    @return
-      \b PLL_TRUE if tip, \b PLL_FALSE otherwise
-  */
-pllBoolean isTip(int number, int maxTips)
-{
-  assert(number > 0);
-
-  if(number <= maxTips)
-    return PLL_TRUE;
-  else
-    return PLL_FALSE;
-}
-
-/** @brief Set the orientation of a node
-
-    Sets the orientation of node \a p. That means, it will reset the orientation
-    \a p->next->x and \a p->next->next->x to 0 and of \a p->x to 1, meaning that
-    the conditional likelihood vector for that node is oriented on \a p, i.e.
-    the conditional likelihood vector represents the subtree rooted at \a p and
-    not any other of the two nodes.
-
-    @param p
-      Node which we want to orient
-*/
-void getxnode (nodeptr p)
-{
-  nodeptr  s;
-
-  if ((s = p->next)->x || (s = s->next)->x)
-  {
-    p->x = s->x;
-    s->x = 0;
-  }
-
-  assert(p->x);
-}
-
-
-/** @brief Connect two nodes and assign branch lengths 
-  * 
-  * Connect the two nodes \a p and \a q in each partition \e i with a branch of
-  * length \a z[i]
-  *
-  * @param p
-  *   Node \a p
-  * 
-  * @param q
-  *   Node \a q
-  *
-  * @param numBranches
-  *   Number of partitions
-  */
-void hookup (nodeptr p, nodeptr q, double *z, int numBranches)
-{
-  int i;
-
-  p->back = q;
-  q->back = p;
-
-  for(i = 0; i < numBranches; i++)
-    p->z[i] = q->z[i] = z[i];
-}
-
-/* connects node p with q and assigns the branch lengths z for the whole vector*/
-void hookupFull (nodeptr p, nodeptr q, double *z)
-{
-  //int i;
-
-  p->back = q;
-  q->back = p;
-
-  memcpy(p->z, z, PLL_NUM_BRANCHES*sizeof(double) );
-  memcpy(q->z, z, PLL_NUM_BRANCHES*sizeof(double) );
-  //for(i = 0; i < numBranches; i++)
-  //  p->z[i] = q->z[i] = z[i];
-
-}
-
-/* connect node p with q and assign the default branch lengths */
-void hookupDefault (nodeptr p, nodeptr q)
-{
-  int i;
-
-  p->back = q;
-  q->back = p;
-
-  for(i = 0; i < PLL_NUM_BRANCHES; i++)
-    p->z[i] = q->z[i] = PLL_DEFAULTZ;
-
-}
-
-
-/***********************reading and initializing input ******************/
-
-
-
-pllBoolean whitechar (int ch)
-{
-  return (ch == ' ' || ch == '\n' || ch == '\t' || ch == '\r');
-}
-/*
-static unsigned int KISS32(void)
-{
-  static unsigned int 
-    x = 123456789, 
-      y = 362436069,
-      z = 21288629,
-      w = 14921776,
-      c = 0;
-
-  unsigned int t;
-
-  x += 545925293;
-  y ^= (y<<13); 
-  y ^= (y>>17); 
-  y ^= (y<<5);
-  t = z + w + c; 
-  z = w; 
-  c = (t>>31); 
-  w = t & 2147483647;
-
-  return (x+y+w);
-}
-*/
-
-/** @brief Get a random subtree
-
-    Returns the root node of a randomly picked subtree of the tree in PLL
-    instance \a tr. The picked subtree is guaranteed to have height over
-    1, that is, the direct descendents of the returned (root) node are not tips.
-
-    @param tr
-      PLL instance
-
-    @return
-      The root node of the randomly picked subtree
-*/
-nodeptr pllGetRandomSubtree(pllInstance *tr)
-{
-  nodeptr p;
-  do
-  {
-    int exitDirection = rand() % 3; 
-    p = tr->nodep[(rand() % (tr->mxtips - 2)) + 1 + tr->mxtips];
-    switch(exitDirection)
-    {
-      case 0:
-        break;
-      case 1:
-        p = p->next;
-        break;
-      case 2:
-        p = p->next->next;
-        break;
-      default:
-        assert(0);
-    }
-  }
-  while(isTip(p->next->back->number, tr->mxtips) && isTip(p->next->next->back->number, tr->mxtips));
-  assert(!isTip(p->number, tr->mxtips));
-  return p;
-}
-/* small example program that executes ancestral state computations 
-   on the entire subtree rooted at p.
-
-   Note that this is a post-order traversal.
-*/
-
-  
-void computeAllAncestralVectors(nodeptr p, pllInstance *tr, partitionList *pr)
-{
-  /* if this is not a tip, for which evidently it does not make sense 
-     to compute the ancestral sequence because we have the real one ....
-  */
-
-  if(!isTip(p->number, tr->mxtips))
-    {
-      /* descend recursively to compute the ancestral states in the left and right subtrees */
-
-      computeAllAncestralVectors(p->next->back, tr, pr);
-      computeAllAncestralVectors(p->next->next->back, tr, pr);
-      
-      /* then compute the ancestral state at node p */
-
-      pllUpdatePartialsAncestral(tr, pr, p);
-
-      /* and print it to terminal, the two booleans that are set to PLL_TRUE here 
-         tell the function to print the marginal probabilities as well as 
-         a discrete inner sequence, that is, ACGT etc., always selecting and printing 
-         the state that has the highest probability */
-
-      printAncestralState(p, PLL_TRUE, PLL_TRUE, tr, pr);
-    }
-}
-
-
-
-void initializePartitionData(pllInstance *localTree, partitionList * localPartitions)
-{
-  /* in ancestralVectorWidth we store the total length in bytes (!) of 
-     one conditional likelihood array !
-     we need to know this length such that in the pthreads version the master thread can actually 
-     gather the scattered ancestral probabilities from the threads such that they can be printed to screen!
-  */
-
-  size_t 
-    maxCategories = (size_t)localTree->maxCategories;
-
-  size_t 
-    ancestralVectorWidth = 0,
-    model; 
-
-  int 
-    tid  = localTree->threadID,
-    innerNodes = localTree->mxtips - 2;
-
-  if(tid > 0)
-      localTree->rateCategory    = (int *)    rax_calloc((size_t)localTree->originalCrunchedLength, sizeof(int));           
-
-  for(model = 0; model < (size_t)localPartitions->numberOfPartitions; model++)
-    {
-      size_t 
-        width = localPartitions->partitionData[model]->width;
-
-      const partitionLengths 
-        *pl = getPartitionLengths(localPartitions->partitionData[model]);
-
-      /* 
-         globalScaler needs to be 2 * localTree->mxtips such that scalers of inner AND tip nodes can be added without a case switch
-         to this end, it must also be initialized with zeros -> calloc
-      */
-
-      localPartitions->partitionData[model]->globalScaler    = (unsigned int *)rax_calloc(2 *(size_t)localTree->mxtips, sizeof(unsigned int));
-
-      rax_posix_memalign ((void **)&(localPartitions->partitionData[model]->left),  PLL_BYTE_ALIGNMENT, (size_t)pl->leftLength * (maxCategories + 1) * sizeof(double));
-      rax_posix_memalign ((void **)&(localPartitions->partitionData[model]->right), PLL_BYTE_ALIGNMENT, (size_t)pl->rightLength * (maxCategories + 1) * sizeof(double));
-      localPartitions->partitionData[model]->EIGN              = (double*)rax_malloc((size_t)pl->eignLength * sizeof(double));
-      rax_posix_memalign ((void **)&(localPartitions->partitionData[model]->EV),    PLL_BYTE_ALIGNMENT, (size_t)pl->evLength * sizeof(double));
-      localPartitions->partitionData[model]->EI                = (double*)rax_malloc((size_t)pl->eiLength * sizeof(double));
-      localPartitions->partitionData[model]->substRates        = (double *)rax_malloc((size_t)pl->substRatesLength * sizeof(double));
-      localPartitions->partitionData[model]->frequencies       = (double*)rax_malloc((size_t)pl->frequenciesLength * sizeof(double));
-      localPartitions->partitionData[model]->freqExponents     = (double*)rax_malloc(pl->frequenciesLength * sizeof(double));
-      localPartitions->partitionData[model]->empiricalFrequencies       = (double*)rax_malloc((size_t)pl->frequenciesLength * sizeof(double));
-      rax_posix_memalign ((void **)&(localPartitions->partitionData[model]->tipVector), PLL_BYTE_ALIGNMENT, (size_t)pl->tipVectorLength * sizeof(double));
-      //localPartitions->partitionData[model]->partitionName      = NULL;   // very imporatant since it is deallocated in pllPartitionDestroy
-      
-       if(localPartitions->partitionData[model]->dataType == PLL_AA_DATA
-               && (localPartitions->partitionData[model]->protModels == PLL_LG4M || localPartitions->partitionData[model]->protModels == PLL_LG4X))
-        {
-          int 
-            k;
-          
-          for(k = 0; k < 4; k++)
-            {       
-              localPartitions->partitionData[model]->EIGN_LG4[k]              = (double*)rax_malloc(pl->eignLength * sizeof(double));
-              rax_posix_memalign ((void **)&(localPartitions->partitionData[model]->EV_LG4[k]), PLL_BYTE_ALIGNMENT, pl->evLength * sizeof(double));
-              localPartitions->partitionData[model]->EI_LG4[k]                = (double*)rax_malloc(pl->eiLength * sizeof(double));
-              localPartitions->partitionData[model]->substRates_LG4[k]        = (double *)rax_malloc(pl->substRatesLength * sizeof(double));
-              localPartitions->partitionData[model]->frequencies_LG4[k]       = (double*)rax_malloc(pl->frequenciesLength * sizeof(double));
-              rax_posix_memalign ((void **)&(localPartitions->partitionData[model]->tipVector_LG4[k]), PLL_BYTE_ALIGNMENT, pl->tipVectorLength * sizeof(double));
-            }
-        }
-
-      localPartitions->partitionData[model]->symmetryVector    = (int *)rax_malloc((size_t)pl->symmetryVectorLength  * sizeof(int));
-      localPartitions->partitionData[model]->frequencyGrouping = (int *)rax_malloc((size_t)pl->frequencyGroupingLength  * sizeof(int));
-
-      localPartitions->partitionData[model]->perSiteRates      = (double *)rax_malloc(sizeof(double) * maxCategories);
-
-      localPartitions->partitionData[model]->nonGTR = PLL_FALSE;
-
-      localPartitions->partitionData[model]->gammaRates = (double*)rax_malloc(sizeof(double) * 4);
-      localPartitions->partitionData[model]->yVector = (unsigned char **)rax_malloc(sizeof(unsigned char*) * ((size_t)localTree->mxtips + 1));
-
-
-      localPartitions->partitionData[model]->xVector = (double **)rax_calloc(sizeof(double*), (size_t)localTree->mxtips);
-
-      if (localPartitions->partitionData[model]->ascBias)
-       {
-         localPartitions->partitionData[model]->ascOffset    = 4 * localPartitions->partitionData[model]->states * localPartitions->partitionData[model]->states;
-         localPartitions->partitionData[model]->ascVector    = (double *)rax_malloc(innerNodes * 
-                                                                                    localPartitions->partitionData[model]->ascOffset * 
-                                                                                    sizeof(double));
-         localPartitions->partitionData[model]->ascExpVector = (int *)rax_calloc(innerNodes *
-                                                                                 localPartitions->partitionData[model]->states,
-                                                                                 sizeof(int));
-         localPartitions->partitionData[model]->ascSumBuffer = (double *)rax_malloc(localPartitions->partitionData[model]->ascOffset * sizeof(double)); 
-       }
-
-
-      /* 
-         Initializing the xVector array like this is absolutely required !!!!
-         I don't know which programming genious removed this, but it must absolutely stay in here!!!!
-      */
-      
-      {
-        int k;
-        
-        for(k = 0; k < localTree->mxtips; k++)
-              localPartitions->partitionData[model]->xVector[k] = (double*)NULL;       
-      }
-
-
-      localPartitions->partitionData[model]->xSpaceVector = (size_t *)rax_calloc((size_t)localTree->mxtips, sizeof(size_t));
-
-      const size_t span = (size_t)(localPartitions->partitionData[model]->states) *
-              discreteRateCategories(localTree->rateHetModel);
-
-#ifdef __MIC_NATIVE
-
-      // Alexey: sum buffer buffer padding for Xeon PHI
-      const int aligned_width = width % PLL_VECTOR_WIDTH == 0 ? width : width + (PLL_VECTOR_WIDTH - (width % PLL_VECTOR_WIDTH));
-
-      rax_posix_memalign ((void **)&(localPartitions->partitionData[model]->sumBuffer), PLL_BYTE_ALIGNMENT, aligned_width *
-                                                                                      span *
-                                                                                      sizeof(double));
-
-      // Alexey: fill padding entries with 1. (will be corrected with site weights, s. below)
-      {
-          int k;
-          for (k = width*span; k < aligned_width*span; ++k)
-              localPartitions->partitionData[model]->sumBuffer[k] = 1.;
-      }
-
-#else
-
-      rax_posix_memalign ((void **)&(localPartitions->partitionData[model]->sumBuffer), PLL_BYTE_ALIGNMENT, width *
-                                              span *
-                                              sizeof(double));
-#endif
-
-      /* Initialize buffers to store per-site log likelihoods */
-
-      rax_posix_memalign ((void **)&(localPartitions->partitionData[model]->perSiteLikelihoods), PLL_BYTE_ALIGNMENT, width * sizeof(double));
-
-      /* initialize data structures for per-site likelihood scaling */
-
-      if(localTree->fastScaling)
-        {
-           localPartitions->partitionData[model]->expVector      = (int **)NULL;
-           localPartitions->partitionData[model]->expSpaceVector = (size_t *)NULL;
-        }
-      else
-        {        
-          localPartitions->partitionData[model]->expVector      = (int **)rax_malloc(sizeof(int*) * innerNodes);
-           
-          /* 
-             Initializing the expVector array like this is absolutely required !!!!
-             Not doing this can (and did) cause segmentation faults !!!!
-          */
-          
-          {
-            int k;
-
-            for(k = 0; k < innerNodes; k++)
-              localPartitions->partitionData[model]->expVector[k] = (int*)NULL; 
-          }
-
-          localPartitions->partitionData[model]->expSpaceVector = (size_t *)rax_calloc(innerNodes, sizeof(size_t));
-        }
-
-      /* data structure to store the marginal ancestral probabilities in the sequential version or for each thread */
-
-      rax_posix_memalign ((void **)&(localPartitions->partitionData[model]->ancestralBuffer), PLL_BYTE_ALIGNMENT, width *
-                                                                                 (size_t)(localPartitions->partitionData[model]->states) *
-                                                                                 sizeof(double));
-
-      /* count and accumulate how many bytes we will need for storing a full ancestral vector. for this we addf over the per-partition space requirements in bytes */
-      /* ancestralVectorWidth += ((size_t)(pr->partitionData[model]->upper - pr->partitionData[model]->lower) * (size_t)(localPartitions->partitionData[model]->states) * sizeof(double)); */
-      ancestralVectorWidth += ((size_t)(localPartitions->partitionData[model]->upper - localPartitions->partitionData[model]->lower) * (size_t)(localPartitions->partitionData[model]->states) * sizeof(double));
-      /* :TODO: do we have to use the original tree for that   */
-
-#ifdef __MIC_NATIVE
-
-      rax_posix_memalign ((void **)&(localPartitions->partitionData[model]->wgt), PLL_BYTE_ALIGNMENT, aligned_width * sizeof(int));
-
-      // Alexey: fill padding entries with 0.
-      {
-          int k;
-          for (k = width; k < aligned_width; ++k)
-              localPartitions->partitionData[model]->wgt[k] = 0;
-      }
-#else
-      rax_posix_memalign ((void **)&(localPartitions->partitionData[model]->wgt), PLL_BYTE_ALIGNMENT, width * sizeof(int));
-#endif
-
-      /* rateCategory must be assigned using rax_calloc() at start up there is only one rate category 0 for all sites */
-
-      localPartitions->partitionData[model]->rateCategory = (int *)rax_calloc(width, sizeof(int));
-
-      if(width > 0 && localTree->saveMemory)
-        {
-          localPartitions->partitionData[model]->gapVectorLength = ((int)width / 32) + 1;
-          assert(4 == sizeof(unsigned int));
-          localPartitions->partitionData[model]->gapVector = (unsigned int*)rax_calloc((size_t)localPartitions->partitionData[model]->gapVectorLength * 2 * (size_t)localTree->mxtips, sizeof(unsigned int));
-          rax_posix_memalign ((void **)&(localPartitions->partitionData[model]->gapColumn),PLL_BYTE_ALIGNMENT, ((size_t)localTree->mxtips) *
-                                                                               ((size_t)(localPartitions->partitionData[model]->states)) *
-                                                                               discreteRateCategories(localTree->rateHetModel) * sizeof(double));
-        }
-      else
-        {
-          localPartitions->partitionData[model]->gapVectorLength = 0;
-          localPartitions->partitionData[model]->gapVector = (unsigned int*)NULL;
-          localPartitions->partitionData[model]->gapColumn = (double*)NULL;
-        }              
-    }
-}
-
-int virtual_width( int n ) {
-    const int global_vw = 2;
-    return (n+1) / global_vw * global_vw;
-}
-
-
-void initMemorySavingAndRecom(pllInstance *tr, partitionList *pr)
-{
-  pllInstance  
-    *localTree = tr; 
-  partitionList
-    *localPartitions = pr;
-  size_t model; 
-
-  /* initialize gap bit vectors at tips when memory saving option is enabled */
-
-  if(localTree->saveMemory)
-    {
-      for(model = 0; model < (size_t)localPartitions->numberOfPartitions; model++)
-        {
-          int        
-            undetermined = getUndetermined(localPartitions->partitionData[model]->dataType);
-
-          size_t
-            i,
-            j,
-            width =  localPartitions->partitionData[model]->width;
-
-          if(width > 0)
-            {                                        
-              for(j = 1; j <= (size_t)(localTree->mxtips); j++)
-                for(i = 0; i < width; i++)
-                  if(localPartitions->partitionData[model]->yVector[j][i] == undetermined)
-                    localPartitions->partitionData[model]->gapVector[localPartitions->partitionData[model]->gapVectorLength * j + i / 32] |= mask32[i % 32];
-            }     
-        }
-    }
-  /* recom */
-  if(localTree->useRecom)
-    allocRecompVectorsInfo(localTree);
-  else
-    localTree->rvec = (recompVectors*)NULL;
-  /* E recom */
-}
-
-/** @brief Get the length of a specific branch
-
-    Get the length of the branch specified by node \a p and \a p->back
-    of partition \a partition_id.
-    The branch length is decoded from the PLL representation.
-
-    @param tr
-      PLL instance
-
-    @param p
-      Specifies one end-point of the branch. The other one is \a p->back
-
-    @param partition_id
-      Specifies the partition
-
-    @return
-      The branch length
-*/
-double pllGetBranchLength (pllInstance *tr, nodeptr p, int partition_id)
-{
-  //assert(partition_id < tr->numBranches);
-  assert(partition_id < PLL_NUM_BRANCHES);
-  assert(partition_id >= 0);
-  assert(tr->fracchange != -1.0);
-  double z = p->z[partition_id];
-  if(z < PLL_ZMIN) z = PLL_ZMIN;
-  if(z > PLL_ZMAX) z = PLL_ZMAX;
-  return (-log(z) * tr->fracchange);
-}
-
-/** @brief Set the length of a specific branch
-
-    Set the length of the branch specified by node \a p and \a p->back
-    of partition \a partition_id.
-    The function encodes the branch length to the PLL representation.
-
-    @param tr
-      PLL instance
-
-    @param p
-      Specifies one end-point of the branch. The other one is \a p->back
-
-    @param partition_id
-      Specifies the partition
-
-    @param bl
-      Branch length
-*/
-void pllSetBranchLength (pllInstance *tr, nodeptr p, int partition_id, double bl)
-{
-  //assert(partition_id < tr->numBranches);
-  assert(partition_id < PLL_NUM_BRANCHES);
-  assert(partition_id >= 0);
-  assert(tr->fracchange != -1.0);
-  double z;
-  z = exp((-1 * bl)/tr->fracchange);
-  if(z < PLL_ZMIN) z = PLL_ZMIN;
-  if(z > PLL_ZMAX) z = PLL_ZMAX;
-  p->z[partition_id] = z;
-}
-
-#if (!defined(_FINE_GRAIN_MPI) && !defined(_USE_PTHREADS))
-/* Partition set-up for builds without fine-grain MPI and without Pthreads:
-   checks that every partition's width equals upper - lower, calls
-   initializePartitionData, points each partition's tip rows into the
-   instance-wide alignment, copies the matching slice of pattern weights and
-   finally calls initMemorySavingAndRecom. */
-static void initializePartitionsSequential(pllInstance *tr, partitionList *pr)
-{
-  size_t part;
-
-  /* sanity check of the partition boundaries */
-  for (part = 0; part < (size_t)pr->numberOfPartitions; part++)
-  {
-    pInfo *pd = pr->partitionData[part];
-    assert(pd->width == pd->upper - pd->lower);
-  }
-
-  initializePartitionData(tr, pr);
-
-  /* figure in tip sequence data per-site pattern weights */
-  for (part = 0; part < (size_t)pr->numberOfPartitions; part++)
-  {
-    pInfo *pd = pr->partitionData[part];
-    size_t first = pd->lower;
-    size_t count = pd->upper - first;
-    size_t tip;
-
-    /* tips are numbered 1..mxtips; each row starts at the partition's lower bound */
-    for (tip = 1; tip <= (size_t)tr->mxtips; tip++)
-      pd->yVector[tip] = &(tr->yVector[tip][pd->lower]);
-
-    memcpy((void*)(&(pd->wgt[0])), (void*)(&(tr->aliaswgt[first])), sizeof(int) * count);
-  }
-
-  initMemorySavingAndRecom(tr, pr);
-}
-#endif
-
-
-/* interface to outside  */
-//void initializePartitions(pllInstance *tr, pllInstance *localTree, partitionList *pr, partitionList *localPr, int tid, int n)
-//{
-//#if (defined(_FINE_GRAIN_MPI) || defined(_USE_PTHREADS))
-//  initializePartitionsMaster(tr,localTree,pr,localPr,tid,n);
-//#else
-//  initializePartitionsSequential(tr, pr);
-//#endif
-//}
-
-/* Release a linkage list: the partitionList array of each of its
-   ll->entries elements, then the element array ll->ld, then ll itself. */
-static void freeLinkageList( linkageList* ll)
-{
-  int idx = 0;
-
-  while (idx < ll->entries)
-   {
-     rax_free(ll->ld[idx].partitionList);
-     ++ idx;
-   }
-
-  rax_free(ll->ld);
-  rax_free(ll);
-}
-
-/** @brief Free all data structures associated with a partition list
-
-    Releases the three linkage lists (alpha, frequencies, rates), every
-    per-partition buffer, the array of partitions and the list itself, and
-    finally sets \a *partitions to \b NULL.
-
-    In Pthreads builds the master thread first signals
-    PLL_THREAD_EXIT_GRACEFULLY through pllMasterBarrier and calls
-    pllStopPthreads; in fine-grain MPI builds the master issues the same
-    barrier. In both kinds of build the linkage lists are freed by the master
-    only, and \a tr->y_ptr is freed at the end.
-
-    @param tr
-      PLL instance; provides the number of tips (\a tr->mxtips) and, in
-      threaded builds, the thread id and \a y_ptr
-
-    @param partitions
-      Address of the pointer to the partition list; set to \b NULL on return
-*/
-void 
-pllPartitionsDestroy (pllInstance * tr, partitionList ** partitions)
-{
-  int i, j, tips;
-  partitionList * pl = *partitions;
-
-#ifdef _USE_PTHREADS
-  /* NOTE(review): tid is presumably what the MASTER_P macro inspects - confirm */
-  int tid = tr->threadID;
-  if (MASTER_P) {
-     pllMasterBarrier (tr, pl, PLL_THREAD_EXIT_GRACEFULLY);
-     pllStopPthreads (tr);
-    }
-#endif
-
-  tips = tr->mxtips;
-
-  /* the linkage lists are released by the master only in threaded/MPI builds;
-     the braces opened inside the #ifdef blocks are closed in the matching
-     #ifdef blocks below */
-#ifdef _USE_PTHREADS
-  if (MASTER_P) {
-#endif
-#ifdef _FINE_GRAIN_MPI
-if (MASTER_P) {
-     pllMasterBarrier (tr, pl, PLL_THREAD_EXIT_GRACEFULLY);
-#endif
-  freeLinkageList(pl->alphaList);
-  freeLinkageList(pl->freqList); 
-  freeLinkageList(pl->rateList);
-#ifdef _FINE_GRAIN_MPI
-}
-#endif
-
-#ifdef _USE_PTHREADS
-  }
-#endif
-  /* per-partition buffers */
-  for (i = 0; i < pl->numberOfPartitions; ++ i)
-   {
-     rax_free (pl->partitionData[i]->gammaRates);
-     rax_free (pl->partitionData[i]->perSiteRates);
-     rax_free (pl->partitionData[i]->globalScaler);
-     rax_free (pl->partitionData[i]->left);
-     rax_free (pl->partitionData[i]->right);
-     rax_free (pl->partitionData[i]->EIGN);
-     rax_free (pl->partitionData[i]->EV);
-     rax_free (pl->partitionData[i]->EI);
-     rax_free (pl->partitionData[i]->substRates);
-     rax_free (pl->partitionData[i]->frequencies);
-     rax_free (pl->partitionData[i]->freqExponents);
-     rax_free (pl->partitionData[i]->empiricalFrequencies);
-     rax_free (pl->partitionData[i]->tipVector);
-     rax_free (pl->partitionData[i]->symmetryVector);
-     rax_free (pl->partitionData[i]->frequencyGrouping);
-     /* entries 0 .. tips-1 of xVector are freed individually before the array */
-     for (j = 0; j < tips; ++ j)
-       rax_free (pl->partitionData[i]->xVector[j]);
-     rax_free (pl->partitionData[i]->xVector);
-     rax_free (pl->partitionData[i]->yVector);
-     rax_free (pl->partitionData[i]->xSpaceVector);
-     rax_free (pl->partitionData[i]->sumBuffer);
-     rax_free (pl->partitionData[i]->ancestralBuffer);
-     rax_free (pl->partitionData[i]->wgt);
-     rax_free (pl->partitionData[i]->rateCategory);
-     rax_free (pl->partitionData[i]->gapVector);
-     rax_free (pl->partitionData[i]->gapColumn);
-     rax_free (pl->partitionData[i]->perSiteLikelihoods);
-     rax_free (pl->partitionData[i]->partitionName);
-     rax_free (pl->partitionData[i]->expSpaceVector);
-     /* TODO: Deallocate all entries of expVector; only the first tips - 2
-        entries are released here */
-     if (pl->partitionData[i]->expVector)
-      {
-        for (j = 0; j < tips - 2; ++ j)
-          rax_free (pl->partitionData[i]->expVector[j]);
-      }
-     rax_free (pl->partitionData[i]->expVector);
-     rax_free (pl->partitionData[i]);
-   }
-  rax_free (pl->partitionData);
-  rax_free (pl);
-
-  /* make sure the caller is not left with a dangling pointer */
-  *partitions = NULL;
-
-#if (defined(_USE_PTHREADS) || defined(_FINE_GRAIN_MPI))
-     rax_free (tr->y_ptr);
-#endif
-}
-
-/** @ingroup instanceLinkingGroup
-    @brief Correspondence check between partitions and alignment
-
-    This function checks whether the partitions to be created and the given
-    alignment correspond, that is, whether each site of the alignment is
-    assigned to exactly one partition.
-
-    The check fails if the list of partitions is empty, if a region starts
-    before site 1, ends past the last site of the alignment or has a stride
-    smaller than 1, if a site is claimed more than once, or if a site is not
-    claimed at all.
-
-    @param parts
-      A list of partitions suggested by the caller
-
-    @param alignmentData
-      The multiple sequence alignment
-
-    @return
-      Returns \a 1 in case of success, otherwise \a 0
-*/
-int
-pllPartitionsValidate (pllQueue * parts, pllAlignmentData * alignmentData)
-{
-  char * used;
-  struct pllQueueItem * elm;
-  struct pllQueueItem * regionItem;
-  pllPartitionRegion * region;
-  pllPartitionInfo * pi;
-  int i;
-  int valid = 0;
-
-  /* check if the list contains at least one partition */
-  if (!pllQueueSize (parts))
-    return (0);
-
-  /* one flag per site, set once the site has been assigned to a partition */
-  used = (char *) rax_calloc (alignmentData->sequenceLength, sizeof (char));
-  if (!used && alignmentData->sequenceLength > 0)
-    return (0);
-
-  /* traverse all partitions and their respective regions and mark sites */
-  for (elm = parts->head; elm; elm = elm->next)
-   {
-     pi = (pllPartitionInfo *) elm->item;
-
-     for (regionItem = pi->regionList->head; regionItem; regionItem = regionItem->next)
-      {
-        region = (pllPartitionRegion *) regionItem->item;
-
-        /* a stride of 0 would never advance the loop below and a negative
-           one would walk below index 0, so both are rejected up front */
-        if (region->start < 1 ||
-            region->end > alignmentData->sequenceLength ||
-            region->stride < 1)
-          goto cleanup;
-
-        /* region bounds are 1-based and inclusive, used[] is 0-based */
-        for (i = region->start - 1; i < region->end; i += region->stride)
-         {
-           if (used[i])
-             goto cleanup;      /* site claimed twice */
-           used[i] = 1;
-         }
-      }
-   }
-
-  /* check whether all sites were assigned a partition */
-  for (i = 0; i < alignmentData->sequenceLength; ++ i)
-   {
-     if (used[i] != 1)
-       goto cleanup;
-   }
-
-  valid = 1;
-
-cleanup:
-  rax_free (used);
-  return (valid);
-}
-
-/** @brief Swap two sites in a buffer
-
-    Exchanges columns \a s1 and \a s2 in every row 1 .. \a nTaxa of \a buf.
-    Row 0 of \a buf is never accessed.
-
-    @param buf
-      Memory buffer, indexed as buf[row][site]
-
-    @param s1
-      First site
-
-    @param s2
-      Second site
-
-    @param nTaxa
-      Number of taxa, i.e. number of rows to process
-*/
-static __inline void
-swapSite (unsigned char ** buf, int s1, int s2, int nTaxa)
-{
-  int row = 1;
-
-  while (row <= nTaxa)
-   {
-     unsigned char * seq  = buf[row];
-     unsigned char   kept = seq[s1];
-
-     seq[s1] = seq[s2];
-     seq[s2] = kept;
-     ++ row;
-   }
-}
-
-/** @brief Constructs the list of partitions according to the proposed partition scheme
-
-    A static function that constructs the \a partitionList structure according to
-    the partition scheme \b AFTER the sites have been repositioned in contiguous
-    regions according to the partition scheme.
-
-    Only \a perGeneBranchLengths and \a partitionData of the returned list are
-    initialized here; the remaining fields are left for the caller to fill in.
-
-    @param parts  The queue of partition descriptions (\a pllPartitionInfo items)
-
-    @param bounds  An array of the new starting and ending positions of sites
-    in the alignment for each partition. It holds two entries per partition,
-    always as couples (lower,upper). The upper bound is a site that is not
-    included in the partition
-
-    @return The new partition list, or \b NULL if one of the partitions has a
-    data type outside (PLL_MIN_MODEL, PLL_MAX_MODEL)
-*/
-static partitionList * createPartitions (pllQueue * parts, int * bounds)
-{
-  partitionList * pl;
-  pllPartitionInfo * pi;
-  pInfo * pd;
-  struct pllQueueItem * elm;
-  int i, j;
-  int nparts, capacity;
-
-  pl = (partitionList *) rax_malloc (sizeof (partitionList));
-
-  // TODO: fix this
-  pl->perGeneBranchLengths =      0;
-
-  /* The array must be able to hold one entry per partition. It is never made
-     smaller than the PLL_NUM_BRANCHES slots that used to be allocated, but it
-     grows with the queue so that long partition lists no longer write past
-     the end of the array. */
-  nparts   = pllQueueSize (parts);
-  capacity = (nparts > PLL_NUM_BRANCHES) ? nparts : PLL_NUM_BRANCHES;
-  pl->partitionData = (pInfo **) rax_calloc (capacity, sizeof (pInfo *));
-
-  for (i = 0, elm = parts->head; elm; elm = elm->next, ++ i)
-   {
-     pi = (pllPartitionInfo *) elm->item;
-
-     /* check whether the data type is valid, and in case it's not, deallocate
-        and return NULL */
-     if (pi->dataType <= PLL_MIN_MODEL || pi->dataType >= PLL_MAX_MODEL)
-      {
-        for (j = 0; j < i; ++ j)
-         {
-           rax_free (pl->partitionData[j]->partitionName);
-           rax_free (pl->partitionData[j]);
-         }
-        rax_free (pl->partitionData);
-        rax_free (pl);
-        return (NULL);
-      }
-
-     pd = (pInfo *) rax_malloc (sizeof (pInfo));
-     pl->partitionData[i] = pd;
-
-     /* bounds holds the couple (lower, upper) of partition i at 2i and 2i + 1 */
-     pd->lower = bounds[i << 1];
-     pd->upper = bounds[(i << 1) + 1];
-     pd->width = bounds[(i << 1) + 1] - bounds[i << 1];
-     pd->partitionWeight = 1.0 * (double) pd->width;
-
-     //the two flags below are required to allow users to set
-     //alpha parameters and substitution rates in the Q matrix
-     //to fixed values. These parameters will then not be optimized
-     //in the model parameter optimization functions
-     //by default we assume that all parameters are being optimized, i.e.,
-     //this has to be explicitly set by the user
-
-     pd->optimizeAlphaParameter    = PLL_TRUE;
-     pd->optimizeSubstitutionRates = PLL_TRUE;
-     pd->dataType                  = pi->dataType;
-     pd->protModels                = -1;
-     pd->protUseEmpiricalFreqs     = -1;
-     pd->maxTipStates              = pLengths[pi->dataType].undetermined + 1;
-     pd->optimizeBaseFrequencies   = pi->optimizeBaseFrequencies;
-     pd->ascBias                   = pi->ascBias;
-     pd->parsVect                  = NULL;
-
-     if (pi->dataType == PLL_AA_DATA)
-      {
-        /* copy the protein settings first: the GTR test below has to look at
-           the requested model, not at the -1 placeholder set above */
-        pd->protUseEmpiricalFreqs = pi->protUseEmpiricalFreqs;
-        pd->protModels            = pi->protModels;
-
-        if (pd->protModels != PLL_GTR)
-          pd->optimizeSubstitutionRates = PLL_FALSE;
-      }
-
-     pd->states                = pLengths[pd->dataType].states;
-     pd->numberOfCategories    =        1;
-     pd->autoProtModels        =        0;
-     pd->nonGTR                =        PLL_FALSE;
-     pd->partitionContribution =     -1.0;
-     pd->partitionLH           =      0.0;
-     pd->fracchange            =      1.0;
-     pd->executeModel          =     PLL_TRUE;
-
-     /* the partition keeps its own copy of the name */
-     pd->partitionName         = (char *) rax_malloc ((strlen (pi->partitionName) + 1) * sizeof (char));
-     strcpy (pd->partitionName, pi->partitionName);
-   }
-
-  return (pl);
-}
-
-
-/** @ingroup instanceLinkingGroup
-    @brief Constructs the proposed partition scheme
-
-    Rearranges the columns of the alignment so that the sites of every
-    partition become contiguous, in the order in which the partitions and
-    their regions appear in \a parts, and then builds the partition list from
-    the resulting (lower, upper) column bounds.
-
-    @note This function \b does \b not validate the partition scheme.
-    The user must manually call the ::pllPartitionsValidate function
-    for validation
-
-    @param parts
-      A list of partitions suggested by the caller
-
-    @param alignmentData
-      The multiple sequence alignment; its sequence data is reordered in place
-
-    @return
-      Returns a pointer to \a partitionList structure of partitions in case of success, \b NULL otherwise
-*/
-partitionList * pllPartitionsCommit (pllQueue * parts, pllAlignmentData * alignmentData)
-{
-  struct pllQueueItem * partItem;
-  struct pllQueueItem * regItem;
-  pllPartitionRegion * reg;
-  pllPartitionInfo * info;
-  partitionList * result;
-  int * perm;
-  int * limits;
-  int site, cur, held;
-  int nparts;
-  int next = 0;   /* next free column in the rearranged alignment */
-  int part = 0;   /* index of the partition being processed */
-
-  /* perm starts out as the identity over all columns */
-  perm = (int *) rax_malloc (alignmentData->sequenceLength * sizeof (int));
-  for (site = 0; site < alignmentData->sequenceLength; ++ site)
-    perm[site] = site;
-
-  nparts = pllQueueSize (parts);
-  limits = (int *) rax_malloc (2 * nparts * sizeof (int));
-
-  /* reposition the sites in the alignment */
-  for (partItem = parts->head; partItem; partItem = partItem->next, ++ part)
-   {
-     info = (pllPartitionInfo *) partItem->item;
-
-     limits[2 * part] = next;           /* lower column of this partition */
-
-     for (regItem = info->regionList->head; regItem; regItem = regItem->next)
-      {
-        reg = (pllPartitionRegion *) regItem->item;
-
-        for (site = reg->start - 1;
-             site < reg->end && site < alignmentData->sequenceLength;
-             site += reg->stride)
-         {
-           /* follow perm from the site until an entry equal to the site is
-              found; when perm[site] == site the walk ends immediately and
-              the site itself is swapped */
-           cur = site;
-           while (perm[cur] != site)
-             cur = perm[cur];
-
-           swapSite (alignmentData->sequenceData, next, cur, alignmentData->sequenceCount);
-
-           /* mirror the column swap in perm */
-           held       = perm[cur];
-           perm[cur]  = perm[next];
-           perm[next] = held;
-           ++ next;
-         }
-      }
-
-     limits[2 * part + 1] = next;       /* upper (exclusive) column of this partition */
-   }
-
-  result = createPartitions (parts, limits);
-  if (result)
-   {
-     result->numberOfPartitions = nparts;
-     result->dirty = PLL_FALSE;
-   }
-
-  rax_free (limits);
-  rax_free (perm);
-
-  return (result);
-}
-
-/** @brief Copy a site to another buffer
-
-    Copies column \a from of \a src into column \a to of \a dst for every row
-    1 .. \a nTaxa. Row 0 of either buffer is never accessed.
-
-    @param dst
-      Destination buffer
-
-    @param src
-      Source buffer
-
-    @param to
-      At which position in \a dst to copy the site to
-
-    @param from
-      Which site from \a src to copy
-
-    @param nTaxa
-      Number of taxa, i.e. number of rows to copy
-*/
-static __inline void
-copySite (unsigned char ** dst, unsigned char ** src, int to, int from, int nTaxa)
-{
-  int row = 1;
-
-  while (row <= nTaxa)
-   {
-     unsigned char value = src[row][from];
-
-     dst[row][to] = value;
-     ++ row;
-   }
-}
-
-/** @brief Remove duplicate sites from alignment and update weights vector
-
-    Removes duplicate sites from the alignment given the partitions list
-    and updates the weight vector of the alignment and the boundaries
-    (upper, lower, width) for each partition.
-
-    Duplicates are detected per partition: the columns of a partition are
-    transposed into NUL-terminated strings, handed to pllssort1main, and a
-    column that compares equal to its predecessor is dropped while the weight
-    of the preceding kept column is incremented. The sequence data and the
-    site weights of \a alignmentData are reallocated with the reduced length;
-    every kept column starts with weight 1.
-
-    @param alignmentData
-      The multiple sequence alignment; modified in place
-
-    @param pl
-      List of partitions; lower, upper and width of every partition are
-      rewritten
-
-*/
-void 
-pllAlignmentRemoveDups (pllAlignmentData * alignmentData, partitionList * pl)
-{
-  int i, j, k, p;
-  char *** sites;
-  void ** memptr;
-  int ** oi;
-  int dups = 0;
-  int lower;
-
-  /* allocate space for the transposed alignments (sites) for every partition */
-  sites  = (char ***) rax_malloc (pl->numberOfPartitions * sizeof (char **));
-  memptr = (void **)  rax_malloc (pl->numberOfPartitions * sizeof (void *));
-  oi     = (int **)   rax_malloc (pl->numberOfPartitions * sizeof (int *));
-
-  /* transpose the sites by partition */
-  for (p = 0; p < pl->numberOfPartitions; ++ p)
-   {
-     /* memptr[p] is one block of width rows with sequenceCount + 1 chars each;
-        sites[p][i] points at row i of that block */
-     sites[p]  = (char **) rax_malloc (sizeof (char *) * pl->partitionData[p]->width);
-     memptr[p] = rax_malloc (sizeof (char) * (alignmentData->sequenceCount + 1) * pl->partitionData[p]->width);
-
-     for (i = 0; i < pl->partitionData[p]->width; ++ i)
-      {
-        sites[p][i] = (char *) ((char*)memptr[p] + sizeof (char) * i * (alignmentData->sequenceCount + 1));
-      }
-
-     /* row i receives column lower + i of every sequence, followed by a
-        terminating 0 so that it can be compared with strcmp below */
-     for (i = 0; i < pl->partitionData[p]->width; ++ i)
-      {
-        for (j = 0; j < alignmentData->sequenceCount; ++ j)
-         {
-           sites[p][i][j] = alignmentData->sequenceData[j + 1][pl->partitionData[p]->lower + i]; 
-         }
-        sites[p][i][j] = 0;
-      }
-
-     /* NOTE(review): pllssort1main presumably reorders sites[p] so that equal
-        columns become adjacent - confirm; the array it returns is only reused
-        as storage for the keep flags set below */
-     oi[p] = pllssort1main (sites[p], pl->partitionData[p]->width);
-
-     /* 1 = keep the column, 0 = duplicate of its predecessor */
-     for (i = 0; i < pl->partitionData[p]->width; ++ i) oi[p][i] = 1;
-
-     for (i = 1; i < pl->partitionData[p]->width; ++ i)
-      {
-        if (! strcmp (sites[p][i], sites[p][i - 1]))
-         {
-           ++ dups;
-           oi[p][i] = 0;
-         }
-      }
-   }
-
-  /* allocate memory for the alignment without duplicates*/
-  /* NOTE(review): row 1 is freed while the new block is stored in row 0;
-     presumably row 1 aliases the start of the old allocation, as it does for
-     the new one below - confirm */
-  rax_free (alignmentData->sequenceData[1]);
-  rax_free (alignmentData->siteWeights);
-
-  alignmentData->sequenceLength = alignmentData->sequenceLength - dups;
-  alignmentData->sequenceData[0] = (unsigned char *) rax_malloc ((alignmentData->sequenceLength + 1) * sizeof (unsigned char) * alignmentData->sequenceCount);
-  /* rows 1 .. sequenceCount point into the block held by row 0; each row has
-     sequenceLength + 1 bytes, the last one set to 0 */
-  for (i = 0; i < alignmentData->sequenceCount; ++ i)
-   {
-     alignmentData->sequenceData[i + 1] = (unsigned char *) (alignmentData->sequenceData[0] + sizeof (unsigned char) * i * (alignmentData->sequenceLength + 1));
-     alignmentData->sequenceData[i + 1][alignmentData->sequenceLength] = 0;
-   }
-
-  alignmentData->siteWeights    = (int *) rax_malloc ((alignmentData->sequenceLength) * sizeof (int));
-  alignmentData->siteWeights[0] = 1;
-
-  /* transpose sites back to alignment */
-  /* k is the next free column of the reduced alignment */
-  for (p = 0, k = 0; p < pl->numberOfPartitions; ++ p)
-   {
-     lower = k;
-     for (i = 0; i < pl->partitionData[p]->width; ++ i)
-      {
-        if (!oi[p][i])
-         {
-           /* duplicate: add its weight to the most recently written column */
-           ++ alignmentData->siteWeights[k - 1];
-         }
-        else
-         {
-           alignmentData->siteWeights[k] = 1;
-           for (j = 0; j < alignmentData->sequenceCount; ++ j)
-            {
-              alignmentData->sequenceData[j + 1][k] = sites[p][i][j];
-            }
-           ++ k;
-         }
-      }
-     /* new boundaries of the partition; upper is exclusive */
-     pl->partitionData[p]->lower = lower;
-     pl->partitionData[p]->upper = k;
-     pl->partitionData[p]->width = k - lower;
-   }
-
-  /* deallocate storage for transposed alignment (sites) */
-  for (p = 0; p < pl->numberOfPartitions; ++ p)
-   {
-     rax_free (oi[p]);
-     rax_free (memptr[p]);
-     rax_free (sites[p]);
-   }
-  rax_free (oi);
-  rax_free (sites);
-  rax_free (memptr);
-}
-
-
-/** @brief Compute the empirical frequencies of a partition
-
-    Compute the empirical frequencies of partition \a partition from the
-    columns [\a partition->lower, \a partition->upper) of \a alignmentData and
-    store them in \a pfreqs.
-
-    Starting from uniform frequencies, eight refinement passes are made. In
-    each pass every character distributes the weight of its site over the
-    states enabled in its bit mask, proportionally to the current frequency
-    estimates, and the accumulated sums are normalized into the new estimates.
-
-    @param partition
-      The partition for which to compute empirical frequencies; at most 64
-      states fit into the local work buffers
-
-    @param alignmentData
-      The multiple sequence alignment
-
-    @param smoothFrequencies
-      Currently unused
-
-    @param bitMask
-      Table translating a mapped character code into a bit set of states
-
-    @param pfreqs
-      Array of size \a partition->states where the empirical frequencies for this partition are stored
-
-    @return
-      \a 1 on success, \a 0 if the alignment contains a character that the
-      character map of the data type marks as invalid (negative entry) or if
-      the data type of the partition is not supported
-*/
-static int genericBaseFrequenciesAlignment (pInfo * partition, 
-                                              pllAlignmentData * alignmentData, 
-                                              pllBoolean smoothFrequencies,
-                                              const unsigned int * bitMask, 
-                                              double * pfreqs)
-{
-  double
-    wj,
-    acc,
-    sumf[64],
-    temp[64];
-
-  int
-    i,
-    j,
-    k,
-    l,
-    numFreqs,
-    lower,
-    upper;
-
-  unsigned char  *yptr;
-  const char * map;
-
-  (void) smoothFrequencies;
-
-  switch (partition->dataType)
-   {
-     case PLL_BINARY_DATA:
-       map = PLL_MAP_BIN;
-       break;                   /* must not fall through to the DNA map */
-     case PLL_DNA_DATA:
-       map = PLL_MAP_NT;
-       break;
-     case PLL_AA_DATA:
-       map = PLL_MAP_AA;
-       break;
-     default:
-       assert(0);
-       return (0);              /* keeps map from being used uninitialized when assert is compiled out */
-   }
-
-  numFreqs = partition->states;
-  lower    = partition->lower;
-  upper    = partition->upper;
-
-  /* initial guess: uniform frequencies */
-  for(l = 0; l < numFreqs; l++)
-    pfreqs[l] = 1.0 / ((double)numFreqs);
-
-  for (k = 1; k <= 8; k++)
-    {
-      for(l = 0; l < numFreqs; l++)
-        sumf[l] = 0.0;
-
-      /* sequences are stored in rows 1 .. sequenceCount */
-      for (i = 1; i <= alignmentData->sequenceCount; i++)
-        {
-          yptr = alignmentData->sequenceData[i];
-
-          for(j = lower; j < upper; j++)
-            {
-              /* go through signed char so that negative (invalid) map entries
-                 are also detected where plain char is unsigned */
-              int mapped = (signed char) map[yptr[j]];
-              unsigned int code;
-
-              if (mapped < 0) return (0);
-
-              code = bitMask[(unsigned char) mapped];
-              assert(code >= 1);
-
-              /* current estimates of the states compatible with this character */
-              for(l = 0; l < numFreqs; l++)
-                {
-                  if((code >> l) & 1)
-                    temp[l] = pfreqs[l];
-                  else
-                    temp[l] = 0.0;
-                }
-
-              for(l = 0, acc = 0.0; l < numFreqs; l++)
-                {
-                  if(temp[l] != 0.0)
-                    acc += temp[l];
-                }
-
-              /* share of the site weight per unit of frequency */
-              wj = alignmentData->siteWeights[j] / acc;
-
-              for(l = 0; l < numFreqs; l++)
-                {
-                  if(temp[l] != 0.0)
-                    sumf[l] += wj * temp[l];
-                }
-            }
-        }
-
-      /* normalize the accumulated sums into the new estimates */
-      for(l = 0, acc = 0.0; l < numFreqs; l++)
-        {
-          if(sumf[l] != 0.0)
-            acc += sumf[l];
-        }
-
-      for(l = 0; l < numFreqs; l++)
-        pfreqs[l] = sumf[l] / acc;
-    }
-
-  /* TODO: frequency smoothing and the check for zero frequencies are not
-     implemented here; smoothFrequencies is ignored */
-
-  return (1);
-}
-
-/* Compute the empirical frequencies of a partition from the alignment stored
-   in the instance tr (rows 1 .. tr->mxtips of tr->yVector, columns
-   [partition->lower, partition->upper), pattern weights tr->aliaswgt) and
-   store them in pfreqs, an array of partition->states entries.
-
-   Starting from uniform frequencies, eight refinement passes are made: every
-   character distributes the weight of its site over the states enabled in
-   its bit mask, proportionally to the current estimates, and the sums are
-   then normalized. smoothFrequencies is not used. */
-static void  genericBaseFrequenciesInstance (pInfo * partition, 
-                                             pllInstance * tr, 
-                                             pllBoolean smoothFrequencies,
-                                             const unsigned int * bitMask, 
-                                             double * pfreqs)
-{
-  double share, total;
-  double sums[64];
-  double cand[64];
-  int pass, tip, site, s;
-  const int nStates = partition->states;
-  const int first   = partition->lower;
-  const int last    = partition->upper;
-
-  /* initial guess: uniform frequencies */
-  for (s = 0; s < nStates; s++)
-    pfreqs[s] = 1.0 / ((double)nStates);
-
-  for (pass = 1; pass <= 8; pass++)
-    {
-      for (s = 0; s < nStates; s++)
-        sums[s] = 0.0;
-
-      for (tip = 1; tip <= tr->mxtips; tip++)
-        {
-          const unsigned char *row = tr->yVector[tip];
-
-          for (site = first; site < last; site++)
-            {
-              unsigned int code = bitMask[row[site]];
-              assert(code >= 1);
-
-              /* current estimates of the states compatible with this
-                 character, and their sum */
-              total = 0.0;
-              for (s = 0; s < nStates; s++)
-                {
-                  cand[s] = ((code >> s) & 1) ? pfreqs[s] : 0.0;
-                  if (cand[s] != 0.0)
-                    total += cand[s];
-                }
-
-              share = tr->aliaswgt[site] / total;
-
-              for (s = 0; s < nStates; s++)
-                {
-                  if (cand[s] != 0.0)
-                    sums[s] += share * cand[s];
-                }
-            }
-        }
-
-      /* normalize the accumulated sums into the new estimates */
-      total = 0.0;
-      for (s = 0; s < nStates; s++)
-        {
-          if (sums[s] != 0.0)
-            total += sums[s];
-        }
-
-      for (s = 0; s < nStates; s++)
-        pfreqs[s] = sums[s] / total;
-    }
-
-  /* TODO: frequency smoothing and the check for zero frequencies are not
-     implemented here */
-}
-
-/** @brief Compute the empirical base frequencies of an alignment
-
-     Computes the empirical base frequencies per partition of an alignment \a alignmentData
-     given the partition structure \a pl.
-
-     @param alignmentData The alignment structure for which to compute the empirical base frequencies
-     @param pl            List of partitions
-     @return Returns a list of frequencies for each partition (one array of
-             \a states entries per partition), to be released with
-             ::pllEmpiricalFrequenciesDestroy. Returns \b NULL, with nothing
-             left allocated, if the frequencies of a partition cannot be
-             computed or if a partition has an unsupported data type; in the
-             latter case \a errno is set to PLL_UNKNOWN_MOLECULAR_DATA_TYPE
-*/
-double ** pllBaseFrequenciesAlignment (pllAlignmentData * alignmentData, partitionList * pl)
-{
-  int
-    i,
-    model;
-
-  double
-    **freqs = (double **) rax_malloc (pl->numberOfPartitions * sizeof (double *));
-
-  for (model = 0; model < pl->numberOfPartitions; ++ model)
-    {
-      freqs[model] = (double *) rax_malloc (pl->partitionData[model]->states * sizeof (double));
-
-      switch  (pl->partitionData[model]->dataType)
-        {
-        case PLL_BINARY_DATA:
-        case PLL_AA_DATA:
-        case PLL_DNA_DATA:
-          if (!genericBaseFrequenciesAlignment (pl->partitionData[model],
-                                                alignmentData,
-                                                pLengths[pl->partitionData[model]->dataType].smoothFrequencies,
-                                                pLengths[pl->partitionData[model]->dataType].bitVector,
-                                                freqs[model]
-                                               ))
-            goto fail;
-          break;
-        default:
-          errno = PLL_UNKNOWN_MOLECULAR_DATA_TYPE;
-          goto fail;
-        }
-    }
-
-  return (freqs);
-
-fail:
-  /* release the rows allocated so far (0 .. model) and the row table, so
-     that a failure does not leak them */
-  for (i = 0; i <= model; ++ i) rax_free (freqs[i]);
-  rax_free (freqs);
-  return (double **)NULL;
-}
-
-/** @brief Compute the empirical base frequencies of the alignment incorporated in the instance
-
-     Computes the empirical base frequencies per partition of the alignment
-     incorporated in the instance \a tr given the partition structure \a pl.
-
-     @param tr The instance for which to compute the empirical base frequencies
-     @param pl List of partitions
-     @return Returns a list of frequencies for each partition, or \b NULL
-             (with \a errno set to PLL_UNKNOWN_MOLECULAR_DATA_TYPE) if a
-             partition has a data type other than AA, DNA or binary
-*/
-double ** pllBaseFrequenciesInstance (pllInstance * tr, partitionList * pl)
-{
-  int part, done;
-  double ** table = (double **) rax_malloc (pl->numberOfPartitions * sizeof (double *));
-
-  for (part = 0; part < pl->numberOfPartitions; ++ part)
-    {
-      pInfo * pd = pl->partitionData[part];
-
-      table[part] = (double *) rax_malloc (pd->states * sizeof (double));
-
-      if (pd->dataType != PLL_AA_DATA &&
-          pd->dataType != PLL_DNA_DATA &&
-          pd->dataType != PLL_BINARY_DATA)
-        {
-          /* unsupported data type: release rows 0 .. part and the table */
-          errno = PLL_UNKNOWN_MOLECULAR_DATA_TYPE;
-          for (done = 0; done <= part; ++ done)
-            rax_free (table[done]);
-          rax_free (table);
-          return (double **)NULL;
-        }
-
-      genericBaseFrequenciesInstance (pd,
-                                      tr,
-                                      pLengths[pd->dataType].smoothFrequencies,
-                                      pLengths[pd->dataType].bitVector,
-                                      table[part]);
-    }
-
-  return (table);
-}
-
-/** @brief Release a list of per-partition frequency arrays
-
-    Frees the first \a models rows of \a *empiricalFrequencies, then the row
-    table itself, and sets \a *empiricalFrequencies to \b NULL.
-
-    @param empiricalFrequencies Address of the list of frequency arrays
-    @param models               Number of rows in the list
-*/
-void
-pllEmpiricalFrequenciesDestroy (double *** empiricalFrequencies, int models)
-{
-  double ** rows = *empiricalFrequencies;
-  int row = 0;
-
-  while (row < models)
-   {
-     rax_free (rows[row]);
-     ++ row;
-   }
-  rax_free (rows);
-
-  *empiricalFrequencies = NULL;
-}
-
-/** @brief Load an alignment into a PLL instance
-
-    Copies the site weights of \a alignmentData into \a tr->aliaswgt,
-    allocates the per-site arrays of the instance (\a rateCategory, \a patrat,
-    \a patratStored, \a lhs) and the tip sequence matrix \a tr->yVector,
-    copies every sequence to the row of the tip that carries its label
-    (looked up in \a tr->nameHash), calls pllBaseSubstitute and finally
-    fills \a tr->tipNames from the name hash.
-
-    @param tr            PLL instance
-    @param alignmentData The multiple sequence alignment
-    @param partitions    List of partitions, passed on to pllBaseSubstitute
-
-    @return \a 1 on success. \a 0 if the number of sequences differs from
-            \a tr->mxtips (nothing is allocated in that case) or if a sequence
-            label is not found in \a tr->nameHash; in the latter case
-            everything allocated by this call is released again and the
-            corresponding pointers in \a tr are reset to \b NULL
-*/
-int pllLoadAlignment (pllInstance * tr, pllAlignmentData * alignmentData, partitionList * partitions)
-{
-  int i;
-  nodeptr node;
-  pllHashItem * hItem;
-
-  if (tr->mxtips != alignmentData->sequenceCount) return (0);
-
-  tr->aliaswgt = (int *) rax_malloc (alignmentData->sequenceLength * sizeof (int));
-  memcpy (tr->aliaswgt, alignmentData->siteWeights, alignmentData->sequenceLength * sizeof (int));
-
-  tr->originalCrunchedLength = alignmentData->sequenceLength;
-  tr->rateCategory           = (int *)   rax_calloc (tr->originalCrunchedLength, sizeof (int));
-  tr->patrat                 = (double*) rax_malloc((size_t)tr->originalCrunchedLength * sizeof(double));
-  tr->patratStored           = (double*) rax_malloc((size_t)tr->originalCrunchedLength * sizeof(double));
-  tr->lhs                    = (double*) rax_malloc((size_t)tr->originalCrunchedLength * sizeof(double));
-
-  /* allocate memory for the alignment: row 0 owns one block that holds all
-     sequences, rows 1 .. sequenceCount point into it */
-  tr->yVector    = (unsigned char **) rax_malloc ((alignmentData->sequenceCount + 1) * sizeof (unsigned char *));
-
-  tr->yVector[0] = (unsigned char *)  rax_malloc (sizeof (unsigned char) * (alignmentData->sequenceLength + 1) * alignmentData->sequenceCount);
-  for (i = 1; i <= alignmentData->sequenceCount; ++ i)
-   {
-     tr->yVector[i] = (unsigned char *) (tr->yVector[0] + (i - 1) * (alignmentData->sequenceLength + 1) * sizeof (unsigned char));
-     tr->yVector[i][alignmentData->sequenceLength] = 0;
-   }
-
-  /* place sequences to tips */
-  for (i = 1; i <= alignmentData->sequenceCount; ++ i)
-   {
-     if (!pllHashSearch (tr->nameHash, alignmentData->sequenceLabels[i],(void **)&node))
-      {
-        /* unknown label: undo every allocation made above, including the
-           copy of the site weights, and do not leave dangling pointers */
-        rax_free (tr->aliaswgt);
-        rax_free (tr->rateCategory);
-        rax_free (tr->patrat);
-        rax_free (tr->patratStored);
-        rax_free (tr->lhs);
-        rax_free (tr->yVector[0]);
-        rax_free (tr->yVector);
-
-        tr->aliaswgt     = NULL;
-        tr->rateCategory = NULL;
-        tr->patrat       = NULL;
-        tr->patratStored = NULL;
-        tr->lhs          = NULL;
-        tr->yVector      = NULL;
-        return (0);
-      }
-     memcpy (tr->yVector[node->number], alignmentData->sequenceData[i], alignmentData->sequenceLength);
-   }
-
-  /* Do the base substitution (from A,C,G....  ->   0,1,2,3....)*/
-  pllBaseSubstitute (tr, partitions);
-
-  /* Populate tipNames: every entry of the name hash is stored under the
-     number of its node; the strings are shared with the hash, not copied */
-  tr->tipNames = (char **) rax_calloc(tr->mxtips + 1, sizeof (char *));
-  for (i = 0; (unsigned int)i < tr->nameHash->size; ++ i)
-   {
-     hItem = tr->nameHash->Items[i];
-
-     for (; hItem; hItem = hItem->next)
-      {
-        tr->tipNames[((nodeptr)hItem->data)->number] = hItem->str;
-      }
-   }
-
-  return (1);
-}
-
-/** @brief Create a PLL instance from a set of attributes
-
-    Allocates a zero-initialized instance and copies the rate heterogeneity
-    model, the fast-scaling, memory-saving and recomputation flags, the random
-    number seed and the number of threads from \a attr. In fine-grain MPI
-    builds pllLockMPI is called on the new instance before it is returned.
-
-    @param attr Attributes of the instance to create
-    @return The new instance, or \b NULL if \a attr->rateHetModel is neither
-            PLL_GAMMA nor PLL_CAT
-*/
-pllInstance * pllCreateInstance (pllInstanceAttr * attr)
-{
-  pllInstance * inst;
-
-  if (!(attr->rateHetModel == PLL_GAMMA || attr->rateHetModel == PLL_CAT))
-    return NULL;
-
-  inst = (pllInstance *) rax_calloc (1, sizeof (pllInstance));
-
-  /* settings taken over from the attributes */
-  inst->rateHetModel     = attr->rateHetModel;
-  inst->fastScaling      = attr->fastScaling;
-  inst->saveMemory       = attr->saveMemory;
-  inst->useRecom         = attr->useRecom;
-  inst->randomNumberSeed = attr->randomNumberSeed;
-  inst->numberOfThreads  = attr->numberOfThreads;
-
-  /* fixed defaults */
-  inst->threadID          = 0;
-  inst->likelihoodEpsilon = 0.01;
-  inst->parsimonyScore    = NULL;
-  inst->rearrangeHistory  = NULL;
-
-  /* remove it from the library */
-  inst->useMedian         = PLL_FALSE;
-
-  /* 4 rate categories under GAMMA, 25 otherwise (i.e. under CAT) */
-  if (attr->rateHetModel == PLL_GAMMA)
-    inst->maxCategories = 4;
-  else
-    inst->maxCategories = 25;
-
-  /* Lock the slave processors at this point */
-#ifdef _FINE_GRAIN_MPI
-  pllLockMPI (inst);
-#endif
-
-  return (inst);
-}
-
-/** @brief Initialize PLL tree structure with default values
-
-    Sets the default values of the tree-related fields of \a tr and allocates
-    the node array, the name list, the tree string buffers, the constraint
-    vector, the name hash table and the first traversal descriptor for a
-    tree with \a tips tips.
-
-    All nodes live in one contiguous array (\a nodeBaseAddress): first
-    \a tips tip records numbered 1..tips, each being its own \a next, then
-    \a tips - 1 inner nodes numbered tips+1..2*tips-1, each consisting of
-    three records linked into a ring through \a next. Only the first record
-    allocated for an inner node has \a x and \a xBips set.
-
-    @param tr    PLL instance to initialize
-    @param tips  Number of tips (taxa) of the tree
-
-    @todo
-      STILL NOT FINISHED
-*/
-static void pllTreeInitDefaults (pllInstance * tr, int tips)
-{
-  nodeptr p0, p, q;
-  int i, j;
-  int inner;
-
-  /* TODO: make a proper static setupTree function */
-
-  inner = tips - 1;
-
-  tr->mxtips = tips;
-
-  tr->bigCutoff = PLL_FALSE;
-  tr->treeStringLength = tr->mxtips * (PLL_NMLNGTH + 128) + 256 + tr->mxtips * 2;
-  tr->tree_string = (char *) rax_calloc ( tr->treeStringLength, sizeof(char));
-  tr->tree0 = (char*)rax_calloc((size_t)tr->treeStringLength, sizeof(char));
-  tr->tree1 = (char*)rax_calloc((size_t)tr->treeStringLength, sizeof(char));
-  tr->constraintVector = (int *)rax_malloc((2 * tr->mxtips) * sizeof(int));
-
-  /* one record per tip plus three records per inner node */
-  p0 = (nodeptr) rax_malloc ((tips + 3 * inner) * sizeof (node));
-  assert (p0);
-
-  tr->nodeBaseAddress  = p0;
-
-  tr->nameList         = (char **)   rax_malloc ((tips + 1) * sizeof (char *));
-  tr->nodep            = (nodeptr *) rax_malloc ((2 * tips) * sizeof (nodeptr));
-
-  tr->autoProteinSelectionType = PLL_AUTO_ML;
-
-  assert (tr->nameList && tr->nodep);
-
-  /* node numbers start at 1; slot 0 is unused */
-  tr->nodep[0] = NULL;
-
-  /* TODO: The line below was commented... why? */
-  tr->fracchange = -1;
-  tr->rawFracchange = -1;
-
-  /* tips: a single record that points to itself */
-  for (i = 1; i <= tips; ++ i)
-   {
-     p = p0++;
-
-     p->hash      = 0;     /* was left indeterminate; inner nodes already start at 0 */
-     p->x         = 0;
-     p->xBips     = 0;
-     p->number    = i;
-     p->next      = p;
-     p->back      = NULL;
-     p->bInf      = NULL;
-     tr->nodep[i]  = p;
-   }
-
-  /* inner nodes: three records chained backwards, then closed into a ring */
-  for (i = tips + 1; i <= tips + inner; ++i)
-   {
-     q = NULL;
-     for (j = 1; j <= 3; ++ j)
-     {
-       p = p0++;
-       if (j == 1)
-        {
-          p->xBips = 1;
-          p->x     = 1;
-        }
-       else
-        {
-          p->xBips = 0;
-          p->x     = 0;
-        }
-       p->number = i;
-       p->next   = q;
-       p->bInf   = NULL;
-       p->back   = NULL;
-       p->hash   = 0;
-       q         = p;
-     }
-    /* p is the third record; the first one (next == NULL) now points back to it */
-    p->next->next->next = p;
-    tr->nodep[i]         = p;
-   }
-
-  tr->likelihood  = PLL_UNLIKELY;
-  tr->start       = NULL;
-  tr->ntips       = 0;
-  tr->nextnode    = 0;
-
-  for (i = 0; i < PLL_NUM_BRANCHES; ++ i) tr->partitionSmoothed[i] = PLL_FALSE;
-
-  tr->bitVectors = NULL;
-  tr->vLength    = 0;
-
-  /* TODO: Fix hash type */
-  tr->nameHash   = pllHashInit (10 * tr->mxtips);
-
-  /* TODO: do these options really fit here or should they be put elsewhere? */
-  tr->td[0].count            = 0;
-  tr->td[0].ti               = (traversalInfo *) rax_malloc (sizeof(traversalInfo) * (size_t)tr->mxtips);
-  tr->td[0].parameterValues  = (double *) rax_malloc(sizeof(double) * (size_t)PLL_NUM_BRANCHES);
-  tr->td[0].executeModel     = (pllBoolean *) rax_malloc (sizeof(pllBoolean) * (size_t)PLL_NUM_BRANCHES);
-  for (i = 0; i < PLL_NUM_BRANCHES; ++ i) tr->td[0].executeModel[i] = PLL_TRUE;
-}
-
-
-/** @brief Check a parsed tree for inclusion in the current tree
-
-    Tests whether every leaf name of the parsed tree \a nTree can be found
-    in the name hash table of \a pInst, i.e. whether the taxa of \a nTree
-    are a subset of the taxa already loaded.
-
-    @param pInst
-      PLL instance
-
-    @param nTree
-      Parsed newick tree structure
-
-    @return
-      \b PLL_TRUE if all leaves are known, \b PLL_FALSE otherwise or when
-      \a pInst has no name hash table
-*/
-static int
-checkTreeInclusion (pllInstance * pInst, pllNewickTree * nTree)
-{
-  pllStack * entry;
-  void * unused;
-
-  if (pInst->nameHash == NULL) return (PLL_FALSE);
-
-  for (entry = nTree->tree; entry != NULL; entry = entry->next)
-   {
-     pllNewickNodeInfo * info = (pllNewickNodeInfo *) entry->item;
-
-     /* a rank of zero marks a leaf; inner nodes are not looked up */
-     if (info->rank) continue;
-
-     if (!pllHashSearch (pInst->nameHash, info->name, &unused)) return (PLL_FALSE);
-   }
-
-  return (PLL_TRUE);
-}
-
-/* Rescale every branch-length slot of the branch between p and p->back
-   from old_fracchange to new_fracchange, clamp the result to
-   [PLL_ZMIN, PLL_ZMAX] and store it on both ends of the branch. */
-static void
-updateBranchLength (nodeptr p, double old_fracchange, double new_fracchange)
-{
-  int slot;
-
-  for (slot = 0; slot < PLL_NUM_BRANCHES; ++ slot)
-   {
-     double scaled = exp ((log (p->z[slot]) * old_fracchange) / new_fracchange);
-
-     if (scaled < PLL_ZMIN) scaled = PLL_ZMIN;
-     if (scaled > PLL_ZMAX) scaled = PLL_ZMAX;
-
-     p->back->z[slot] = scaled;
-     p->z[slot]       = scaled;
-   }
-}
-
-/* Rescale the branch at p, then descend into the two subtrees hanging off
-   the other records of p's node; the recursion stops at tips. */
-static void
-updateAllBranchLengthsRecursive (nodeptr p, int tips, double old_fracchange, double new_fracchange)
-{
-  updateBranchLength (p, old_fracchange, new_fracchange);
-
-  if (isTip (p->number, tips)) return;
-
-  updateAllBranchLengthsRecursive (p->next->back,       tips, old_fracchange, new_fracchange);
-  updateAllBranchLengthsRecursive (p->next->next->back, tips, old_fracchange, new_fracchange);
-}
-
-/* Rescale all branch lengths of the tree in tr from old_fracchange to
-   new_fracchange, starting at the node attached to tr->start, which must
-   be a tip. */
-static void
-updateAllBranchLengths (pllInstance * tr, double old_fracchange, double new_fracchange)
-{
-  nodeptr startTip = tr->start;
-
-  assert (isTip(startTip->number, tr->mxtips));
-
-  updateAllBranchLengthsRecursive (startTip->back, tr->mxtips, old_fracchange, new_fracchange);
-}
-
-
-/** @brief Relink the taxa
-    
-    Relink the taxa by performing a preorder traversal of the unrooted binary tree.
-    We assume that the tree is rooted such that the root is the only node of
-    out-degree 3 and in-degree 0, while all the other inner nodes have in-degree
-    1 and out-degree 2. Finally, the leaves have in-degree 1 and out-degree 0.
-
-    When \a taxaExist is false the tree structure of \a pInst is first
-    (re)initialized with default values, every leaf name is copied into the
-    name list and registered in the name hash table, and \a fracchange is set
-    to 1. Otherwise each leaf is looked up in the existing name hash table.
-
-    @param pInst
-      PLL instance
-
-    @param nTree
-      Parsed newick tree structure
-
-    @param taxaExist
-      Is the set of taxa of \a nTree a subset of the taxa of the current tree
-
-    @return
-      Always \b PLL_TRUE
-*/
-static int
-linkTaxa (pllInstance * pInst, pllNewickTree * nTree, int taxaExist)
-{
-  nodeptr 
-    parent,
-    child = NULL;
-  pllStack 
-    * nodeStack = NULL,
-    * current;
-  int
-    i,
-    j,
-    inner = nTree->tips + 1,
-    leaf  = 1;
-  double z;
-  pllNewickNodeInfo * nodeInfo;
-
-  if (!taxaExist) pllTreeInitDefaults (pInst, nTree->tips);
-
-  /* Place the ternary root node 3 times on the stack such that later on
-     three nodes use it as their parent */
-  current = nTree->tree;
-  for (parent = pInst->nodep[inner], i  = 0; i < 3; ++ i, parent = parent->next)
-    pllStackPush (&nodeStack, parent);
-  ++ inner;
-
-  /* now traverse the rest of the nodes */
-  for (current = current->next; current; current = current->next)
-   {
-     parent   = (nodeptr) pllStackPop (&nodeStack);
-     nodeInfo = (pllNewickNodeInfo *) current->item;
-
-     /* if inner node place it twice on the stack (out-degree 2) */
-     if (nodeInfo->rank)
-      {
-        child = pInst->nodep[inner ++];
-        pllStackPush (&nodeStack, child->next);
-        pllStackPush (&nodeStack, child->next->next);
-      }
-     else /* check if taxon already exists, i.e. we loaded another tree topology */
-      {
-        if (taxaExist)
-         {
-           /* The lookup is what sets child, so it must not live inside
-              assert(): with NDEBUG the whole expression would vanish and
-              child would be used uninitialized. */
-           int found = pllHashSearch (pInst->nameHash, nodeInfo->name, (void **) &child);
-
-           assert (found);
-           (void) found;   /* silence the unused warning when asserts are off */
-         }
-        else
-         {
-           child = pInst->nodep[leaf];
-           pInst->nameList[leaf] = strdup (nodeInfo->name);
-           pllHashAdd (pInst->nameHash, pllHashString(pInst->nameList[leaf], pInst->nameHash->size), pInst->nameList[leaf], (void *) (pInst->nodep[leaf]));
-           ++ leaf;
-         }
-      }
-     assert (parent);
-     /* link parent and child */
-     parent->back = child;
-     child->back  = parent;
-
-     if (!taxaExist) pInst->fracchange = 1;
-
-     /* set the branch length */
-     z = exp ((-1 * atof (nodeInfo->branch)) / pInst->fracchange);
-     if (z < PLL_ZMIN) z = PLL_ZMIN;
-     if (z > PLL_ZMAX) z = PLL_ZMAX;
-     for (j = 0; j < PLL_NUM_BRANCHES; ++ j)
-       parent->z[j] = child->z[j] = z;
-   }
-  pllStackClear (&nodeStack);
-
-  return PLL_TRUE;
-}
-
-/** @brief Get the instantaneous rate matrix
-    
-    Obtain the instantaneous rate matrix (Q) for partition \a model
-    of the partition list \a pr, and store it in the array \a outBuffer.
-
-    The matrix is written row-major as 4 x 4 doubles (entry (i,j) at
-    \a outBuffer[i * 4 + j]). The six substitution rates are taken relative
-    to the last one and the result is scaled such that the mean rate
-    sum_i freqs[i] * (-Q[i][i]) equals 1. The model parameters stored in
-    the partition are read only.
-    
-    @param pr        List of partitions
-    @param model     Index of partition to use
-    @param outBuffer Where to store the instantaneous rate matrix (16 doubles)
-
-    @todo Currently, the Q matrix can be only obtained for DNA GTR data.
-
-    @return Returns \b PLL_TRUE in case of success, otherwise \b PLL_FALSE
-*/
-int pllGetInstRateMatrix (partitionList * pr, int model, double * outBuffer)
-{
-  int  i;
-  double mean = 0;
-  double rates[6];
-  const double * stored;
-  const double * freqs;
-
-  if (pr->partitionData[model]->dataType != PLL_DNA_DATA) return (PLL_FALSE);
-
-  stored = pr->partitionData[model]->substRates;
-  freqs  = pr->partitionData[model]->frequencies;
-
-  /* normalize substitution rates on a local copy, so that querying the
-     matrix does not modify the partition's model parameters */
-  for (i = 0; i < 6; ++ i)  rates[i] = stored[i] / stored[5];
-
-  outBuffer[0 * 4 + 1] = (rates[0] * freqs[1]);
-  outBuffer[0 * 4 + 2] = (rates[1] * freqs[2]);
-  outBuffer[0 * 4 + 3] = (rates[2] * freqs[3]);
-
-  outBuffer[1 * 4 + 0] = (rates[0] * freqs[0]);
-  outBuffer[1 * 4 + 2] = (rates[3] * freqs[2]);
-  outBuffer[1 * 4 + 3] = (rates[4] * freqs[3]);
-
-  outBuffer[2 * 4 + 0] = (rates[1] * freqs[0]);
-  outBuffer[2 * 4 + 1] = (rates[3] * freqs[1]);
-  outBuffer[2 * 4 + 3] = (rates[5] * freqs[3]);
-
-  outBuffer[3 * 4 + 0] = (rates[2] * freqs[0]);
-  outBuffer[3 * 4 + 1] = (rates[4] * freqs[1]);
-  outBuffer[3 * 4 + 2] = (rates[5] * freqs[2]);
-
-  /* each diagonal entry is minus the sum of the off-diagonal entries of its row */
-  outBuffer[0 * 4 + 0] = -(rates[0] * freqs[1] + rates[1] * freqs[2] + rates[2] * freqs[3]);
-  outBuffer[1 * 4 + 1] = -(rates[0] * freqs[0] + rates[3] * freqs[2] + rates[4] * freqs[3]);
-  outBuffer[2 * 4 + 2] = -(rates[1] * freqs[0] + rates[3] * freqs[1] + rates[5] * freqs[3]);
-  outBuffer[3 * 4 + 3] = -(rates[2] * freqs[0] + rates[4] * freqs[1] + rates[5] * freqs[2]);
-
-  for (i = 0; i <  4; ++ i) mean         += freqs[i] * (-outBuffer[i * 4 + i]);
-  for (i = 0; i < 16; ++ i) outBuffer[i] /= mean;
-
-  return (PLL_TRUE);
-}
-
-/** @ingroup instanceLinkingGroup
-    @brief Initializes the PLL tree topology according to a parsed newick tree
-
-    Set the tree topology based on a parsed and validated newick tree. If the
-    instance already has a name hash table and all leaves of \a newick are
-    found in it, the existing taxa are relinked; otherwise the tree structure
-    is initialized from scratch. The first tip becomes the start node.
-
-    @param tr
-      The PLL instance
-
-    @param newick
-      The \a pllNewickTree wrapper structure that contains the parsed newick tree
-
-    @param useDefaultz
-      If set to \b PLL_TRUE then the branch lengths will be reset to the default
-      value.
-*/
-void
-pllTreeInitTopologyNewick (pllInstance * tr, pllNewickTree * newick, int useDefaultz)
-{
-  int taxaKnown = tr->nameHash && checkTreeInclusion (tr, newick);
-
-  linkTaxa (tr, newick, taxaKnown);
-
-  tr->start = tr->nodep[1];
-
-  if (useDefaultz == PLL_TRUE)
-   {
-     resetBranches (tr);
-   }
-}
-
-/** @brief Get the oriented pointer of a round-about node
-
-    Returns the record of the round-about node of \a p that carries the
-    orientation, i.e. has the \a x flag set. The records \a p and \a p->next
-    are tested in that order; if neither is flagged, \a p->next->next is
-    returned. In case a tip is passed (node number not larger than
-    \a mxtips), the returned pointer is the same as the input.
-
-    @param pInst  PLL instance
-    @param p      One of the three pointers of a round-about node
-
-    @return  Returns the pointer that has the orientation
-*/
-nodeptr pllGetOrientedNodePointer (pllInstance * pInst, nodeptr p)
-{
-  if (p->number > pInst->mxtips && !p->x)
-   {
-     return p->next->x ? p->next : p->next->next;
-   }
-
-  return p;
-}
-
-
-//void
-//pllTreeInitTopologyNewick (pllInstance * tr, pllNewickTree * nt, int useDefaultz)
-//{
-//  pllStack * nodeStack = NULL;
-//  pllStack * head;
-//  pllNewickNodeInfo * item;
-//  int i, j, k;
-//  
-///*
-//  for (i = 0; i < partitions->numberOfPartitions; ++ i)
-//   {
-//     partitions->partitionData[i] = (pInfo *) rax_malloc (sizeof (pInfo));
-//     partitions->partitionData[i]->partitionContribution = -1.0;
-//     partitions->partitionData[i]->partitionLH           =  0.0;
-//     partitions->partitionData[i]->fracchange            =  1.0;
-//   }
-//*/
-// 
-//
-// if (tr->nameHash)
-//  {
-//    if (checkTreeInclusion (tr, nt))
-//     {
-//       printf ("It is a subset\n");
-//     }
-//    else
-//     {
-//       printf ("It is not a subset\n");
-//     }
-//  }
-//  
-//  pllTreeInitDefaults (tr, nt->tips);
-//
-//  i = nt->tips + 1;
-//  j = 1;
-//  nodeptr v;
-//  
-//  
-//  for (head = nt->tree; head; head = head->next)
-//  {
-//    item = (pllNewickNodeInfo *) head->item;
-//    if (!nodeStack)
-//     {
-//       pllStackPush (&nodeStack, tr->nodep[i]);
-//       pllStackPush (&nodeStack, tr->nodep[i]->next);
-//       pllStackPush (&nodeStack, tr->nodep[i]->next->next);
-//       ++i;
-//     }
-//    else
-//     {
-//       v = (nodeptr) pllStackPop (&nodeStack);
-//       if (item->rank)  /* internal node */
-//        {
-//          v->back           = tr->nodep[i];
-//          tr->nodep[i]->back = v; //t->nodep[v->number]
-//          pllStackPush (&nodeStack, tr->nodep[i]->next);
-//          pllStackPush (&nodeStack, tr->nodep[i]->next->next);
-//          double z = exp((-1 * atof(item->branch))/tr->fracchange);
-//          if(z < PLL_ZMIN) z = PLL_ZMIN;
-//          if(z > PLL_ZMAX) z = PLL_ZMAX;
-//          for (k = 0; k < PLL_NUM_BRANCHES; ++ k)
-//             v->z[k] = tr->nodep[i]->z[k] = z;
-//
-//          ++ i;
-//        }
-//       else             /* leaf */
-//        {
-//          v->back           = tr->nodep[j];
-//          tr->nodep[j]->back = v; //t->nodep[v->number];
-//
-//          double z = exp((-1 * atof(item->branch))/tr->fracchange);
-//          if(z < PLL_ZMIN) z = PLL_ZMIN;
-//          if(z > PLL_ZMAX) z = PLL_ZMAX;
-//          for (k = 0; k < PLL_NUM_BRANCHES; ++ k)
-//            v->z[k] = tr->nodep[j]->z[k] = z;
-//            
-//          //t->nameList[j] = strdup (item->name);
-//          tr->nameList[j] = (char *) rax_malloc ((strlen (item->name) + 1) * sizeof (char));
-//          strcpy (tr->nameList[j], item->name);
-//          
-//          pllHashAdd (tr->nameHash, tr->nameList[j], (void *) (tr->nodep[j]));
-//          ++ j;
-//        }
-//     }
-//  }
-//  
-//  tr->start = tr->nodep[1];
-//  
-//  pllStackClear (&nodeStack);
-//
-//  if (useDefaultz == PLL_TRUE) 
-//    resetBranches (tr);
-//}
-
-/** @brief Initialize PLL tree with a random topology
-
-    Initializes the tree structure of \a tr for \a tips taxa, stores a copy
-    of each taxon label, registers the labels in the name hash table and
-    finally builds a randomly created topology.
-
-    @todo
-      Perhaps pass a seed?
-
-    @param tr
-      The PLL instance
-
-    @param tips
-      Number of tips
-
-    @param nameList
-      The taxa labels; entries 1 to \a tips are read (entry 0 is not used)
-*/
-void 
-pllTreeInitTopologyRandom (pllInstance * tr, int tips, char ** nameList)
-{
-  int tip;
-
-  pllTreeInitDefaults (tr, tips);
-
-  for (tip = 1; tip <= tips; ++ tip)
-   {
-     size_t bytes = (strlen (nameList[tip]) + 1) * sizeof (char);
-     char * copy  = (char *) rax_malloc (bytes);
-
-     /* bytes includes the terminating NUL */
-     memcpy (copy, nameList[tip], bytes);
-     tr->nameList[tip] = copy;
-
-     pllHashAdd (tr->nameHash, pllHashString(copy, tr->nameHash->size), copy, (void *) (tr->nodep[tip]));
-   }
-
-  pllMakeRandomTree (tr);
-}
-
-
-/** @brief Initialize a tree that corresponds to a given (already parsed) alignment 
-
-    Initializes the tree structure of \a tr with one tip per sequence of the
-    alignment, stores a copy of each sequence label and registers the labels
-    in the name hash table. No topology is built.
-
-    @todo
-      nothing 
-
-    @param tr
-      The PLL instance
-
-    @param alignmentData
-      Parsed alignment; labels 1 to \a sequenceCount are read
-*/
-void 
-pllTreeInitTopologyForAlignment (pllInstance * tr, pllAlignmentData * alignmentData)
-{
-  char ** labels = alignmentData->sequenceLabels;
-  int count      = alignmentData->sequenceCount;
-  int tip;
-
-  pllTreeInitDefaults (tr, count);
-
-  for (tip = 1; tip <= count; ++ tip)
-   {
-     size_t bytes = (strlen (labels[tip]) + 1) * sizeof (char);
-     char * copy  = (char *) rax_malloc (bytes);
-
-     /* bytes includes the terminating NUL */
-     memcpy (copy, labels[tip], bytes);
-     tr->nameList[tip] = copy;
-
-     pllHashAdd (tr->nameHash, pllHashString(copy, tr->nameHash->size), copy, (void *) (tr->nodep[tip]));
-   }
-}
-
-
-/** @brief Compute a randomized stepwise addition order parsimony tree
-
-    Implements the RAxML randomized stepwise addition order algorithm.
-    The parsimony data structures are allocated before the tree is built
-    and released again afterwards.
-
-    @todo
-      check functions that are invoked for potential memory leaks!
-
-    @param tr
-      The PLL instance
-
-    @param partitions
-      The partitions
-
-    @param sprDist
-      SPR distance for the SPR search in parsimony
-*/
-void pllComputeRandomizedStepwiseAdditionParsimonyTree(pllInstance * tr, partitionList * partitions, int sprDist)
-{
-  allocateParsimonyDataStructures(tr, partitions);
-  pllMakeParsimonyTreeFast(tr, partitions, sprDist);
-  pllFreeParsimonyDataStructures(tr, partitions);
-}
-
-/** @brief Encode the alignment data to the PLL numerical representation
-    
-    Transforms the alignment stored in \a tr->yVector to the PLL internal
-    representation: for every partition, each character of tips 1 to
-    \a mxtips in the column range [lower, upper) of that partition is
-    replaced by its entry in the translation table of the partition's data
-    type (DNA, binary or amino acid).
-
-    A partition with any other data type triggers an assertion; when
-    assertions are compiled out, such a partition is left untouched.
-
-    @param tr          PLL instance holding the alignment
-    @param partitions  List of partitions
-*/
-void pllBaseSubstitute (pllInstance * tr, partitionList * partitions)
-{
-  int i, j, k;
-
-  for (i = 0; i < partitions->numberOfPartitions; ++ i)
-   {
-     const char * d = NULL;
-
-     switch (partitions->partitionData[i]->dataType)
-      {
-        case PLL_DNA_DATA:
-          d = PLL_MAP_NT;
-          break;
-        case PLL_BINARY_DATA:
-          d = PLL_MAP_BIN;
-          break;
-        case PLL_AA_DATA:
-          d = PLL_MAP_AA;
-          break;
-        default:
-          assert(0);
-          break;
-      }
-
-     /* Only reachable with NDEBUG: never index through an unset (or, as
-        before, a previous partition's) translation table. */
-     if (!d) continue;
-     
-     for (j = 1; j <= tr->mxtips; ++ j)
-      {
-        for (k = partitions->partitionData[i]->lower; k < partitions->partitionData[i]->upper; ++ k)
-         {
-           tr->yVector[j][k] = d[tr->yVector[j][k]];
-         }
-      }
-   }
-}
-
-/** @brief Clear the rearrangements history of a PLL instance
-    
-    Pops every rollback record from the rearrangement history stack of
-    \a tr and frees it, until the stack yields no further record.
-
-    @param tr
-      PLL instance
-*/
-void pllClearRearrangeHistory (pllInstance * tr)
-{
-  pllRollbackInfo * record = (pllRollbackInfo *) pllStackPop (&(tr->rearrangeHistory));
-
-  while (record)
-   {
-     rax_free (record);
-     record = (pllRollbackInfo *) pllStackPop (&(tr->rearrangeHistory));
-   }
-}
-
-/** @brief Deallocate the PLL instance
-
-    Deallocates the library instance and all its elements: the taxon names,
-    the name hash table, the alignment storage, the per-instance buffers and
-    the rearrangement history, and finally \a tr itself. \a tr must not be
-    used after this call.
-
-    @param tr
-      The PLL instance
-*/
-void
-pllDestroyInstance (pllInstance * tr)
-{
-  int i;
-
-  /* taxon names are stored at indices 1..mxtips */
-  for (i = 1; i <= tr->mxtips; ++ i)
-    rax_free (tr->nameList[i]);
-  
-  pllHashDestroy (&(tr->nameHash), NULL);
-  if (tr->yVector)
-   {
-     /* yVector[0] is freed as one block; the other entries are not freed individually */
-     if (tr->yVector[0]) rax_free (tr->yVector[0]);
-     rax_free (tr->yVector);
-   }
-  rax_free (tr->aliaswgt);
-  rax_free (tr->rateCategory);
-  rax_free (tr->patrat);
-  rax_free (tr->patratStored);
-  rax_free (tr->lhs);
-  rax_free (tr->td[0].parameterValues);
-  rax_free (tr->td[0].executeModel);
-  rax_free (tr->td[0].ti);
-  rax_free (tr->nameList);
-  rax_free (tr->nodep);
-  rax_free (tr->nodeBaseAddress);
-  rax_free (tr->tree_string);
-  rax_free (tr->tree0);
-  rax_free (tr->tree1);
-  rax_free (tr->tipNames);
-  rax_free (tr->constraintVector);
-  pllClearRearrangeHistory (tr);
-
-  /* the instance itself goes last */
-  rax_free (tr);
-
-#ifdef _FINE_GRAIN_MPI
-  pllFinalizeMPI ();
-#endif
-
-}
-
-/* initialize a parameter linkage list for a certain parameter type (can be whatever).
-   the input is an integer vector that contains NumberOfModels (numberOfPartitions) elements.
-
-   if we want to have all alpha parameters unlinked and have say 4 partitions the input 
-   vector would look like this: {0, 1, 2, 3}, if we want to link partitions 0 and 3 the vector 
-   should look like this: {0, 1, 2, 0} 
-*/
-
-
-
-/** @brief Parse and install a Q-matrix symmetry pattern for one partition
-
-    Parses \a linkageString, a comma-separated list of integers with one
-    entry per substitution rate (states * (states - 1) / 2 entries), checks
-    it and copies it into the symmetry vector of partition \a model. Entry
-    \a j must not exceed \a j, and must not exceed the largest previous entry
-    by more than one. If fewer distinct rate classes than rates result, the
-    partition is flagged as \a nonGTR. Optimization of the substitution rates
-    is switched on.
-
-    The partition is only modified if the whole string is valid.
-
-    @param linkageString  Symmetry pattern, e.g. "0,1,2,3,4,5"
-    @param pr             List of partitions
-    @param model          Index of the partition to configure
-
-    @return \b PLL_TRUE on success. On failure \b PLL_FALSE is returned and
-            \a errno is set to \b PLL_SUBSTITUTION_RATE_OUT_OF_BOUNDS (wrong
-            number of entries), \b PLL_INVALID_Q_MATRIX_SYMMETRY or
-            \b PLL_Q_MATRIX_SYMMETRY_OUT_OF_BOUNDS
-*/
-static int init_Q_MatrixSymmetries(char *linkageString, partitionList * pr, int model)
-{
-  int 
-    states = pr->partitionData[model]->states,
-    numberOfRates = ((states * states - states) / 2), 
-    *list = (int *)rax_malloc(sizeof(int) * numberOfRates),
-    j,
-    max = -1,
-    result = PLL_FALSE;
-
-  char
-    *str1,
-    *saveptr,
-    *ch,
-    *token;
-
-  /* the tokenizer modifies its input, so work on a private copy */
-  ch = (char *) rax_malloc (strlen (linkageString) + 1);
-  strcpy (ch, linkageString);
-
-  for(j = 0, str1 = ch; ;j++, str1 = (char *)NULL) 
-    {
-      token = STRTOK_R(str1, ",", &saveptr);
-      if(token == (char *)NULL)
-        break;
-      if(!(j < numberOfRates))
-        {
-          errno = PLL_SUBSTITUTION_RATE_OUT_OF_BOUNDS;
-          goto cleanup;
-        }
-      list[j] = atoi(token);     
-    }
-
-  /* too few entries: the remaining slots of list were never written and
-     must not be validated or copied */
-  if(j < numberOfRates)
-    {
-      errno = PLL_SUBSTITUTION_RATE_OUT_OF_BOUNDS;
-      goto cleanup;
-    }
-
-  for(j = 0; j < numberOfRates; j++)
-    {
-      if(!(list[j] <= j))
-        {
-          errno = PLL_INVALID_Q_MATRIX_SYMMETRY;
-          goto cleanup;
-        }
-      
-      if(!(list[j] <= max + 1))
-        {
-          errno = PLL_Q_MATRIX_SYMMETRY_OUT_OF_BOUNDS;
-          goto cleanup;
-        }
-      
-      if(list[j] > max)
-        max = list[j];
-    }  
-  
-  for(j = 0; j < numberOfRates; j++)  
-    pr->partitionData[model]->symmetryVector[j] = list[j];    
-
-  //less than the maximum possible number of rate parameters
-
-  if(max < numberOfRates - 1)    
-    pr->partitionData[model]->nonGTR = PLL_TRUE;
-
-  pr->partitionData[model]->optimizeSubstitutionRates = PLL_TRUE;
-
-  result = PLL_TRUE;
-
-cleanup:
-  /* single exit: both buffers are released on success and on every error */
-  rax_free(ch);
-  rax_free(list);
-
-  return result;
-}
-
-/** @brief Check parameter linkage across partitions for consistency
- *
- * Checks that linked substitution rate, alpha and frequency model parameters
- * across several partitions are consistent. E.g., when two partitions are
- * linked via the alpha parameter, the alpha parameter should either be set
- * to the same fixed value or it should be estimated!
- *
- * The check only runs while the partition list is flagged dirty; after a
- * successful check the flag is cleared. On the first inconsistency the
- * function stops, leaves the flag set and reports the reason in \a errno.
- *
- * @param pr
- *   List of partitions
- *
- * @return
- *   \b PLL_TRUE if the linkage is consistent (or nothing changed since the
- *   last successful check), otherwise \b PLL_FALSE with \a errno set
- *
- * @todo
- *   Call this in more functions, right now it's only invoked in the wrapper 
- *   for modOpt() 
- */
-static int checkLinkageConsistency(partitionList *pr)
-{
-  linkageList 
-    *ll;
-
-  int 
-    i,
-    j,
-    k;
-
-  if(!pr->dirty)
-    return PLL_TRUE;
-
-  /* first deal with rates */
-
-  ll = pr->rateList;
-
-  for(i = 0; i < ll->entries; i++)
-    {
-      int
-        partitions = ll->ld[i].partitions;
-
-      pInfo
-        *ref = pr->partitionData[ll->ld[i].partitionList[0]];
-
-      if(ref->dataType == PLL_AA_DATA)
-        {
-          /* protein rates have to be optimized exactly when the model is
-             GTR or uses a symmetry pattern */
-          int
-            expected = (ref->protModels == PLL_GTR || ref->nonGTR) ? PLL_TRUE : PLL_FALSE;
-
-          if(ref->optimizeSubstitutionRates != expected)
-            {
-              errno = PLL_INCONSISTENT_SUBST_RATE_OPTIMIZATION_SETTING;
-              return PLL_FALSE;
-            }
-        }
-
-      /* compare every further partition of the group with the first one */
-      for(k = 1; k < partitions; k++)
-        {
-          pInfo
-            *cur = pr->partitionData[ll->ld[i].partitionList[k]];
-
-          int
-            states = cur->states,
-            rates = ((states * states - states) / 2);
-
-          if(ref->nonGTR != cur->nonGTR ||
-             ref->optimizeSubstitutionRates != cur->optimizeSubstitutionRates)
-            {
-              errno = PLL_INCONSISTENT_SUBST_RATE_OPTIMIZATION_SETTING;
-              return PLL_FALSE;
-            }
-
-          if(ref->nonGTR)
-            {
-              for(j = 0; j < rates; j++)
-                {
-                  if(ref->symmetryVector[j] != cur->symmetryVector[j])
-                    {
-                      errno = PLL_INCONSISTENT_Q_MATRIX_SYMMETRIES_ACROSS_LINKED_PARTITIONS;
-                      return PLL_FALSE;
-                    }
-                }
-            }
-
-          for(j = 0; j < rates; j++)
-            {
-              if(ref->substRates[j] != cur->substRates[j])
-                {
-                  errno = PLL_INCONSISTENT_Q_MATRIX_ENTRIES_ACROSS_LINKED_PARTITIONS;
-                  return PLL_FALSE;
-                }
-            }
-        }
-    }
-
-  /* then deal with alpha parameters */
-
-  ll = pr->alphaList;
-
-  for(i = 0; i < ll->entries; i++)
-    {
-      int
-        partitions = ll->ld[i].partitions;
-
-      pInfo
-        *ref;
-
-      if(partitions <= 1)
-        continue;
-
-      ref = pr->partitionData[ll->ld[i].partitionList[0]];
-
-      for(k = 1; k < partitions; k++)
-        {
-          pInfo
-            *cur = pr->partitionData[ll->ld[i].partitionList[k]];
-
-          if(ref->optimizeAlphaParameter != cur->optimizeAlphaParameter)
-            {
-              errno = PLL_INCONSISTENT_ALPHA_STATES_ACROSS_LINKED_PARTITIONS;
-              return PLL_FALSE;
-            }
-          if(ref->alpha != cur->alpha)
-            {
-              errno = PLL_INCONSISTENT_ALPHA_VALUES_ACROSS_LINKED_PARTITIONS;
-              return PLL_FALSE;
-            }
-        }
-    }
-
-  /* and then deal with base frequencies */
-
-  ll = pr->freqList;
-
-  for(i = 0; i < ll->entries; i++)
-    {
-      int
-        partitions = ll->ld[i].partitions;
-
-      pInfo
-        *ref;
-
-      if(partitions <= 1)
-        continue;
-
-      ref = pr->partitionData[ll->ld[i].partitionList[0]];
-
-      for(k = 1; k < partitions; k++)
-        {
-          pInfo
-            *cur = pr->partitionData[ll->ld[i].partitionList[k]];
-
-          int
-            states = cur->states;
-
-          if(ref->optimizeBaseFrequencies != cur->optimizeBaseFrequencies)
-            {
-              errno = PLL_INCONSISTENT_FREQUENCY_STATES_ACROSS_LINKED_PARTITIONS;
-              return PLL_FALSE;
-            }
-
-          for(j = 0; j < states; j++)
-            {
-              if(ref->frequencies[j] != cur->frequencies[j])
-                {
-                  errno = PLL_INCONSISTENT_FREQUENCY_VALUES_ACROSS_LINKED_PARTITIONS;
-                  return PLL_FALSE;
-                }
-            }
-        }
-    }
-
-  pr->dirty = PLL_FALSE;
-
-  return PLL_TRUE;
-}
-/** @brief Set symmetries among parameters in the Q matrix
-    
-    Allows to link some or all rate parameters in the Q-matrix 
-    for obtaining simpler models than GTR. The partition list is flagged
-    dirty afterwards, whether or not the pattern was accepted.
-
-    @param string
-      string describing the symmetry pattern among the rates in the Q matrix
-
-    @param pr
-      List of partitions
-      
-    @param model
-      Index of the partition for which we want to set the Q matrix symmetries
-
-    @return
-      \b PLL_TRUE if the pattern was valid and has been applied, otherwise
-      \b PLL_FALSE
-
-    @todo
-      nothing
-*/
-int pllSetSubstitutionRateMatrixSymmetries(char *string, partitionList * pr, int model)
-{
-  int 
-    status;
-
-  status = init_Q_MatrixSymmetries(string, pr, model);
-
-  /* the linkage consistency check only runs on a dirty partition list */
-  pr->dirty = PLL_TRUE;
-
-  return status;
-}
-
-/** @defgroup modelParamsGroup Model parameters setup and retrieval
-    
-    This set of functions is responsible for setting, retrieving, and optimizing
-    model parameters. It also contains functions for linking model parameters
-    across partitions.
-*/
-
-/** @ingroup modelParamsGroup
-    @brief Set the alpha parameter of the Gamma model to a fixed value for a partition
-    
-    Sets the alpha parameter of the gamma model of rate heterogeneity to a fixed value
-    and disables the optimization of this parameter. The gamma rate categories
-    of the partition are recomputed and the partition list is flagged dirty,
-    so that the parameter linkage is checked again before it is relied upon.
-
-    @param alpha
-      alpha value, must lie within [PLL_ALPHA_MIN, PLL_ALPHA_MAX]
-
-    @param model
-      Index of the partition for which we want to set the alpha value
-
-    @param pr
-      List of partitions
-      
-    @param tr
-      Library instance for which we want to fix alpha 
-
-    @todo
-      test if this works with the parallel versions
-*/
-void pllSetFixedAlpha(double alpha, int model, partitionList * pr, pllInstance *tr)
-{
-  //make sure that we are setting alpha for a partition within the current range 
-  //of partitions
-  double old_fracchange = tr->fracchange;
-
-  assert(model >= 0 && model < pr->numberOfPartitions);
-
-  assert(alpha >= PLL_ALPHA_MIN && alpha <= PLL_ALPHA_MAX);
-
-  //set the alpha parameter 
-  
-  pr->partitionData[model]->alpha = alpha;
-
-  //do the discretization of the gamma curve
-
-  pllMakeGammaCats(pr->partitionData[model]->alpha, pr->partitionData[model]->gammaRates, 4, tr->useMedian);
-
-  //broadcast the changed parameters to all threads/MPI processes 
-
-#if (defined(_FINE_GRAIN_MPI) || defined(_USE_PTHREADS))
-  pllMasterBarrier(tr, pr, PLL_THREAD_COPY_ALPHA);
-#endif
-
-  pr->partitionData[model]->optimizeAlphaParameter = PLL_FALSE;
-
-  //a model parameter changed: mark the list dirty so that the linkage 
-  //consistency check runs again. Clearing the flag here would skip the 
-  //check and also drop a pending one requested by an earlier setter.
-  pr->dirty = PLL_TRUE;
-  updateAllBranchLengths (tr, old_fracchange, tr->fracchange);
-}
-
-/** @ingroup modelParamsGroup
-    @brief Get the rate categories of the Gamma model of a partition
-
-    Copies the four gamma rate categories of partition \a pid from
-    partition list \a pr into \a outBuffer.
-
-    @param pr   List of partitions
-    @param pid  Index of partition to use
-    @param outBuffer  Output buffer where to store the rates (4 doubles)
-*/
-void pllGetGammaRates (partitionList * pr, int pid, double * outBuffer)
-{
-  /* TODO: Change the hardcoded 4 and also add a check that this partition
-     really uses gamma. Currently, instance is also not required */
-  const double * categories = pr->partitionData[pid]->gammaRates;
-
-  memcpy (outBuffer, categories, 4 * sizeof (double));
-}
-
-/** @ingroup modelParamsGroup
-    @brief Get the alpha parameter of the Gamma model of a partition
-
-    Returns the alpha value stored for partition \a pid of partition
-    list \a pr.
-
-    @param pr   List of partitions
-    @param pid  Index of partition to use
-
-    @return
-      Alpha parameter
-*/
-double pllGetAlpha (partitionList * pr, int pid)
-{
-  /* TODO: check if the partition uses gamma */
-  double alpha = pr->partitionData[pid]->alpha;
-
-  return (alpha);
-}
-
-
-/** @ingroup modelParamsGroups
-    @brief Get the base frequencies of a partition
-
-    Gets the base frequencies of partition \a model from partition list
-    \a partitionList and stores them in \a outBuffer. Note that \outBuffer
-    must be of size s, where s is the number of states.
-
-    @param  tr       PLL instance
-    @param pr        List of partitions
-    @param model     Index of the partition for which we want to get the base frequencies
-    @param outBuffer Buffer where to store the base frequencies
-*/
-void pllGetBaseFrequencies(partitionList * pr, int model, double * outBuffer)
-{
-  memcpy (outBuffer, pr->partitionData[model]->frequencies, pr->partitionData[model]->states * sizeof (double));
-}
-
-
-/** @ingroup modelParamsGroups
-    @brief Set all base frequencies to a fixed value for a partition
-    
-    Sets all base freuqencies of a partition to fixed values and disables 
-    ML optimization of these parameters 
-
-    @param f
-      array containing the base frequencies
-
-    @param  length
-      length of array f, this needs to be as long as the number of 
-      states in the model, otherwise an assertion will fail!
-
-    @param model
-      Index of the partition for which we want to set the frequencies 
-
-    @param pr
-      List of partitions
-      
-    @param tr
-      Library instance for which we want to fix the base frequencies
-
-    @todo
-      test if this works with the parallel versions
-*/
-void pllSetFixedBaseFrequencies(double *f, int length, int model, partitionList * pr, pllInstance *tr)
-{
-  int 
-    i;
-
-  double 
-    acc = 0.0,
-    old_fracchange;
-
-  old_fracchange = tr->fracchange;
-
-  //make sure that we are setting the base frequencies for a partition within the current range 
-  //of partitions
-  assert(model >= 0 && model < pr->numberOfPartitions);
-
-  //make sure that the length of the input array f containing the frequencies 
-  //is as long as the number of states in the model 
-  assert(length == pr->partitionData[model]->states);
-
-
-  //make sure that the base frequencies sum approximately to 1.0
-  
-  for(i = 0; i < length; i++)
-    acc += f[i];
-
-  if(fabs(acc - 1.0) > 0.000001)
-    assert(0);
-
-  //copy the base frequencies 
-  memcpy(pr->partitionData[model]->frequencies, f, sizeof(double) * length);
-
-  //re-calculate the Q matrix 
-  pllInitReversibleGTR(tr, pr, model);
-
-
-  //broadcast the new Q matrix to all threads/processes 
-#if (defined(_FINE_GRAIN_MPI) || defined(_USE_PTHREADS))
-  pllMasterBarrier (tr, pr, PLL_THREAD_COPY_RATES);
-#endif
-  
-  pr->partitionData[model]->optimizeBaseFrequencies = PLL_FALSE;
-
-  pr->dirty = PLL_TRUE;
-  updateAllBranchLengths (tr, old_fracchange, tr->fracchange);
-}
-
-/** @ingroup modelParamsGroups
-    @brief Set that the base freuqencies are optimized under ML
-    
-    The base freuqencies for partition model will be optimized under ML    
-
-    @param model
-      Index of the partition for which we want to optimize base frequencies 
-
-    @param pr
-      List of partitions
-      
-    @param tr
-      Library instance for which we want to fix the base frequencies
-
-    @todo
-      test if this works with the parallel versions
-*/
-int pllSetOptimizeBaseFrequencies(int model, partitionList * pr, pllInstance *tr)
-{
-  int
-    states,
-    i;
-
-  double 
-    initialFrequency,
-    acc = 0.0;
-
-  //make sure that we are setting the base frequencies for a partition within the current range 
-  //of partitions
-  if(!(model >= 0 && model < pr->numberOfPartitions))
-    {
-      errno = PLL_PARTITION_OUT_OF_BOUNDS;
-      return PLL_FALSE;
-    }
-
-  //set the number of states/ferquencies in this partition 
-  states = pr->partitionData[model]->states;
-
-  //set all frequencies to 1/states
-  
-  initialFrequency = 1.0 / (double)states;
-
-  for(i = 0; i < states; i++)
-    pr->partitionData[model]->frequencies[i] = initialFrequency;
-
-  //make sure that the base frequencies sum approximately to 1.0
-  
-  for(i = 0; i < states; i++)
-    acc += pr->partitionData[model]->frequencies[i];
-
-  if(fabs(acc - 1.0) > 0.000001)
-    {
-      errno = PLL_BASE_FREQUENCIES_DO_NOT_SUM_TO_1;
-      return PLL_FALSE;
-    }
-
-  //re-calculate the Q matrix 
-  pllInitReversibleGTR(tr, pr, model);
-
-  //broadcast the new Q matrix to all threads/processes 
-#if (defined(_FINE_GRAIN_MPI) || defined(_USE_PTHREADS))
-  pllMasterBarrier (tr, pr, PLL_THREAD_COPY_RATES);
-#endif
-  
-  pr->partitionData[model]->optimizeBaseFrequencies = PLL_TRUE;
-
-  pr->dirty = PLL_TRUE;
-
-  return PLL_TRUE;
-}
-
-
-
-
-/** @ingroup modelParamsGroups
-    @brief Get the substitution rates for a specific partition
-
-    Gets the substitution rates of partition \a model from partition list
-    \a partitionList and stores them in \a outBuffer. Note that \outBuffer
-    must be of size (2 * s - s) / 2, where s is the number of states, i.e.
-    the number of upper diagonal entries of the Q matrix.
-
-    @param tr        PLL instance
-    @param pr        List of partitions
-    @param model     Index of partition for which we want to get the substitution rates
-    @param outBuffer Buffer where to store the substitution rates.
-*/
-void pllGetSubstitutionMatrix (partitionList * pr, int model, double * outBuffer)
-{
-  int 
-    rates,
-    states;
-  
-  states = pr->partitionData[model]->states;
-  rates = (states * states - states) / 2;
-
-  memcpy (outBuffer, pr->partitionData[model]->substRates, rates * sizeof (double));
-}
-
-/** @ingroup modelParamsGroups
-     @brief Set all substitution rates for a specific partition and disable ML optimization for them
-    
-    Sets all substitution rates of a partition to fixed values and disables 
-    ML optimization of these parameters. It will automatically re-scale the relative rates  
-    such that the last rate is 1.0 
-
-    @param f
-      array containing the substitution rates
-
-    @param length
-      length of array f, this needs to be as long as: (s * s - s) / 2,
-      i.e., the number of upper diagonal entries of the Q matrix
-
-    @param model
-      Index of the partition for which we want to set/fix the substitution rates
-
-    @param pr
-      List of partitions
-      
-    @param tr
-      Library instance for which we want to fix the substitution rates 
-
-    @todo
-      test if this works with the parallel versions
-*/
-void pllSetFixedSubstitutionMatrix(double *q, int length, int model, partitionList * pr,  pllInstance *tr)
-{
-  pllSetSubstitutionMatrix(q, length, model, pr, tr);
-  pr->partitionData[model]->optimizeSubstitutionRates = PLL_FALSE;
-}
-
-/** @ingroup modelParamsGroups
-     @brief Set all substitution rates for a specific partition
-    
-    Sets all substitution rates of a partition to the given values.
-    It will automatically re-scale the relative rates such that the last rate is 1.0 
-
-    @param f
-      array containing the substitution rates
-
-    @param length
-      length of array f, this needs to be as long as: (s * s - s) / 2,
-      i.e., the number of upper diagonal entries of the Q matrix
-
-    @param model
-      Index of the partition for which we want to set/fix the substitution rates
-
-    @param pr
-      List of partitions
-      
-    @param tr
-      Library instance for which we want to fix the substitution rates 
-
-    @todo
-      test if this works with the parallel versions
-*/
-void pllSetSubstitutionMatrix(double *q, int length, int model, partitionList * pr,  pllInstance *tr)
-{
-  int 
-    i,
-    numberOfRates; 
-
-  double
-    scaler,
-    old_fracchange;
-
-  old_fracchange = tr->fracchange;
-
-  //make sure that we are setting the Q matrix for a partition within the current range 
-  //of partitions
-  assert(model >= 0 && model < pr->numberOfPartitions);
-
-  numberOfRates = (pr->partitionData[model]->states * pr->partitionData[model]->states - pr->partitionData[model]->states) / 2;
-
-  //  make sure that the length of the array containing the subsitution rates 
-  //  corresponds to the number of states in the model
-
-  assert(length == numberOfRates);
-
-  //automatically scale the last rate to 1.0 if this is not already the case
-
-  if(q[length - 1] != 1.0)    
-    scaler = 1.0 / q[length - 1]; 
-  else
-    scaler = 1.0;
-
-  //set the rates for the partition and make sure that they are within the allowed bounds 
-
-  for(i = 0; i < length; i++)
-    {
-      double
-        r = q[i] * scaler;
-      
-      assert(r >= PLL_RATE_MIN && r <= PLL_RATE_MAX);
-      
-      pr->partitionData[model]->substRates[i] = r;
-    }
-
-  //re-calculate the Q matrix 
-  pllInitReversibleGTR(tr, pr, model);
-
-  //broadcast the new Q matrix to all threads/processes 
-#if (defined(_FINE_GRAIN_MPI) || defined(_USE_PTHREADS))
-  pllMasterBarrier (tr, pr, PLL_THREAD_COPY_RATES);
-#endif
-  
-
-  pr->dirty = PLL_TRUE;
-  updateAllBranchLengths (tr, old_fracchange, tr->fracchange);
-}
-
-
-
-
-/* initialize a parameter linkage list for a certain parameter type (can be whatever).
-   the input is an integer vector that contaions NumberOfModels (numberOfPartitions) elements.
-
-   if we want to have all alpha parameters unlinked and have say 4 partitions the input 
-   vector would look like this: {0, 1, 2, 3}, if we want to link partitions 0 and 3 the vector 
-   should look like this: {0, 1, 2, 0} 
-*/
-
-/** @ingroup modelParamsGroups
-*/
-linkageList* initLinkageList(int *linkList, partitionList *pr)
-{
-  int 
-    k,
-    partitions,
-    numberOfModels = 0,
-    i,
-    pos;
-  
-  linkageList 
-    *ll = (linkageList*)rax_malloc(sizeof(linkageList));
-    
-  /* figure out how many distinct parameters we need to estimate 
-     in total, if all parameters are linked the result will be 1 if all 
-     are unlinked the result will be pr->numberOfPartitions */
-  
-  for(i = 0; i < pr->numberOfPartitions; i++)
-    {
-      if(!(linkList[i] >= 0 && linkList[i] < pr->numberOfPartitions))
-        {
-          errno = PLL_LINKAGE_LIST_OUT_OF_BOUNDS;
-          return (linkageList*)NULL;
-        }
-
-      if(!(linkList[i] <= i && linkList[i] <= numberOfModels + 1))
-        {
-          errno = PLL_LINKAGE_LIST_OUT_OF_BOUNDS;
-          return (linkageList*)NULL;
-        }
-
-      if(linkList[i] > numberOfModels)
-        numberOfModels = linkList[i];
-
-    }
-
-  numberOfModels++;
-  
-  /* allocate the linkage list data structure that containes information which parameters of which partition are 
-     linked with each other.
-
-     Note that we need a separate invocation of initLinkageList() and a separate linkage list 
-     for each parameter type */
-
-  ll->entries = numberOfModels;
-  ll->ld      = (linkageData*)rax_malloc(sizeof(linkageData) * numberOfModels);
-
-  /* noe loop over the number of free parameters and assign the corresponding partitions to each parameter */
-
-  for(i = 0; i < numberOfModels; i++)
-    {
-      /* 
-         the valid flag is used for distinguishing between DNA and protein data partitions.
-         This can be used to enable/disable parameter optimization for the paremeter 
-         associated to the corresponding partitions. This deature is used in optRatesGeneric 
-         to first optimize all DNA GTR rate matrices and then all PROT GTR rate matrices */
-
-      ll->ld[i].valid = PLL_TRUE;
-      partitions = 0;
-
-      /* now figure out how many partitions share this joint parameter */
-
-      for(k = 0; k < pr->numberOfPartitions; k++)
-        if(linkList[k] == i)
-          partitions++;     
-
-      /* assign a list to store the partitions that share the parameter */
-
-      ll->ld[i].partitions = partitions;
-      ll->ld[i].partitionList = (int*)rax_malloc(sizeof(int) * partitions);
-      
-      /* now store the respective partition indices in this list */
-      
-      for(k = 0, pos = 0; k < pr->numberOfPartitions; k++)
-        if(linkList[k] == i)
-          ll->ld[i].partitionList[pos++] = k;
-    }
-
-  /* return the linkage list for the parameter */
-
-  return ll;
-}
-
-
-
-static linkageList* initLinkageListString(char *linkageString, partitionList * pr)
-{
-  int 
-    *list = (int*)rax_malloc(sizeof(int) * pr->numberOfPartitions),
-    j;
-
-  linkageList 
-    *l;
-
-  char
-    *str1,
-    *saveptr,
-//    *ch = strdup(linkageString),
-    *ch,
-    *token;
-  
-  ch = (char *) rax_malloc (strlen (linkageString) + 1);
-  strcpy (ch, linkageString);
-
-  for(j = 0, str1 = ch; ;j++, str1 = (char *)NULL) 
-    {
-      token = STRTOK_R(str1, ",", &saveptr);
-      if(token == (char *)NULL)
-        break;
-      assert(j < pr->numberOfPartitions);
-      list[j] = atoi(token);
-    }
-  
-  rax_free(ch);
-
-  l = initLinkageList(list, pr);
-  
-  rax_free(list);
-
-  return l;
-}
-
-/** @ingroup modelParamsGroups
-    @brief Link alpha parameters across partitions
-    
-    Links alpha paremeters across partitions (GAMMA model of rate heterogeneity)
-
-    @param string
-      string describing the linkage pattern    
-
-    @param pr
-      List of partitions
-
-    @todo
-      test behavior/impact/mem-leaks of this when PSR model is used 
-      it shouldn't do any harm, but it would be better to check!
-*/
-int pllLinkAlphaParameters(char *string, partitionList *pr)
-{
-  //assumes that it has already been assigned once
-  freeLinkageList(pr->alphaList);
-  
-  pr->alphaList = initLinkageListString(string, pr); 
-
-  pr->dirty = PLL_TRUE;
-  
-  if(!pr->alphaList)
-    return PLL_FALSE;
-  else
-    return PLL_TRUE;
-}
-
-/** @ingroup modelParamsGroups
-    @brief Link base frequency parameters across partitions
-    
-    Links base frequency paremeters across partitions
-
-    @param string
-      string describing the linkage pattern    
-
-    @param pr
-      List of partitions
-
-    @todo
-      semantics of this function not clear yet: right now this only has an effect 
-      when we do a ML estimate of base frequencies 
-      when we use empirical or model-defined (protein data) base frequencies, one could 
-      maybe average over the per-partition frequencies, but the averages would need to be weighted 
-      accodring on the number of patterns per partition 
-*/
-int pllLinkFrequencies(char *string, partitionList *pr)
-{
-  //assumes that it has already been assigned once
-  freeLinkageList(pr->freqList);
-
-  pr->freqList = initLinkageListString(string, pr);
-
-  pr->dirty = PLL_TRUE;
-
-  if(!pr->freqList)
-    return PLL_FALSE;
-  else
-    return PLL_TRUE;
-}
-
-/** @ingroup modelParamsGroups
-    @brief Link Substitution matrices across partitions
-    
-    Links substitution matrices (Q matrices) across partitions
-
-    @param string
-      string describing the linkage pattern    
-
-    @param pr
-      List of partitions
-
-    @todo
-      re-think/re-design how this is done for protein
-      models
-*/
-int pllLinkRates(char *string, partitionList *pr)
-{
-  //assumes that it has already been assigned once
-  freeLinkageList(pr->rateList);
-  
-  pr->rateList = initLinkageListString(string, pr);
-  
-  pr->dirty = PLL_TRUE;  
-
-  if(!pr->dirty)
-    return PLL_FALSE;
-  else
-    return PLL_TRUE;
-}
-
-
-
-
-/** @ingroup modelParamsGroups
-    @brief Initialize partitions according to model parameters
-    
-    Initializes partitions according to model parameters.
-
-    @param tr              The PLL instance
-    @param partitions      List of partitions
-    @param alignmentData   The parsed alignment
-    @return                Returns \b PLL_TRUE in case of success, otherwise \b PLL_FALSE
-*/
-int pllInitModel (pllInstance * tr, partitionList * partitions) 
-{
-  double ** ef;
-  int
-    i,
-    *unlinked = (int *)rax_malloc(sizeof(int) * partitions->numberOfPartitions);
-  double old_fracchange = tr->fracchange;
-
-  ef = pllBaseFrequenciesInstance (tr, partitions);
-
-  if(!ef)
-    return PLL_FALSE;
-
-  
-#if ! (defined(__ppc) || defined(__powerpc__) || defined(PPC))
-#if (defined(__AVX) || defined(__SSE3))
-  _mm_setcsr( _mm_getcsr() | _MM_FLUSH_ZERO_ON);
-#endif
-#endif 
-
-#ifdef _USE_PTHREADS
-  tr->threadID = 0;
-#ifndef _PORTABLE_PTHREADS
-  /* not very portable thread to core pinning if PORTABLE_PTHREADS is not defined
-     by defualt the cod ebelow is deactivated */
-  pinToCore(0);
-#endif
-#endif
-
-#if (defined(_FINE_GRAIN_MPI) || defined(_USE_PTHREADS))
-  /* 
-     this main function is the master thread, so if we want to run RAxML with n threads,
-     we use pllStartPthreads to start the n-1 worker threads */
-  
-#ifdef _USE_PTHREADS
-  pllStartPthreads (tr, partitions);
-#endif
-
-  /* via pllMasterBarrier() we invoke parallel regions in which all Pthreads work on computing something, mostly likelihood 
-     computations. Have a look at execFunction() in axml.c where we siwtch of the different types of parallel regions.
-
-     Although not necessary, below we copy the info stored on tr->partitionData to corresponding copies in each thread.
-     While this is shared memory and we don't really need to copy stuff, it was implemented like this to allow for an easier 
-     transition to a distributed memory implementation (MPI).
-     */
-#ifdef _FINE_GRAIN_MPI
-  //MPI_Bcast (&(partitions->numberOfPartitions), 1, MPI_INT, MPI_ROOT, MPI_COMM_WORLD);
-  MPI_Bcast (&(partitions->numberOfPartitions), 1, MPI_INT, 0, MPI_COMM_WORLD);
-#endif
-  
-  /* mpi version now also uses the generic barrier */
-  pllMasterBarrier (tr, partitions, PLL_THREAD_INIT_PARTITION);
-#else  /* SEQUENTIAL */
-  /* 
-     allocate the required data structures for storing likelihood vectors etc 
-     */
-
-  //initializePartitions(tr, tr, partitions, partitions, 0, 0);
-  initializePartitionsSequential (tr, partitions);
-#endif
-  
-  //initializePartitions (tr, tr, partitions, partitions, 0, 0);
-  
-  initModel (tr, ef, partitions);
-
-  pllEmpiricalFrequenciesDestroy (&ef, partitions->numberOfPartitions);
-
-  for(i = 0; i < partitions->numberOfPartitions; i++)
-    unlinked[i] = i;
-
-  //by default everything is unlinked initially 
-  partitions->alphaList = initLinkageList(unlinked, partitions);
-  partitions->freqList  = initLinkageList(unlinked, partitions);
-  partitions->rateList  = initLinkageList(unlinked, partitions);
-
-  rax_free(unlinked);
-
-  updateAllBranchLengths (tr, old_fracchange ? old_fracchange : 1,  tr->fracchange);
-  pllEvaluateLikelihood (tr, partitions, tr->start, PLL_TRUE, PLL_FALSE);
-
-  return PLL_TRUE;
-}
- 
-/** @ingroup modelParamsGroups
-    @brief Optimize all free model parameters of the likelihood model
-    
-    Initializes partitions according to model parameters.
-
-    @param tr
-      The PLL instance
-
-    @param pr
-      List of partitions
-
-    @param likelihoodEpsilon
-      Specifies up to which epsilon in likelihood values the iterative routine will 
-      be optimizing the parameters  
-*/
-int pllOptimizeModelParameters(pllInstance *tr, partitionList *pr, double likelihoodEpsilon)
-{
-  //force the consistency check
-
-  pr->dirty = PLL_TRUE;
-
-  if(!checkLinkageConsistency(pr))
-    return PLL_FALSE;
-
-  modOpt(tr, pr, likelihoodEpsilon);
-
-  return PLL_TRUE;
-}
-
-/** @brief Read the contents of a file
-    
-    Reads the ile \a filename and return its content. In addition
-    the size of the file is stored in the input variable \a filesize.
-    The content of the variable \a filesize can be anything and will
-    be overwritten.
-
-    @param filename
-      Name of the input file
-
-    @param filesize
-      Input parameter where the size of the file (in bytes) will be stored
-
-    @return
-      Contents of the file
-*/
-char * 
-pllReadFile (const char * filename, long * filesize)
-{
-  FILE * fp;
-  char * rawdata;
-
-  // FIX BUG: opening with "r" does not work on Windows
-//  fp = fopen (filename, "r");
-  printf("[PLL] Reading file %s...\n", filename);
-  fp = fopen (filename, "rb");
-  printf("[PLL] Success!\n");
-  if (!fp) return (NULL);
-
-  /* obtain file size */
-  if (fseek (fp, 0, SEEK_END) == -1)
-   {
-     fclose (fp);
-     return (NULL);
-   }
-
-  *filesize = ftell (fp);
-
-  if (*filesize == -1) 
-   {
-     fclose (fp);
-     return (NULL);
-   }
-  rewind (fp);
-
-  /* allocate buffer and read file contents */
-  rawdata = (char *) rax_malloc (((*filesize) + 1) * sizeof (char));
-  if (rawdata) 
-   {
-     if (fread (rawdata, sizeof (char), *filesize, fp) != (size_t) *filesize) 
-      {
-        rax_free (rawdata);
-        rawdata = NULL;
-      }
-     else
-      {
-        rawdata[*filesize] = 0;
-      }
-   }
-
-  fclose (fp);
-
-  return (rawdata);
-}
-
-static void getInnerBranchEndPointsRecursive (nodeptr p, int tips, int * i, node **nodes)
-{
-  if (!isTip (p->next->back->number, tips))
-   {
-     nodes[(*i)++] = p->next;
-     getInnerBranchEndPointsRecursive(p->next->back, tips, i, nodes);
-   }
-  if (!isTip (p->next->next->back->number, tips))
-   {
-     nodes[(*i)++] = p->next->next;
-     getInnerBranchEndPointsRecursive(p->next->next->back, tips, i, nodes);
-   }
-}
-
-node ** pllGetInnerBranchEndPoints (pllInstance * tr)
-{
-  node ** nodes;
-  nodeptr p;
-  int i = 0;
-
-  nodes = (node **) rax_calloc(tr->mxtips - 3, sizeof(node *));
-
-  p = tr->start;
-  assert (isTip(p->number, tr->mxtips));
-
-  getInnerBranchEndPointsRecursive(p->back, tr->mxtips, &i, nodes);
-
-  return nodes;
-}
-
-#if defined WIN32 || defined _WIN32 || defined __WIN32__
-void* rax_calloc(size_t count, size_t size) {
-	void* res = rax_malloc(size * count);
-	memset(res,0,size * count);
-	return res;
-}
-#endif
-
diff --git a/stoprule.cpp b/stoprule.cpp
index 858ad19..f0afc7e 100644
--- a/stoprule.cpp
+++ b/stoprule.cpp
@@ -91,9 +91,9 @@ bool StopRule::meetStopCondition(int cur_iteration, double cur_correlation) {
 		else
 			return cur_iteration > predicted_iteration;
 	case SC_UNSUCCESS_ITERATION:
-		return cur_iteration > getLastImprovedIteration() + unsuccess_iteration;
+		return cur_iteration >= getLastImprovedIteration() + unsuccess_iteration;
 	case SC_BOOTSTRAP_CORRELATION:
-		return ((cur_correlation >= min_correlation) && (cur_iteration > getLastImprovedIteration() + unsuccess_iteration))
+		return ((cur_correlation >= min_correlation) && (cur_iteration >= getLastImprovedIteration() + unsuccess_iteration))
 				|| cur_iteration > max_iteration;
 	case SC_REAL_TIME:
 		return (getRealTime() - start_real_time >= max_run_time);
diff --git a/test_scripts/README b/test_scripts/README
new file mode 100644
index 0000000..f896292
--- /dev/null
+++ b/test_scripts/README
@@ -0,0 +1,19 @@
+1. Compile your local branch:
+    ./compile.sh <your_branch>
+    EXAMPLE: ./compile.sh master
+You might also want to 'pull' code from the remote server to update your branch before performing the compilation. The binary of your branch will be stored in the 'iqtree_binaries' directory. A binary of the most recent IQ-TREE release will also be compiled and stored in the same directory.
+
+2. If you want to run the standard tests, use the gen_test_standard.py script (running the script without any options prints the help menu) as follows:
+    ./gen_test_standard.py -b <path_to_your_iqtree_binary> -c <config_file>
+    EXAMPLE: ./gen_test_standard.py -b iqtree_binaries/iqtree_master -c test_configs.txt
+A text file named '<your_binary_name>_test_standard_cmds.txt' containing all the test commands will be generated. Copy all the content of the test_scripts folder to libby. Submit the jobs with the following command:
+    ./submit_jobs.sh <number_of_threads> <cmd_file> <aln_dir> <out_dir> <binary_dir>
+    EXAMPLE: ./submit_jobs.sh 16 iqtree_master_test_standard_cmds.txt test_alignments iqtree_master_test_standard iqtree_binaries 
+The log files containing the status of all jobs are written to <out_dir>/<cmd_file>[0-9]*.log. Look into them to see whether all jobs have run successfully. Grep for "ERROR" to see which jobs hit a bug.
+
+3. If you want to test all the commands submitted by users of the web server that caused bugs (run ./gen_test_webserver.py -h for help):
+    ./gen_test_webserver.py -b <path_to_iqtree_binary>
+    EXAMPLE: ./gen_test_webserver.py -b iqtree_binaries/iqtree_master
+The above command creates a folder called 'webserver_alignments' that contains all the user alignments. The next steps are the same as described in 2. 
+    EXAMPLE: ./submit_jobs.sh 40 iqtree_master_test_webserver_cmds.txt webserver_alignments iqtree_master_test_webserver iqtree_binaries
+
diff --git a/test_scripts/compile.sh b/test_scripts/compile.sh
new file mode 100755
index 0000000..b532b0a
--- /dev/null
+++ b/test_scripts/compile.sh
@@ -0,0 +1,120 @@
+#!/bin/bash - 
+#===============================================================================
+#
+#          FILE: compile.sh
+# 
+#         USAGE: ./compile.sh <branch_name>
+# 
+#   DESCRIPTION: This script checks out the last release version of IQ-TREE and the HEAD of
+#                the given branch, then compiles both versions.
+# 
+#       OPTIONS: ---
+#  REQUIREMENTS: ---
+#          BUGS: ---
+#         NOTES: ---
+#        AUTHOR: Tung Nguyen (nltung at gmail.com) 
+#  ORGANIZATION: 
+#       CREATED: 2015-01-26 13:02:57 CET
+#      REVISION:  ---
+#===============================================================================
+
+set -o nounset                              # Treat unset variables as an error
+
+require_clean_work_tree () {
+    # Update the index
+    git update-index -q --ignore-submodules --refresh
+    err=0
+
+    # Disallow unstaged changes in the working tree
+    if ! git diff-files --quiet --ignore-submodules --
+    then
+        echo >&2 "cannot $0: you have unstaged changes."
+        git diff-files --name-status -r --ignore-submodules -- >&2
+        err=1
+    fi
+
+    # Disallow uncommitted changes in the index
+    if ! git diff-index --cached --quiet HEAD --ignore-submodules --
+    then
+        echo >&2 "cannot $0: your index contains uncommitted changes."
+        git diff-index --cached --name-status -r --ignore-submodules HEAD -- >&2
+        err=1
+    fi
+
+    if [ $err = 1 ]
+    then
+        echo >&2 "Please commit or stash them."
+        exit 1
+    fi
+}
+
+#Check whether the git work tree is clean
+#require_clean_work_tree
+
+if [ "$#" != 1 ]
+then
+  echo "Please enter the name of the local branch you want to compile"
+  echo "USAGE: $0 <branch_name>" >&2
+  exit 1
+fi
+
+
+#Determine hash code of current branch
+#branch=`git status | grep "On branch" | awk '{print $3}'`
+branch=$1
+#Take the first 6 characters of the current head commit
+commit_cur=`git log | head -n1 | awk '{print $2}' | cut -c 1-6`
+
+#Directory and binary names
+cur_build="build_${branch}"
+release_build="build_release"
+release_binary_prefix="iqtree_release"
+#cur_binary="iqtree_${commit_cur}"
+cur_binary="iqtree_${branch}"
+bin_dir="iqtree_binaries"
+
+#Clean up
+if [ -e $cur_build ]
+then
+  rm -rf $cur_build
+fi
+if [ -e $release_build ]
+then
+  rm -rf $release_build 
+fi
+if [ -e $bin_dir ]
+then
+  rm -rf $bin_dir
+fi
+mkdir $bin_dir
+mkdir $cur_build 
+#Fetch changes from server
+git fetch
+curBranch=`git status | grep 'On branch' | awk '{print $3}'`
+if [ ${curBranch} != ${branch} ]
+then
+  git stash
+  git checkout $branch
+  git pull
+  git submodule update
+fi
+cmake -B${cur_build} -H..
+make -C ${cur_build} -j4
+cp ${cur_build}/iqtree ${bin_dir}/${cur_binary} 
+#rm -rf ${cur_build}
+mkdir $release_build
+#Find the hash code of the most recent release in master
+commit=`git log origin/master | grep -m 1 -B 4 "release version" | grep "commit" | awk '{print $2}'`
+version=`git log origin/master | grep -m 1 "release version [0-9]*" | awk '{print $3}'`
+git checkout ${commit}
+git submodule update
+cmake -B${release_build} -H..
+make -C ${release_build} -j4
+cp ${release_build}/iqtree ${bin_dir}/${release_binary_prefix}_${version}
+git checkout ${curBranch}
+git stash apply
+git submodule update
+
+#Clean up
+rm -rf $cur_build
+rm -rf $release_build 
diff --git a/test_scripts/gen_test_standard.py b/test_scripts/gen_test_standard.py
new file mode 100755
index 0000000..dfe7f1e
--- /dev/null
+++ b/test_scripts/gen_test_standard.py
@@ -0,0 +1,92 @@
+#!/usr/bin/env python
+'''
+Created on Jan. 26, 2015
+
+@author: tung
+'''
+import sys, os, time, multiprocessing, optparse 
+import subprocess, logging, datetime
+
+def parse_config(config_file):
+  singleAln, partitionAln, partOpts, genericOpts = [], [], [], []
+  with open(config_file) as f:
+    #lines = f.readlines()
+    lines = [line.strip() for line in f if line.strip()]
+  readSingleAln = False
+  readPartAln = False
+  partOpt = False
+  genericOpt = False
+  for line in lines:
+    #print line
+    if line == 'START_SINGLE_ALN':
+      readSingleAln = True
+      continue
+    if line == 'END_SINGLE_ALN':
+      readSingleAln = False 
+      continue
+    if readSingleAln:
+      singleAln.append(line) 
+    if line == 'START_PARTITION_ALN':
+      readPartAln = True
+      continue
+    if line == 'END_PARTITION_ALN':
+      readPartAln = False 
+      continue
+    if readPartAln:
+      partitionAln.append(line.split())
+    if line == 'START_PARTITION_OPTIONS':
+      partOpt = True
+      continue
+    if line == 'END_PARTITION_OPTIONS':
+      partOpt = False
+      continue
+    if line == 'START_GENERIC_OPTIONS':
+      genericOpt = True
+      continue
+    if line == 'END_GENERIC_OPTIONS':
+      genericOpt = False
+      continue
+    if partOpt:
+      partOpts.append(line)
+    if genericOpt:
+      genericOpts.append(line)
+  return (singleAln, partitionAln, genericOpts, partOpts)
+      
+
+if __name__ == '__main__':
+  usage = "USAGE: %prog [options]"
+  parser = optparse.OptionParser(usage=usage)
+  parser.add_option('-b','--binary', dest="iqtree_bin", help='Path to your IQ-TREE binary')
+  parser.add_option('-c','--config', dest="config_file", help='Path to test configuration file')
+  (options, args) = parser.parse_args()
+  if not options.iqtree_bin or not options.config_file:
+    parser.print_help()
+    exit(0)
+  (singleAln, partitionAln, genericOpts, partOpts) = parse_config(options.config_file)
+  testCmds = []
+  # Generate test commands for single model
+  for aln in singleAln:
+    for opt in genericOpts:
+      cmd = '-s ' + aln + ' ' + opt
+      testCmds.append(cmd)
+  # Generate test commands for partition model
+  for aln in partitionAln:
+    for opt in genericOpts:
+      for partOpt in partOpts:
+        cmd = '-s ' + aln[0] + ' ' + opt + ' ' + partOpt + ' ' + aln[1]
+        testCmds.append(cmd)
+  testNr = 1
+  jobs = []
+  for cmd in testCmds:
+    testIDRel = os.path.basename(options.iqtree_bin) + "_TEST_" + str(testNr)
+    testCMD = testIDRel + " " + options.iqtree_bin + " -pre " + testIDRel + " " + cmd
+    testNr = testNr + 1 
+    jobs.append(testCMD)
+#  print "\n".join(jobs)
+  outfile = open(os.path.basename(options.iqtree_bin) + '_test_standard_cmds.txt', "wb")
+  for job in jobs:
+    print >> outfile, job
+  outfile.close()
+
+
+
diff --git a/test_scripts/gen_test_webserver.py b/test_scripts/gen_test_webserver.py
new file mode 100755
index 0000000..de1e84d
--- /dev/null
+++ b/test_scripts/gen_test_webserver.py
@@ -0,0 +1,85 @@
+#!/usr/bin/env python
+'''
+Created on Feb. 01, 2015
+This script collects all commands submitted by users of the IQ-TREE web service
+that crashed
+Default location: /project/web-iqtree/user-data 
+
+@author: Tung Nguyen
+@email: nltung at gmai.com
+'''
+import sys, os, time, multiprocessing, optparse, fnmatch 
+import subprocess, logging, datetime
+import cmd
+import shutil
+from operator import itemgetter
+from cmd import Cmd
+
+def collect_logs(log_dir):
+    '''Scan log_dir recursively for run logs that report a crash.
+
+    Every file matching *.log below log_dir is counted; a log is treated as
+    buggy when its text contains the string 'CRASH'.
+
+    Returns a tuple (bugLogs, numLog): the paths of the buggy logs and the
+    total number of *.log files visited.
+    '''
+    bugLogs = []
+    numLog = 0
+    for root, dirnames, filenames in os.walk(log_dir):
+        for filename in fnmatch.filter(filenames, '*.log'):
+            numLog += 1
+            logFile = os.path.join(root, filename)
+            # Close every log explicitly instead of relying on the garbage
+            # collector to release the handle of a bare open().read().
+            with open(logFile) as f:
+                crashed = 'CRASH' in f.read()
+            if crashed:
+                bugLogs.append(logFile)
+    return (bugLogs, numLog)
+
+def collect_cmds(logFiles, out_dir):
+    '''Extract the command line and random seed of every given run log.
+
+    logFiles -- paths of the logs to read. Each path is split on '/'; the
+                parent directory name is the run id (converted to int) and
+                the grandparent directory name is stored as the e-mail.
+    out_dir  -- directory that receives copies of the input files; it is
+                created if it does not exist.
+
+    For a 'Command:' line, the alignment (4th token) and, if the line
+    contains '-sp', the partition file (6th token) are copied from the log's
+    directory into out_dir. Reading of a log stops at its 'Seed:' line.
+
+    Returns a list of tuples (run id, e-mail, seed, alignment, arguments),
+    where arguments is the 'Command:' line without its first two tokens.
+    '''
+    if not os.path.exists(out_dir):
+        os.makedirs(out_dir)
+    runs = []
+    for log in logFiles:
+        pathParts = log.split('/')
+        run_id = pathParts[-2]
+        email = pathParts[-3]
+        # Reset per log: without this, a log whose 'Seed:' line is not preceded
+        # by a 'Command:' line either raised NameError (first log) or silently
+        # reused the command and alignment of the previous log.
+        cmd = aln = None
+        with open(log) as f:
+            for line in f:
+                if line.startswith('Command:'):
+                    tokens = line.split()
+                    cmd = " ".join(tokens[2:])
+                    # NOTE(review): positions 3 and 5 assume the command starts
+                    # with "-s <aln> -sp* <partition>" -- confirm against the
+                    # web server's command format.
+                    aln = tokens[3]
+                    run_dir = os.path.abspath(os.path.join(log, os.pardir))
+                    shutil.copy2(os.path.join(run_dir, aln), out_dir)
+                    if '-sp' in line:
+                        partitionFile = tokens[5]
+                        shutil.copy2(os.path.join(run_dir, partitionFile), out_dir)
+                if line.startswith('Seed:'):
+                    if cmd is not None:
+                        seed = line.split()[1]
+                        runs.append((int(run_id), email, seed, aln, cmd))
+                    # With or without a command, the seed line ends this log.
+                    break
+    return runs
+
+def create_test_cmds(runs, iqtree_binary, filename):
+    '''Write one replay command per collected run to filename.
+
+    runs          -- sequence of tuples; index 0 is the run id, 1 the e-mail,
+                     2 the seed (string) and 4 the command-line arguments.
+    iqtree_binary -- binary path placed in front of the arguments.
+    filename      -- output file; overwritten if it exists.
+
+    Each line has the form
+        "<e-mail>_<id> <binary> <arguments> -seed <seed> -pre <e-mail>_<id>".
+    Runs whose arguments contain " -b " are left out.
+    '''
+    # The with-block closes the file even if a write fails part-way.
+    with open(filename, "wb") as outfile:
+        for run in runs:
+            run_id = run[1] + "_" + str(run[0])
+            seed = run[2]
+            args = run[4]
+            if " -b " in args:
+                continue
+            cmd = run_id + ' ' + iqtree_binary + ' ' + args + ' -seed ' + seed + ' -pre ' + run_id
+            outfile.write(cmd + "\n")
+                                        
+if __name__ == '__main__':
+    # Command-line driver: find the crashed runs below --indir, copy their
+    # input files to --outdir and write the replay commands, ordered by run
+    # id, to "<binary name>_test_webserver_cmds.txt".
+    parser = optparse.OptionParser(usage="USAGE: %prog [options]")
+    parser.add_option('-d', '--indir', dest="in_dir", default="/project/web-iqtree/user-data/",
+                      help='Path to user directory of the IQ-TREE web server [default: %default]')
+    parser.add_option('-o', '--outdir', dest="out_dir", default="webserver_alignments",
+                      help='Directory containing alignments [default: %default]')
+    parser.add_option('-b', '--iqtree_bin', dest='iqtree_bin', help='Path to IQ-Tree binary')
+    (options, args) = parser.parse_args()
+    # The binary path is the only option without a default.
+    if not options.iqtree_bin:
+        print "Please specify the path to your IQ-TREE binary"
+        parser.print_help()
+        exit(0)
+    print "Collecting buggy runs from " + options.in_dir
+    (bugLogs, numLog) = collect_logs(options.in_dir)
+    print ("Found %d job submissions, %d of them caused bugs" % (numLog, len(bugLogs)))
+    # Order the runs by their numeric id (first tuple element).
+    runs = sorted(collect_cmds(bugLogs, options.out_dir), key=itemgetter(0))
+    cmdFile = os.path.basename(options.iqtree_bin) + '_test_webserver_cmds.txt'
+    create_test_cmds(runs, options.iqtree_bin, cmdFile)
+
+    
diff --git a/test_scripts/generate_test_cmds.py b/test_scripts/generate_test_cmds.py
new file mode 100755
index 0000000..637eca9
--- /dev/null
+++ b/test_scripts/generate_test_cmds.py
@@ -0,0 +1,97 @@
+#!/usr/bin/env python
+'''
+Created on Jan. 26, 2015
+
+@author: tung
+'''
+import sys, os, time, multiprocessing, optparse 
+import subprocess, logging, datetime
+
+def parse_config(config_file):
+  '''Read a test configuration file made of START_*/END_* delimited sections.
+
+  Lines are stripped and blank lines are dropped before parsing. Four
+  sections are recognised:
+    START_SINGLE_ALN .. END_SINGLE_ALN               -> singleAln (line kept as is)
+    START_PARTITION_ALN .. END_PARTITION_ALN         -> partitionAln (line split on whitespace)
+    START_PARTITION_OPTIONS .. END_PARTITION_OPTIONS -> partOpts (line kept as is)
+    START_GENERIC_OPTIONS .. END_GENERIC_OPTIONS     -> genericOpts (line kept as is)
+
+  Returns the tuple (singleAln, partitionAln, genericOpts, partOpts); note
+  that genericOpts comes before partOpts in the result.
+  '''
+  singleAln, partitionAln, partOpts, genericOpts = [], [], [], []
+  with open(config_file) as f:
+    #lines = f.readlines()
+    lines = [line.strip() for line in f if line.strip()]
+  # One flag per section; a flag is True while the parser is inside that section.
+  readSingleAln = False
+  readPartAln = False
+  partOpt = False
+  genericOpt = False
+  for line in lines:
+    #print line
+    if line == 'START_SINGLE_ALN':
+      readSingleAln = True
+      continue
+    if line == 'END_SINGLE_ALN':
+      readSingleAln = False 
+      continue
+    # This append runs before the markers of the other sections are checked,
+    # so an unterminated single-alignment section also collects those markers.
+    if readSingleAln:
+      singleAln.append(line) 
+    if line == 'START_PARTITION_ALN':
+      readPartAln = True
+      continue
+    if line == 'END_PARTITION_ALN':
+      readPartAln = False 
+      continue
+    # Same ordering caveat as above with respect to the option-section markers.
+    if readPartAln:
+      partitionAln.append(line.split())
+    if line == 'START_PARTITION_OPTIONS':
+      partOpt = True
+      continue
+    if line == 'END_PARTITION_OPTIONS':
+      partOpt = False
+      continue
+    if line == 'START_GENERIC_OPTIONS':
+      genericOpt = True
+      continue
+    if line == 'END_GENERIC_OPTIONS':
+      genericOpt = False
+      continue
+    # Only non-marker lines reach this point; every marker above ends in continue.
+    if partOpt:
+      partOpts.append(line)
+    if genericOpt:
+      genericOpts.append(line)
+  return (singleAln, partitionAln, genericOpts, partOpts)
+      
+
+if __name__ == '__main__':
+  # Command-line driver: expand the test configuration into job lines and
+  # write them to --out_file. Every test command is emitted twice, once for
+  # the release binary and once for the test binary, under the same number.
+  usage = "USAGE: %prog [options]"
+  parser = optparse.OptionParser(usage=usage)
+  parser.add_option('-r','--release', dest="release_bin", help='Path to release binary', default="iqtree_release")
+  parser.add_option('-t','--test', dest="test_bin", help='Path to test binary', default="iqtree_test")
+  parser.add_option('-c','--config', dest="config_file", help='Path to test configuration file')
+  parser.add_option('-o','--out_file', dest="out_file", help='Name of the output file', default="iqtree_test_cmds.txt")
+  (options, args) = parser.parse_args()
+  # --config is the only option without a default. Checking the option itself
+  # (rather than len(sys.argv) == 1) also covers invocations such as "-r foo",
+  # which previously went on to call open(None).
+  if not options.config_file:
+    parser.print_help()
+    exit(0)
+  (singleAln, partitionAln, genericOpts, partOpts) = parse_config(options.config_file)
+  testCmds = []
+  # Generate test commands for single model
+  for aln in singleAln:
+    for opt in genericOpts:
+      testCmds.append('-s ' + aln + ' ' + opt)
+  # Generate test commands for partition model; aln[0] follows -s and aln[1]
+  # follows the partition option
+  for aln in partitionAln:
+    for opt in genericOpts:
+      for partOpt in partOpts:
+        testCmds.append('-s ' + aln[0] + ' ' + opt + ' ' + partOpt + ' ' + aln[1])
+  # Job line layout: "<job id> <binary> -pre <job id> <arguments>"; the release
+  # job comes directly before the test job for the same command.
+  jobs = []
+  for testNr, cmd in enumerate(testCmds, 1):
+    for binary in (options.release_bin, options.test_bin):
+      testID = binary + "_TEST_" + str(testNr)
+      jobs.append(testID + " " + binary + " -pre " + testID + " " + cmd)
+  # The with-block closes the file even if a write fails.
+  with open(options.out_file, "wb") as outfile:
+    for job in jobs:
+      outfile.write(job + "\n")
+
+
+
diff --git a/test_scripts/jobmanager.py b/test_scripts/jobmanager.py
new file mode 100755
index 0000000..5720e96
--- /dev/null
+++ b/test_scripts/jobmanager.py
@@ -0,0 +1,102 @@
+#!/usr/bin/env python
+'''
+Created on Aug 23, 2014
+
+@author: tung
+'''
+import sys, os, time, multiprocessing, optparse 
+import subprocess, logging, datetime
+
+def cpu_count():
+    ''' Returns the number of CPUs in the system, or 1 if it cannot be determined
+
+    win32 reads the NUMBER_OF_PROCESSORS environment variable, darwin asks
+    "sysctl -n hw.ncpu", every other platform uses os.sysconf.
+    '''
+    fallback = 1
+    if sys.platform == 'win32':
+        try:
+            return int(os.environ['NUMBER_OF_PROCESSORS'])
+        except (ValueError, KeyError):
+            return fallback
+    if sys.platform == 'darwin':
+        try:
+            return int(os.popen('sysctl -n hw.ncpu').read())
+        except ValueError:
+            return fallback
+    try:
+        return os.sysconf('SC_NPROCESSORS_ONLN')
+    except (ValueError, OSError, AttributeError):
+        return fallback
+
+def exec_commands(cmds, name, num_cpus):
+    ''' Run job lines as shell commands, at most num_cpus of them at a time
+
+    cmds     -- list of job lines "<task id> <shell command>"; entries are
+                popped from the front, so the list is empty afterwards.
+    name     -- logger name and prefix of the log file "<name>.<timestamp>.log".
+    num_cpus -- maximum number of jobs running at the same time.
+
+    Every job is started through the shell as "time <shell command>" with
+    stdout and stderr redirected to "<task id>.out". Running jobs are polled
+    every 5 seconds. Blank job lines are ignored and lines without a command
+    are logged and skipped. Returns None.
+    '''
+    if not cmds: return  # empty list
+
+    logger = logging.getLogger(name)
+    logger.setLevel(logging.DEBUG)
+    my_time = datetime.datetime.now()
+    handler = logging.FileHandler(name + "." + str(my_time.year) + str(my_time.month) + str(my_time.day) + 
+                                  str(my_time.hour) + str(my_time.minute) + str(my_time.second) + ".log")
+    handler.setLevel(logging.DEBUG)
+    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    handler.setFormatter(formatter)
+    logger.addHandler(handler)
+    max_task = multiprocessing.cpu_count()
+    logger.info("Available CPUs = " + str(max_task) + " / using " + str(num_cpus) + " CPUs")
+    logger.info("Number of jobs = " + str(len(cmds)))
+    # Each entry is [Popen object, task id, open output file].
+    processes = []
+    while True:
+        while cmds and len(processes) < num_cpus:
+            task = cmds.pop(0).strip()
+            if not task:
+                continue  # blank line, e.g. at the end of the command file
+            fields = task.split(" ", 1)
+            if len(fields) != 2:
+                # An id without a command used to abort the manager with a
+                # ValueError while other jobs were still running.
+                logger.error("Skipping malformed job line: " + task)
+                continue
+            task_id, cmd = fields
+            logger.info("Executing job " + task_id + ": " + cmd.strip())
+            task_output = open(task_id + ".out", "w")
+            time_cmd = "time " + cmd
+            processes.append([subprocess.Popen(time_cmd, stderr=subprocess.STDOUT, stdout=task_output, shell=True),
+                              task_id, task_output])
+
+        # Build a new list instead of calling remove() on the list being
+        # iterated, which skipped the entry following every removed one.
+        still_running = []
+        for p in processes:
+            if p[0].poll() is None:
+                still_running.append(p)
+                continue
+            p[2].close()  # the job is done; release its output file
+            if p[0].returncode == 0:
+                logger.info("Job " + p[1] + " has finished")
+            else:
+                logger.info("Job " + p[1] + " finished with ERROR CODE " + str(p[0].returncode))
+        processes = still_running
+
+        if not processes and not cmds:
+            break
+        else:
+            time.sleep(5)
+    # Detach the file handler so a later call with the same name does not
+    # write every message twice.
+    logger.removeHandler(handler)
+    handler.close()
+        
+if __name__ == '__main__':
+    # Command-line driver: read one job per line from the --cmd file and run
+    # the jobs with at most --cpu of them in parallel.
+    max_cores = multiprocessing.cpu_count()
+    usage = "USAGE: %prog [options]"
+    parser = optparse.OptionParser(usage=usage)
+    parser.add_option('-f','--cmd', dest="cmd", help='File containing all commands')
+    parser.add_option('-c','--cpu', dest="cpu", help='Number of CPU to use', default=max_cores)
+    (options, args) = parser.parse_args()
+    # --cmd has no default. Checking the option itself (rather than
+    # len(sys.argv) == 1) also covers "-c 4" alone, which previously went on
+    # to call open(None).
+    if not options.cmd:
+        parser.print_help()
+        exit(0)
+    # Read all job lines up front and close the command file right away.
+    with open(options.cmd, "r") as cmd_file:
+        jobs = cmd_file.readlines()
+    exec_commands(jobs, options.cmd, int(options.cpu))
+    
+    
+
+
diff --git a/test_scripts/submit_jobs.sh b/test_scripts/submit_jobs.sh
new file mode 100755
index 0000000..7b6f60c
--- /dev/null
+++ b/test_scripts/submit_jobs.sh
@@ -0,0 +1,45 @@
+#!/bin/bash - 
+#===============================================================================
+#
+#          FILE: submit_jobs.sh
+# 
+#         USAGE: ./submit_jobs.sh 
+# 
+#   DESCRIPTION: 
+# 
+#       OPTIONS: ---
+#  REQUIREMENTS: ---
+#          BUGS: ---
+#         NOTES: ---
+#        AUTHOR: Tung Nguyen 
+#  ORGANIZATION: 
+#       CREATED: 02/06/2015 02:21:40 PM CET
+#      REVISION:  ---
+#===============================================================================
+
+set -o nounset                              # Treat unset variables as an error
+
+# Stage the alignments, the command file and the binaries in a fresh output
+# directory, then submit one job that runs jobmanager.py on the command file.
+if [ $# -ne 5 ]
+then
+  echo "USAGE: $0 <number_of_threads> <cmd_file> <aln_dir> <out_dir> <binary_dir>"
+  exit 1
+fi
+numThreads=$1
+cmd_file=$2
+aln_dir=$3
+out_dir=$4
+binary_dir=$5
+
+# Start from an empty output directory. All expansions are quoted so that a
+# path containing spaces or glob characters is neither split nor expanded,
+# which matters most for the recursive delete.
+if [ -d "$out_dir" ]
+then
+  rm -rf -- "$out_dir"
+fi
+mkdir "$out_dir" || exit 1
+cp "${aln_dir}"/* "$out_dir"
+cp "$cmd_file" "$out_dir"
+cp "${binary_dir}"/* "${out_dir}/"
+cd "$out_dir" || exit 1
+# Call submit2sge directly: a command line stored in a string and expanded
+# unquoted is word-split, so the jobmanager invocation arrived as several
+# arguments with literal quote characters instead of one job string.
+# NOTE(review): -f still receives the caller's cmd_file path although the
+# working directory is now out_dir -- confirm that it resolves as intended.
+submit2sge -N iqtree_system_test -q cluster -r zuseX -s "$numThreads" "../jobmanager.py -f $cmd_file -c $numThreads"
+#echo "../jobmanager.py -f $cmd_file -c $numThreads" | qsub -V -S /bin/bash -cwd -j y -r y -N iqtree_system_test -l zuseX -l cluster -pe threads 16 -q q.norm@zuse02
+cd ..
diff --git a/test_scripts/submitjob.sh b/test_scripts/submitjob.sh
new file mode 100755
index 0000000..660a4eb
--- /dev/null
+++ b/test_scripts/submitjob.sh
@@ -0,0 +1,2 @@
+# Submit the system-test job list to the queue from inside test_data.
+# Stop if the directory is missing, so the job is not submitted from the
+# wrong working directory.
+cd test_data || exit 1
+echo "../jobmanager.py -f ../iqtree_test_cmds.txt -c 16" | qsub -V -S /bin/bash -cwd -j y -r y -N iqtree_system_test -l zuseX -l cluster -pe threads 16 -q q.norm@zuse02
diff --git a/test_scripts/test_configs.txt b/test_scripts/test_configs.txt
new file mode 100644
index 0000000..fbaa35e
--- /dev/null
+++ b/test_scripts/test_configs.txt
@@ -0,0 +1,27 @@
+START_PARTITION_ALN
+example.phy example.nex
+d59_8.phy d59_8.nex
+END_PARTITION_ALN
+
+START_SINGLE_ALN
+example.phy
+prot_M126_27_269.phy
+END_SINGLE_ALN
+
+START_PARTITION_OPTIONS
+-sp
+-spp
+-spj
+END_PARTITION_OPTIONS
+
+START_GENERIC_OPTIONS
+-m TEST -n 1000 
+-m TEST -bb 1000 -n 1000
+-m TEST -alrt 1000 -n 1000
+-m TEST -lbp 1000 -n 1000
+-m TEST -bb 1000 -alrt 1000 -lbp 1000 -n 1000
+-m TEST -b 10 -n 1000
+END_GENERIC_OPTIONS
+
+
+
diff --git a/test_scripts/test_data/d59_8.nex b/test_scripts/test_data/d59_8.nex
new file mode 100644
index 0000000..80342c0
--- /dev/null
+++ b/test_scripts/test_data/d59_8.nex
@@ -0,0 +1,13 @@
+#nexus
+begin sets;
+charset ndhf1st = 1-2183;
+charset rbcl1st = 2184-3527;
+charset rpoc23rd = 3528-4207;
+charset cprs = 4208-4571;
+charset phyb3rd = 4572-5753;
+charset set5_8S = 5754-5913;
+charset its2 = 5914-6177;
+charset gbss13rd = 6178-6951;
+
+
+end;
diff --git a/test_scripts/test_data/d59_8.phy b/test_scripts/test_data/d59_8.phy
new file mode 100644
index 0000000..1056468
--- /dev/null
+++ b/test_scripts/test_data/d59_8.phy
@@ -0,0 +1,60 @@
+59 6951
+Flagellari ----------------------CATGGATAATACCCTTTCTTCCACTTCCAGTTACTACGTCAATAGGATTTGGACTTCTACTTATTCCTACAGCAACAAAAAACCTTCGTCGCATATGGGCTTTTCCTAGTGTTTTACTCTTAAGTATAGCTATGGTATTCTCGATCAATCTGTCTATTCAACAAATAAATGGAAGTTTTATCTATCAATATCTATGGTCTTGGACCATCAATAACGATTTCTCCTTAGAGTTCGGATACTTGATCGATCCGCTTACTTCTATTATGTCAATACTAATTACTACTGTTGGAATCATGGTTCTTATTTATAGTGACAATTACATGTCCCACGATCAAGGATATTTGAGATTTTTTGTTTATATGAGTTTTTTCAATACTTCTATGTTGGGATTAGTTACTAGCTCCAATTTGATACAAATTTATTTTTTTTGGGAACTAGTGGGAATGTGTTCCTATTTAT [...]
+Elegia ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- [...]
+Baloskion -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- [...]
+Joinvillea -----------------------ATGGATAATACCTTTTCTTCCACTTCCAGTTACTATCTCAATAGGATTTGGACTTCTCCTTATTCCGACAGCAACAAAAAATCTTCGTCGCATATGGGCTTTTCCTAGTGTTTTACTCTTAAGTATAGCTATGGTATTCTCAGCTAACCTGTCTATTCAACAAATAAATGGAAATTTTATCTATCAATATCTATGGTCTTGGACCATCAATAATGATTTTTCCTTAGAGTTTGGATACTTGATCGATCCGCTTACGTCTATTATGTCAATACTAATTACTACTGTAGGAACCATGGTTCTTATTTATAGTGACGATTACATGTCTCACGATGAAGGATATTTGAGATTTTTTGTTTATATAAGTTTTTTCAATACTTCTATGTTGGGATTAGTTACCAGTTCCAATTTGATACAAATTTATTTTTTTTGGGAACTGGTGGGAATGTGTTCCTATTTAT [...]
+Anomochloa ----------------------CATGGGTAATACCCTTTCTTCCACTTCCGGTTATTATGTCAATAGGGTTTGGACTTCTTCTTATTCCGACAGCAACAAAAAGTCTTCGTCGTATATGGGCTTTTCCTAGTGTTTTACTCTTAAGTCTAGCTATGGTATTCTCAGTTCACCTGTCTATTCAACAAATAAATGGCAGTTTTATCTATCAATATCTATGGTCTTGGACTATCAATAATGATTTTTCCCTAGAGTTTGGATACTTGATCGACCCGCTTACTTCTATTATGTTAATACTAATTACTACTGTAGGAATTTTGGTTCTTATCTATAGTGACGATTATATGTCTCATGATGAAGGATATTTGAGATTTTTTGTTTATATAAGTTTTTTCAATACTTCCATGTTGGGATTGGTTACTAGTTCCAATTTGATACAAATTTATTTTTTTTGGGAACTTGTAGGAATGTGTTCTTATTTAT [...]
+Streptocha ----------------------CATGGGTAATACCTTTTCTTCCACTTCCGGTTATTATGTCAATAGGGGTTGGACTTCTTCTTATTCCGACAGCAACAAAAAATCTTCGTCGTATATGGGCTTTTCCTAGTGTTTTACTCTTAAGTATAGCTATGGTATTCTCAGTTCACCTGTCTATTCAACAAATAAATGGAAGTTTTATCTATCAATATCTATGGTCTTGGACCATCAATAATGATTTTTCCCTAGAGTTTGGATACTTGATCGACCCGCTTACTTCTATTATGTTAATACTAATTACTACTGTAGGAATTATGGTTCTTATTTATAGTGACGATTATATGTCTCACGATGAAGGATATTTGAGATTTTTTGTTTATATAAGTTTTTTCAATACTTCCATGTTGGGATTGGTTACTAGTTCCAATTTGATACAAATTTATTTTTTTTGGGAACTTGTGGGAATGTGTTCTTATTTAT [...]
+Pharus -----------------------ATGGGTAATACCTTTTCTCCCACTTCCTGTTATTATGTCAATAGGGTTTGGACTTTTTCTTATTCCGACAGCAACAAAAAATCTTCGCCGTATATGGGCTTTTCCTAGTGTTTTACTTTTAAATATAGCTATGGTATTCTCAGTTTACCTGTCTATTCAACAAATAAATGGAAGTTTTATCTATCAATATCTATGGTCTTGGACCATCAATAATGATTTTTCCCTAGAGTTTGGATACTTGGTCGACCCGCTTACTTCTATTATGTTAATACTAATTACTACTATAGGAATCCTGGTTCTTATTTATAGTGACGATTATATGTCTCACGATGAAGGATATTTGAGATTTTTTGTTTATATAAGTTTTTTCAATACTTCTATGTTGGGATTGGTTACTAGTTCCAATTTGATACAAATTTATTTTTTTTGGGAACTTGTGGGAATGTGTTCCTATTTATTAAT [...]
+Eremitis -----------------------ATGGGTAATCCCTCTTCTCCCACTTCCAGTTATTATGTCAATGGGGTTTGGACTTATTCTTATTCCGACAGCGACAAAAAATCTTCGTCGCATATGGGCTTTTCCTAGTGTTTTATTCTTAAGTATAGCTATGGTATTCTCTGTTTACCTATCTATTCAACAAATAAACGGAAGTTCTATCTATCAATATTTATGGTCTTGGACCGTCAATAATGATTTTTCCTTAGAATTTGGATATTTGATCGACCCGCTTACTTCTATTATGTTAATACTAATTACTACTGTAGGAATCCTTGTTCTTATTTATAGTGACGATTATATGTCTTACGATGAGGGATATTTGAGATTTTTTGTTTATATAAGTTTTTTCAATACTTCCATGTTGGGATTGGTTACCAGTTCCAATTTTATACAAATTTATTTTTTTTGGGAACTTGTGGGAATGTGCTCCTATTTATTG [...]
+Pariana ----------------------CATGGGTAATCCCTCTTCTCCCACTTCCAGTTATTATGTCAATGGGGTTTGGACTTATTCTTATTCCGACAGCGACAAAAAATCTTCGTCGCATATGGGCTTTTCCTAGTGTTTTATTCTTAAGTATAGCTATGGTATTCTCTGTTTACCTATCTATTCAACAAATAAACGGAAGTTCTATCTATCAATATCTATGGTCTTGGACCGTCAATAATGATTTTTCCTTAGAATTTGGATATTTGATCGACCCGCTTACTTCTATTATGTTAATACTAATTACTACTGTAGGAATCCTTGTTCTTATTTATAGTGACGATTATATGTCTTACGATGAAGGATATTTGAGATTTTTTGTTTATATAAGTTTTTTCAATACTTCCATGTTGGGATTGGTTATCAGTTCTAATTTGATACAAATTTATTTTTTTTGGGAACTTGTGGGAATGTGCTCCTATTTATTGA [...]
+Aristida ----------------------CATGGGTAATCCCTCTTCTCCCACTTCCAGTTATTATGTCAATGGGGTTTGGACTTTTTCTTATTCCGACAGCAACAAAAAATCTTCGTCGCATATGGGCTTTTCCTAGTGTTTTACTCTTAAGTATAGCTATGGTATTCTCAGTTCACCTGTCTATTCAACAAATAAATGGAAGTTCGATCTATCAATATCTATGGTCTTGGACCATCAATAATGATTTTTCCTTAGAATTTGGATACTTGATTGACCCCCTTACTTCTATTATGTTAATACTAATTACTACTGTAGGAATCCTGGTTCTTATTTATAGTGATGATTATATGTCTCACGATGAAGGATATTTGAGATTTTTTGTTTATATAAGTTTTTTCAATACTTCCATGTTAGGATTGGTTACTAGTTCAAATTTGATACAAATTTATTTTTTTTGGGAACTTGTGGGAATGTGTTCCTATTTATTG [...]
+Stipagrost ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- [...]
+Amphipogon ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- [...]
+Arundo -----------------------ATGGGTAATCCCTCTTCTCCCACTTCCAGTTATTATGTCAATGGGGCTTGGGCTTTTTCTTATTCCGACAGCAACAAAAAATCTTCGTCGCATATGGGCTTTTCCTAGTGTTTTACTCTTAAGTATAGCTATGGTATTCTCAGTTCACCTGTCTATTCAACAAATAAATGGAAGTTCTATCTATCAATATCTATGGTCTTGGACCATCAATAATGATTTTTCCTTAGAGTTTGGATACTTGATCGACCCCCTTACTTCTATTATGTTAATACTAATTACTACTGTAGGAATCTTGGTTCTTATTTATAGTGACGATTATATGTCTCACGATGAAGGATATTTGAGATTTTTTGTTTATATAAGTTTTTTTAATACTTCTATGTTGGGATTGGTTACTAGTTCCAATTTGATACAAATTTATTTTTTTTGGGAACTTGTGGGAATGTGTTCTTATTTATTGAT [...]
+Molinia ----------------------CATGGGTAATCCCTCTTCTCCCACTTCCAGTTATTATGTCAATGGGGTTTGGACTTTTTCTTATTCCGACAGCAACCAAAAATCTTCGTCGCATATGGGCTTTTCCTAGTGTTTTACTCTTAAGTATAGCTATGGTATTCTCAGTTCACCTGTCTATTCAACAAATAAATGGAAGTTCTATCTATCAATATCTATGGTCTTGGACCATCAATAATGATTTTTCCTTAGAATTTGGATACTTGATCGACCCCCTTACTTCTATTATGTTAATACTAATTACTACTGTAGGAATTCTGGTTCTTATTTATAGTGACGATTATATGTCTCACGATGAAGGATATTTGAGATTTTTTGTTTATATAAGTTTTTTTAATACTTCTATGTTGGGATTGGTTACTAGTTCCAATTTGATACAAATTTATTTTTTTTGGGAACTTGTGGGAATGTGTTCCTATTTATTGA [...]
+Phragmites ----------------AATATGCGTGGGTAATCCCTCTTCTCCCACTTCCAGTTATTATGTCAATGGGGTTTGGACTTTTTCTTATTCCGACAGCAACCAAAAATCTTCGTCGCATATGGGCTTTTCCTAGTGTTTTACTCTTAAGTATAGCTATGGTATTCTCAGTTCACCTGTCTATTCAACAAATAAATGGAAGTTCTATCTATCAATATCTATGGTCTTGGACCATCAATAATGATTTTTCCTTAGAATTTGGATACTTGATCGACCCCCTTACTTCTATTATGTTAATACTAATTACTACTGTAGGAATCTTGGTTCTTATTTATAGTGACGATTATATGTCTCACGATGAAGGATATTTGAGATTTTTTGTTTATATAAGTTTTTTTAATACTTCTATGTTGGGATTGGTTACTAGTTCCAATTTGATACAAATTTATTTTTTTTGGGAACTTGTGGGAATGTGTTCCTATTTAT [...]
+Danthonia -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- [...]
+Thysanolae -----------------------ATGGGTAATCCCTCTTCTCCCACTTCCAGTTATTATGTCAATAGGATTTGGCCTTTTTCTTATTCCGACAGCAACAAAAAATCTTCGTCGCATATGGGCTTTTCCTAGTGTTTTACTCTTAAGTATAGCTATGGTATACTCAGTTCACCTGTCTATTCAACAAATAAATGGAAGTTCTATCTATCAATATCTATGGTCTTGGACCATCAATAATGATTTTTCCTTAGAATTTGGATACTTGATCGACCCTCTTACTTCTATTATGTTAATACTAATTACTACTGTAGGAATCTTGGTTCTTATTTATAGTGACGATTATATGTCTCACGATGAAGGATATTTGAGATTTTTTGTTTATATAAGTTTTTTTAATACTTCCATGTTGGGATTGGTTACTAGTTCCAATTTGATACAAATTTATTTTTTTTGGGAACTTGTCGGAATGTGTTCCTATTTAT [...]
+Micraira -----------------ATATGCCTGGGTAATCCCTCTTCTCCCACTTCCAGTTATTATGTCAATGGGGTTTGGACTTTTTCTTATTCCGACAGCAACAAAAAATCTTCGTCGCATATGGGCTTTTCCTAGTGTTTTATTCTTAAGTATAGCTCTGGTATTCTCAGTTCACCTGTCTATTCAACAAATAAAAGGGAGTTCTATCTATCAATATCTATGGTCTTGGACCATCAATAATGATTTTTCCTTAGAATTTGGATACTTGATCGATCCCCTTACTTCTATTATGTTAATACTAATTACTACTGTAGGAATCTTGGTTCTTATTTATAGTGACGATTATATGTCTCACGATGAGGGATATTTGAGATTTTTTGTTTATATAAGTTTTTTTAATACTTCCATGTTGGGATTGGTTACTAGTTCCAATTTGATACAAATTTATTTTTTTTGGGAACTTGTGGGAATGTGTTCCTATTTATTG [...]
+Gynerium --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- [...]
+Eragrostis -----------------------ATGGGTAATCCCTCTTCTCCCACTTCCAGTTATTATGTCAATGGGGCTTGGTCTTTTTCTTATTCCGACAGCAACAAAAAATCTTCGTCGCATATGGGCTTTTCCTAGTATTTTACTCTTAAGTATAGCTCTGGTATTCTCAATTCACCTGTCTATTCAACAAATAAAAGGGAGTTCTATCTATCAATATCTATGGTCTTGGACCATCAATAATGATTTTTCCTTAGAATTTGGATACTTGATCGATCCCCTTACTTCTATTATGTTAATACTAATTACTACTGTAGGAATCTTGGTTCTTATTTATAGTGACGATTATATGTCTCACGATGAGGGATATTTGAGATTTTTTGTTTATATAAGTTTTTTTAATACTTCCATGTTGGGATTGGTTACTAGTTCCAATTTGATACAAATTTATTTTTTTTGGGAACTTGTGGGAATGTGTTCCTATTTAT [...]
+Zoysia ----------------------CATGGGTAATCCCTCTTCTCCCACTTCCAGTTATTATGTCAATGGGGTTTGGACTTTTTCTTATTCCGACAGCAACAAAAAATCTTCGTCGCATATGGGCTTTTCCTAGTGTTTTATTCTTAAGTGTAGCTCTGGTATTCTCAGTTCACCTGTCTATTCAACAAATAAAAGGGAGTTCTATCTATCAATATCTATGGTCTTGGACCATCAATAATGATTTTTCCTTAGAATTTGGATACTTGATCGACCCCCTTACTTCTATTATGTTAATACTAATTACTACTGTAGGAATCTTGGTTCTTATTTATAGCGACGATTATATGTCTCACGATGAGGGATATTTGAGATTTTTCGTTTATATAAGTTTTTTTAATACTTCCATGTTGGGATTGGTTACTAGTTCCAATTTGATACAAATTTATTTTTTTTGGGAACTTGTGGGAATGTGTTCCTATTTATTGAT [...]
+Pappophoru ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- [...]
+Spartina ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------AATAATGATTTTTCCTTAGAATTTGGA-ACTTGATCGACCCCCTTACTTCTATTATGTTAATACTAATTACTACTGTAGGAATCTTGGTTCTTATTTATAGTGACGATTATATGTCTCACGATGAGGGATATTTGAGATTTTTCGTTTATATAAGTTTTTTTAATACTTCCATGTTGGGATTGGTTACTAGTTCCAATTTGATACAAATTTATTTTTTTTGGGAACTTGTGGGGATGTGTTCCTATTTATTG [...]
+Sporobolus ----------------------CATGGGTAATCCCTCTTCTCCCACTTCCAGTTATTATGTCAATGGGGTTTGGACTTTTTCTTATTCCGACAGCAACAAAAAATCTTCGTCGCATATGGGCTTTTCCTAGTGTTTTATTCTTAAGTATAGCTCTGGTATTCTCAGTTCACCTGTCTATTCAACAAATAAAAGGGAGTTCTATCTATCAATATCTATGGTCTTGGACCATCAATAATGATTTTTCCTTAGAATTTGGATACTTGATCGATCCCCTTACTTCTATTATGTTAATACTAATTACTACTGTAGGAATCTTGGTTCTTATTTATAGTGACGATTATATGTCTCACGATGAGGGATATTTGAGATTTTTCGTTTATATAAGTTTTTTTAATACTTCCATGTTGGGATTGGTTACTAGTTCCAATTTGATACAAATTTATTTTTTTTGGGAACTTGTGGGAATGTGTTCCTATTTAT [...]
+Chasmanthi -----------------------ATGGGTAATCCCTCTTCTCCCACTTCCAGTTATTATGTCAATGGGATTTGGACTTTTTCTTATTCCGACAGCAACAAAAAATCTTCGTCGCATATGGGCTTTTCCTAGTGTTTTACTCTTAAGTATAGCTATGGTATTCTCAGTCCACCTGTCTATTCAACAAATAAATGGAAGTTCTATCTATCAATATCTATGGTCTTGGACCATCAATAATGATTTTTCCTTAGAATTTGGATACTTGATCGACCCCCTTACTTCTATTATGTTAATACTAATTACTACTGTAGGAATCTTGGTTCTTATTTATAGTGACGATTATATGTCTCACGATGAAGGATATTTGAGATTTTTTGTTTATATAAGTTTTTTTAATACTTCCATGTTGGGATTGGTTACTAGTTCCAATTTGATACAAATTTATTTTTTTTGGGAACTTGTCGGAATGTGTTCCTATTTAT [...]
+Zeugites ----------------------CATGGGTAATACCCCTTCTCCCACTTCCAGTTATTATGTCAATGGGATTTGGACTTTTTCTTATTCCGACAGCAACAAAAAATCTTCGTCGCATATGGGCTTTTCCTAGTGTTTTACTCTTAAGTATAGCTATGGTATTCTCAGTTCACCTGTCTATTCAACAAATAAATGGAAGTTCTATCTATCAATATCTATGGTCTTGGACCATCAATAATGATTTTTCCTTAGAATTTGGATACTTGATCGACCCGCTTACTTCTATTATGTTAATACTAATTACTACTGTAGGAATCTTGGTTCTTATTTATAGTGACGATTATATGTCTCACGATGAAGGATATTTGAGATTTTTTGTTTATATAAGTTTTTTTAATACTTCCATGTTGGGATTGGTTACTAGTTCCAATTTGATACAAATTTATTTTTTTTGGGAACTTGTCGGAATGTGTTCCTATTTATTG [...]
+Danthoniop -----------ATATCAATATGCCTGGGTAATCCCTCTTCTCCCACTTCCAGTTATTATGTCAATCGGATTTGGACTTTTTCTTATTCCAACAGCAACAAAAAATCTTCGTCGTATATGGGCTTTGCCTAGTGTTTTACTGTTAAGTATAGCTATGGTATTCTCAGTTCACCTGTCTATTCAACAAATAAATGGAAGTTCCATCTATCAATATCTATGGTCTTGGACCATCAATAATGATTTTTCCTTAGAATTTGGGTACTTGATCGACCCCCTTACTTCTATTATGTTAATACTAATTACTACCGTAGGAATCTTAGTTCTTATTTATAGTGACGATTATATGTCTCACGATGAAGGATATTTAAGATTTTTTGTTTATATAAGTTTTTTTAATACTTCCATGTTGGGATTGGTTACTAGTTCCAATTTGATACAAATTTATTTTTTTTGGGAACTTGTAGGAATGTGTTCCTATTTAT [...]
+Panicum -----------------------ATGGGTAATCCCTCTTCTCCCACTTCCAGTTATTATGTCAATGGGATTTGGACTTTTTCTTATTCCGACAGCAACAAAAAATCTTCGTCGCATATGGGCTTTTCCTAGTATTTTACTCTTAAGTATAGCTCTGGTATTCTCCGTTCACCTGTCTATTCAACAAATAAATGGAAGTTCTATCTATCAATATCTATGGTCTTGGACCATCAATAATGATTTTTCCTTAGAATTTGGATACTTGATCGACCCCCTTACGTCTATTATGTTAATACTAATTACTACTGTAGGAATCTTGGTTCTTATTTATAGTGACGATTATATGTCTCACGATGAAGGATATTTGAGATTTTTTGTTTATATAAGTTTTTTTAATACTGCCATGTTGGGATTGGTTACTAGTTCCAATTTGATACAAATTTATTTTTTTTGGGAACTTGTCGGAATGTGTTCCTATTTATTGA [...]
+Pennisetum ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- [...]
+Miscanthus -----------------------CTGGGTAATTCCTCTTCTCCCACTTCCAGTTATTATGTCAATGGGATTTGGACTTTTTCTTATTCCCACAGCAACAAAAAATCTTCGTCGCATATGGGCTTTTCCTAGTATTTTACTCTTAAGTATAGCTATGGTATTCTCACTTCACCTGTCTATTCAACAAATAAATGGAAGTTCTATCTATCAATATCTATGGTCTTGGACCATCAATAATGATTTTTCCTTAGAATTTGGATACTTGGTCGACCCCCTTACGTCTATTATGTTAATACTAATTACTACTGTAGGAATCTTAGTTCTTATTTATAGTGACGATTATATGTCTCACGATGAAGGATATTTGAGATTTTTTGTTTATATAAGTTTTTTTAATACTTCCATGTTGGGATTGGTTACTAGTTCCAATTTGATACAAATTTATTTTTTTTGGGAACTTGTCGGAATGTGTTCCTATTTAT [...]
+Zea ------------------------TGGGTAATTCCTCTTCTCCCACTTCCAGTTATTATGTCAATGGGATTTGGACTTTTTCTTATTCCTACAGCAACAAAAAATCTTCGTCGTATATGGGCTTTTCCTAGTATTTTACTCTTAAGTATAGCTATGGTATTCTCACTTCACCTGTCTATTCAACAAATAAATGGAAGTTCTATCTATCAATATCTATGGTCTTGGACCATCAATAATGATTTTTCCTTAGAATTTGGATACTTGGTCGACCCCCTTACGTCTATTATGTTAATACTAATTACTACTGTAGGAATCTTAGTTCTTATTTATAGTGACGATTATATGTCTCACGATGAAGGATATTTGAGATTTTTTGTTTATATAAGTTTTTTTAATACTTCCATGTTAGGATTGGTTACTAGTTCCAATTTGATACAAATTTATTTTTTTTGGGAACTTGTCGGAATGTGTTCCTATTTATTGATAGG [...]
+Eriachne --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- [...]
+Phaenosper -------------------------GGGTAATCCCTCTTCTCCCACTTCCAGTTATTATGTCAATGGGGTTTGGCCTTATTCTTATTCCGACAGCAACAAAAAATATTCGTCGCATATGGGCTTTTCCTAGTGTTTTACTCTTAAGTATAGCTATGGTATTCTCAATTCAACTGTCTATTCAACAAATAAAAGGAAGTTCTATCTATCAATATCTATGGTCTTGGACCGTCAATAATGATTTTTCCTTAGAATTTGGATACTTGATTGACCCGCTTACTTCTATTATGTTAATACTAATTACTACTGTAGGAATCCTGGTTCTTATTTATAGTGACGGTTATATGTCTCACGATGAAGGATATTTGAGATTTTTTGTTTATATAAGTTTTTTCAATACTTCCATGTTGGGATTGGTCACTAGTTCCAATTTGATACAAATTTATTTTTTTTGGGAACTTGTGGGAATGTGTTCCTATTTAT [...]
+Streptogyn ----------------------CATGGGTAATCCCTCTTCTCCCACTTCCAGTTATTATGTCAATGGGGTTTGGACTTATTCTTATTCCGACAGCAACAAAAAATCTTCGTCGCATATGGGCTTTTCCTAGTGTTTTATTTTTAAGTATAGCTATGGTATTCTCAGTTCACCTGTCTATTCAACAAATAAATGGAAGTTCTATCTATCAATATCTATGGTCTTGGACCGTCAATAATGATTTTTCCTTAGAATTTGGATACTTGATCGACCCGCTTACTTCTATTATGTTAATACTAATTACTACTGTAGGAATCCTGGTTCTTATTTATAGTGACGATTATATGTCTCACGATGAAGGATATTTGAGATTTTTTGTTTATATAAGTTTTTTCAATACTTCCATGTTGGGATTGGTTACTAGTTCCAATTTGATACAAATTTATTTTTTTTGGGAACTTGTGGGAATGTGTTCCTATTTAT [...]
+Ehrharta -----------------------ATGGGTAATCCCTCTTCTCCCACTTCCAGTTATTATGCCAATGGGGTTTGGACTTATTCTTATTCCGACAGCAACAAAAAATCTTCGTCGCATATGGGCTTTTCCTAGTGTTTTACTTTTAAGTCTAGCTCTGGTTTTCTCAGTTCACCTGTCTATTCAACAAATAAATGGAAGTTCTATCTATCAATATCTATGGTCTTGGACCGTCAATAACGATTTTTCCTTAGAATTTGGATACTTAATTGACCCGCTTACTTCTATTATGTTAATACTAATTACTACTGTAGGAATTCTGGTTCTTATTTATAGTGACGATTATATGTCTCATGATGAAGGATATTTGAGATTTTTCGTTTATATAAGTTTCTTCAATACTTCCATGTTGGGATTGGTTACTAGTTCCAATTTGATACAAATTTATTTTTTTTGGGAGCTTGTGGGAATGTGTTCCTATTTATTG [...]
+Oryza ATGGAACATACATATCAATATGCATGGGTAATCCCTCTTCTCCCACTTCCAGTTATTATGTCAATGGGGTTTGGACTTTTTCTTGTTCCAACAGCAACAAAAAATCTTCGTCGCATATGGGCTTTTCCTAGTGTTTTACTTTTAAGTATAGCTATGGTATTCTCAGTTCACCTGTCTATTCAACAAATAAATGGAAGTTCTATCTATCAATATCTATGGTCTTGGACCGTCAATAATGATTTTTCCTTAGAATTTGGATACTTAATCGACCCGCTTACTTCTATTATGTTAATACTAATTACTACTGTAGGAATCCTGGTTCTTATTTATAGTGATGATTATATGTCTCACGATGAGGGATATTTGAGATTTTTTGTTTATATAAGTTTTTTCAATACTTCCATGTTGGGATTGGTTACTAGTTCCAATTTGATACAAATTTATTTTTTTTGGGAGCTTGTGGGAATGTGTTCCTATTTATTGATA [...]
+Leersia ----------------AATATGCATGGGTAATCCCTTTTCTCCCACTTCCAGTTATTATGTCAATGGGGCTTGGACTTTTTCTTATTCCGACAGCAACAAAAAATCTTCGTCGCATATGGGCTTTTCCTAGTGTTTTACTTTTAAGTATAGCTATGGTATTCTCAGTTCACCTGTCTATTCAACAAATAACCGGAAGTTCTATCTATCAATATCTATGGTCTTGGACCGTCAACAATGATTTTTCCTTAGAATTTGGATACTTAATCGACCCGCTTACTTCTATTATGTTAATACTAATTACTACTGTAGGAATCCTGGTTCTTATTTATAGTGACGGTTATATGTCTCACGATGAAGGATATTTGAGATTTTTTGTTTATATAAGTTTTTTCAATACTTCCATGTTGGGATTGGTTACTAGTTCCAATTTGATACAAATTTATTTTTTTTGGGAGCTTGTGGGAATGTGTTCCTATTTATTGA [...]
+Pseudosasa --------------------TGCATGGGTAATCCCTCTTCTCCCACTTCCAGTTATTATGTCAATGGGGTTTGGACTTATTCTTATTCCGACAGCAACAAAAAATCTTCGTCGCATATGGGCTTTTCCTTGTGTTTTACTCTTAAGTATAGCTATGGTATTCTCAGTTCACCTATCTATTCAACAAATAAATGGAAGTTCTATCTATCAATATCTATGGTCTTGGACCGTCAATAATGATTTTTCCTTAGAATTTGGATACTTGATCGACCCGCTTACTTCTATTATGTTAATACTAATTACTACTGTAGGAATCCTCGTTCTTATTTATAGTGACGATTATATGTCTCACGATGAAGGATATTTGAGATTTTTTGTTTATATAAGTTTTTTCAATACTTCCATGTTGGGATTGGTTACTAGTTCCAATTTGATACAAATTTATTTTTTTTGGGAACTTGTGGGAATGTGTTCCTATTTAT [...]
+Chusquea -----------------------TTGGGTAATCCCTCTTCTCCCACTTCCAGTTATTATGTCAATCAGGTTTGGACTTCTTCTTATTCCGACAGCAACAAAAAATCTTCGTTGCATATGGGCTTTTCCTAGTGTTTTACTCTTAAGTATAGCTATGGTATTCTCAGTTCACTTATCTATTCAACAAATAAATGGAAGTTCTATCTATCAATATCTATGGTCTTGGACCGTCAATAATGATTTTTCCTTAGAATTTGGATACTTGATCGACCCGCTTACTTCTATTATGTTAATACTAATTACTACTGTAGGAATCCTCGTTCTTATTTATAGTGACGATTATATGTCTCACGATGAAGGATATTTGAGATTTTTTGTTTATATAAGTTTTTTCAATACTTCCATGTTGGGATTGGTTACTAGTTCCAATTTGATACAAATTTATTTTTTTTGGGAACTTGTGGGAATGTGTTCCTATTTATTG [...]
+Guaduella -----------------------ATGGGTAATCC-TCTTTTCCCACTTCCAGTTATTATGTCAATGGGGTTTGGACTTATTCTTATTCCGACAGCAACAAAAAATCTTCGTCGCATATGGGCTTTTCCTAGTGTTTTACTCTTAAGTATAGCTATGGTATTCTCAGTTCACCTGTCTATTCAACAAATAAATGGAAGTTCTATCTATCAATATCTATGGTCTTGGACCATCAATAATGATTTTTCCTTAGAATTTGGATACTTGATCGACCCGCTTACTTCTATTATGTTAATACTAATTACTACTGTAGGAATCCTGGTTCTTATTTATAGTGACGATTATATGTCTCACGATGAAGGATATTTGAGATTTTTTGTTTATATAAGTTTTTTCAATACTTCCATGTTGGGATTGGTTACTAGCTCCAATTTGATACAAATTTATTTTTTTTGGGAACTTGTAGGAATGTGTTCCTATTTATT [...]
+Puelia ----------------------CCTGGGTAATTCCTCTTCTCCCACTTCCAGTTATTATGTCAATGGGATTTGGACTTTTTCTTATTCCCACAGCAACAAAAAATCTTCGTCGCATATGGGCTTTTCCTAGTATTTTACTCTTAAGTATAGCTATGGTATTCTCACTTCACCTGTCTATTCAACAAATAAATGGAAGTTCTATCTATCAATATCTATGGTCTTGGACCATCAATAATGATTTTTCCTTAGAATTTGGATACTTGGTCGACCCCCTTACGTCTATTATGTTAATACTAATTACTACTGTAGGAATCTTAGTTCTTATTTATAGTGACGATTATATGTCTCACGATGAAGGATATTTGAGATTTTTTGTTTATATAAGTTTTTTTAATACTTCCATGTTGGGATTGGTTACTAGCTCCAATTTGATACAAATTTATTTTTTTTGGGAACTTGT-GGAATGTGTTCCTATTTATTGAT [...]
+Lithachne -----------------------ATGGGTAATCCCTCTTCTCCCACTTCCAGTTATTATGTCAATGGGGTTTGGACTTATTCTTATTCCGACAGCGATCAAAAATCTTCGTCGCATATGGGCTTTTCCTAGTGTTTTATTCTTAAGTATAGCTATGGTATTCTCTGTTTACCTATCTATTCAACAAATAAACGGAAGTTCTATCTATCAATATCTATGGTCTTGGACTGTCAATAATGATTTTTCTCTAGAATTTGGATATTTGATCGACCCGCTTACTTCTATTATGTTAATACTAATTACTACTGTAGGAATCCTCGTTCTTATTTATAGTGACAACTACATGTCTCACGATGAAGGATATTTGAGATTTTTTGTTTATATAAGTTTTTTCAATACTTCTATGTTGGGATTGGTTATTAGCTCCAATTTGATACAAATTTATTTTTTTTGGGAACTTGTGGGAATGTGTTCCTATTTATT [...]
+Olyra ---------------------GCATGGGTAATCCCTCTTCTCCCACTTCCAGTTATTATGTCAATGGGATTTGGACTTATTCTTATTCCGACAGCGACAAAAAATCTTCGTCGCATATGGGCTTTTCCTAGTGTTTTATTCTTAAGTATAGCTATGGTATTCTCTGTTTACCTATCTATTCAACAAATAAACGGAAGTTCTATCTATCAATATCTATGGTCTTGGACCGTCAATAATGATTTTTCCTTAGAATTTGGATATTTGATCGACCCGCTTACTTCTATTATGTTAATACTAATTACTACTGTAGGAGTCCTCGTTCTTATTTATAGTGACGACTACATGTCTCACGATGAAGGATATTTGAGATTTTTTGTTTATATAAGTTTTTTCAATACTTCTATGTTGGGATTGGTTATTAGTTCCAATTTGATACAAATTTATTTTTTTTGGGAACTTGTGGGAATGTGTTCCTATTTATTGATA [...]
+Buergersio -----------------------ATGGGTAATCCCTCTTCTCCCACTTCTAGTTATTATGTCAATGGGGTTTGGACTTATTCTTATTCCAACAGCAACAAAAAATCTTCGTCGCATATGGGCTTTTCCTAGTGTTTTACTCTTAAGTATAGCTATGGTATTCTCTGTTTACCTGTCTATTCAACAAATAAACGGAAGTTCTATCTATCAATATCTATGGTCTTGGACCGTCAATAATGATTTTTCCTTAGAATTTGGATACTTGATCGACCCGCTTACTTCTATTATGTTAATACTAATTACTACTGTAGGAATCCTCGTTCTTATTTATAGTGATGATTATATGTCTCACGATGAAGGATATTTGAGATTTTTTGTTTATATAAGTTTTTTCAATACTTCCATGTTGGGATTGGTTACTAGTTCCAATTTGATACAAATTTATTTTTTTTGGGAACTTGTGGGAATGTGTTCCTATTTAT [...]
+Brachyelyt -----------------------ATGGGTAATACCTCTTCTCCCACTTCCAGTTATTATGTCAATGGGGTTTGGACTTATTCTTATTCCGACAGCAACAAAAAATCTTCGTCGCATATGGGCCTTTCCTAGTGTTTTATTCTTAAGTGTAGCTATGGTATTCTCAGTTCAACTGTCTATTCAACAAATAAATGGCAGTTCTATCTATCAATATCTATGGTCTTGGACCGTCAATAATGATTTTTCCTTAGAATTTGGATACTTGATTGACCCGCTTACTTCTATTATGTTAATACTAATTACTACTGTAGGAATCCTGGTTCTTATTTATAGTGACGGTTATATGTCTCACGATGAAGGATATTTTAGATTTTTTGTTTATATAAGTTTTTTCAATACTTCTATGTTGGGATTGGTTACTAGTTCCAATTTGATACAAATTTATTTTTTTTGGGAACTTGTAGGAATGTGTTCCTATTTAT [...]
+Lygeum ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- [...]
+Nardus ---------------------GCATGGCTAATCCCTCTTCTCCCACTTCCAGCTATTATGTCAATGGGATTGGGGCTTTTTCTTATTCCAAAACCGACAAAAAATATTCGTCGCATATGGGCTTTTCCTAGTGTTTTACTCTTAAGTATAGCTATGGTATTCTCATTTCAATTGTCTATTGAACAAATAAAAGGAAGTTCTATCTATCAATATCTATGGTCTTGGACCATCAATAATGATTTTTCTTTAGAATTTGGATACTTGATTGACCCGCTTACTTCGATTATGTTAATACTAATTACTACTGTAGGAATCCTAGTTCTTATTTATAGTGACAGTTATATGTCTCATGATGAAGGGTATTTGAGATTTTTTGTTTATATAAGTTTTTTCAATACTTCCATGTTGGGATTGGTTACTAGTTCGAATTTAATACAAATTTATTTTTTTTGGGAACTTGTGGGAATGTGTTCTTATTTATTGAT [...]
+Anisopogon ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ACCGTCAATAATGATTTTTCCTTAGAATTTGGATACTTGATTGACCCGCTTACTTCTATTATGTTAATACTAATTACTACTGTAGGAATCCTGGTTCTTATTTATAGTGACGGTTATATGTCTCACGATGAAGGATATTTGAGATTTTTTGTTTATATAAGTTTTTTCAATACTTCCATGTTGGGATTGGTTACTAGTTCCAATTTGATACAAATTTATTTTTTTTGGGAACT-GTGGGAATGTGTTCC--TTTAT [...]
+Ampelodesm ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- [...]
+Stipa ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------AATAATGATTTTTCCTTAGAATTTGGATACTTGATTGACCCGCTTACTTCTATTATGTTAATACTAATTACTACTGTAGGAATCCTAGTTCTTATTTATAGTGACGGTTATATGTCTCACGATGAAGGATATTTGAGATTTTTTGTTTATATAAGTTTTTTCAATACTTCCATGTTGGGATTGGTTACTAGTTCCAATTTGATACAAATTTATTTTTTTTGGGAACTTGTGGGAATGTGTTCCT-TTTATTGATA [...]
+Nassella ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------AATAATGATTTTTCTTTAGAATTTGGATACTTGATTGACCCGCTTACTTCTATTATGTTAATACTAATTACTACTGTAGGAATCCTGGTTCTTATTTATAGTGACGGTTATATGTCTCACGATGAAGGATATTTGAGATTTTTTGTTTATATAAGTTTTTTCAATACTTCCATGTTGGGATTAGTTACTAGCTCCAATTTGATACAAATTTATTTTTTTTGGGAACTTGTGGGAATGTGTTCCTATTTATT- [...]
+Piptatheru -----------------------ATGGGTAATCCCTCTTCTCCCACTTCCAGTTATTATGTCAATGGGGTTTGGCCTTATTCTTATTCCGACAGCAACAAAAAATCTTCGTCGCATATGGGCTTTTCCTAGTGTTTTACTCTTAAGTATAGCTATGGTATTCTCAGTTCAACTGTCTATTCAACAAATAAATGGAAGTTCTATCTATCAATATCTATGGTCTTGGACCATCAATAATGATTTTTCCTTAGAATTTGGATACTTGATTGACCCGCTTACTTCTATTATGTTAATACTAATTACTACTGTAGGAATCCTGGTTCTTATTTATAGTGACGGTTATATGTCTCACGATGAAGGATATTTGAGATTTTTTGTTTATATAAGTTTTTTCAATACTTCCATGTTGGGATTGGTTACTAGTTCCAATTTGATACAAATTTATTTTTTTTGGGAACTTGTGGGAATGTGTTCCTATTTAT [...]
+Brachypodi --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GACCATCAATAATGATTTTTCCTTAGAATTTGGATACTTGATTGACCCGCTCACTTCGATTATGTTAATACTAATTACTACTGTAGGAATCCTGGTTCTTATTTATAGTGATGGTTATATGTCTCATGATGAAGGATATTTGAGATTTTTTATTTACATAAGTTTTTTCAATATTTCCATGTTGGGATTGGTTACTAGTTCCAATTTGATACAAATTTATTTTTTTTGGGAACTTGTGGGAATGTGTTCCTATTTAT [...]
+Melica --------------------------------------------------------------------------------------------------------------------------------------------TTTAAGTATAGCTATGGTATTCTCAGTTCAACTGTCTATTCAACAAATAAATGGAAGTTCTATCTATCAATATCTATGGTCTTGGACCATCAATAATGATTTTTCCTTAGAATTTGGATACTTAATTGAC--GCTTACTTCTATTATGTTAATACTAATTACTACTGTAGGAATCTTGGTTCTTGTTTATAGTGACGGTTATATGTCTCACGATGAAGGATATTTGAGATTTTTTGTTTATATAAGTTTTTTCAATACTTCCATGTTGGGATTGGTTACTAGTTCCAATTTGATACAAATTTATTTTTTTTGGGAACTTATGGGAATGTGTTCCTATTTATTGAT [...]
+Glyceria ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------AATAATGATTTTTCCTTAGAATTTGGATACTTAATTGACCCGCTTACTTCTATTATGTTAGTACTAATTACTACTGTAGGAATCTTGGTTCTTGTTTATAGCGACGGTTATATGTCTCACGATGAAGGATATTTGAGATTTTTTGTTTATATAAGTTTTTTCAATACTTCCATGTTGGGATTGGTTACTAGTTCCAATTTGATACAAATTTATTTTTTTTGGGAACTTGTGGGAATGTGTTCCTATTTATTG [...]
+Diarrhena -----------------------ATGGGTAATCCCTCTTCTCCCACTTCCAGTTATTATGTCAATGGGGTTTGGCCTTATTCTTATTCCGACAGCAACAAAAAATCTTCGTCGCATATGGGCTTTTCCTAGTGTTTTACTCTTAAGTATAGCTATGGTATTCTCAGTTCAACTGTCTATTCAACAAATAAATGGAAGTTCTATCTATCAATATCTATGGTCTTGGACCATCAATAATGATTTTTCCTTAGAATTTGGATACTTGATTGACCCGCTTACTTCTATTATGTTAATACTAATTACTACTGTAGGAATCCTGGTTCTTATTTATAGTGACGGTTATATGTCTCACGATGAAGGATATTTGAGATTTTTTGTTTATATAAGTTTTTTCAATACTTCCATGTTGGGATTGGTTACTAGTTCCAATTTGATACAAATTTATTTTTTTTGGGAACTCGTGGGAATGTGTTCCTATTTATT [...]
+Avena -------------------------GGGTAATCCCTCTTCTCCCACTTCCAGTTATTATGTCAATGGGATTTGGCCTTTTT-TTATTCCGACAGCAACAAAAAATCTTCGTCGCATATGGGCTTTTCCTAGTGTTTTACTCTTAAGTATAGCTATGGTATTCTCAGTTCAACTGTCTATTCAACAAATAAATGGAAGTTCTAT-TATCAATATCTATGGTCTTGGACCGTCAATAATGATTTTTCTTTAGAATTTGGATACTTGATTGACCCGCTTACTTCTATTATGTTAATACTAATTACTACTGTAGGAATCCTGGTTCT-ATTTATAGTGACGGTTATATGTCTCACGATGAAGGGTATTTGAGATTTTTTGTTTATATAAGTTTTTTCAATACTTCCATGCTAGGATTGGTTACTAGCTCCAATTTGATACAAATTTATTTTTTT-GGGAACTCGTAGGAATGTGTTCCTATTTATTGATA [...]
+Bromus ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- [...]
+Triticum -----------------------ATGGGTAATCCCTCTTCTCCCACTTCCAGTTATTATGTCAATGGGATTTGGCCTTATTCTTATTCCGACAGCAACAAAAAATCTTCGTCGCATATGGGCTTTTCCTAGTGTTTTACTCTTAAGTATAGCTATGGTATTCTCAGTTCAACTGTCTATTCAACAAATAAATGGAAGTTCTATCTATCAATATCTATGGTCTTGGACCGTCAATAATGATTTTTCTTTAGAATTTGGATACTTGATTGACCCACTTACTTCTATTATGTTAATACTAATTACTACTGTAGGAATCCTGGTTCTTATTTATAGTGATGGTTATATGTCTCACGATGAAGGATATTTGAGATTTTTTGTTTATATAAGTTTTTTCAATACTTCTATGTTGGGATTGGTTACTAGCTCCAATTTGATACAAATTTATTTTTTTTGGGAACTCGTGGGAATGTGTTCTTATTTATTG [...]
diff --git a/test_scripts/test_data/example.nex b/test_scripts/test_data/example.nex
new file mode 100644
index 0000000..cd8c847
--- /dev/null
+++ b/test_scripts/test_data/example.nex
@@ -0,0 +1,5 @@
+#nexus
+begin sets;
+charset part1 = 1-100;
+charset part2 = 101-384;
+end;
diff --git a/test_scripts/test_data/example.phy b/test_scripts/test_data/example.phy
new file mode 100644
index 0000000..8637b06
--- /dev/null
+++ b/test_scripts/test_data/example.phy
@@ -0,0 +1,46 @@
+ 44 384 
+FL-1-103     atgcgcatcacccaaggc---------------------accttctccttcctgcccgacctcacggcggcccaggtcaaggcccagatccagtatgcgctggaccagaactgggcggtctcggtggagtacacggacgatccc------------------------------------------------------catccccggaacacctattgggagatgtggggcctgcccatgttcgacctgcgcgatgccgccggcgtctatggcgaggtcgaggcctgccgcaccgcccatcccggcaagtatgtgcgggtgaacgccttcgactccaatcgcgggtgggagacggtgcgcctctccttcatcgtccagcgtccg
+OSH-1-103    atgcgcatcacccaaggc---------------------tgcttctcgttcctgcccgacctgaccgacgagcagatctcggcgcaggtggactattgcctcggccgcggctgggccgtgagcctcgaacataccgacgacccg------------------------------------------------------catccccggaacacctactgggaaatgtggggcatgccgatgttcgacctgcgcgaccccaagggcgtgatgatcgagctggacgagtgccgcaaggcctggcccggccgctacatccgcatcaatgccttcgattccacccgcggcttcgagacggtcacgatgagcttcatcgtcaaccgcccc
+CEU-1-103    atgcgcatcactcaaggc---------------------actttttccttcctgcccgaactgaccgacgagcagatcaccaaacagctcgaatactgcctgaaccagggctgggcggtcggcctcgaatacaccgacgacccg------------------------------------------------------cacccgcgcaacacgtactgggagatgttcgggctgccgatgttcgacctgcgcgatgccgccggcatcctgatggaaatcaacaacgcgcggaacaccttccccaaccactacatccgcgtcacggccttcgattcgacgcatacggtggagtcggtggtgatgtcgttcatcgtcaatcgtccc
+TH-1-103     atgagacttacacaaggc---------------------gcattttcgttcttacctgacttaacagatgagcaaatcgtaaaacaaattcaatatgctatcagcaaaaactgggctttaaacgttgaatggacagatgatccg------------------------------------------------------caccctcgcaacgcatactgggatttatggggattaccattatttggtattaaagatccagcggctgtaatgtttgaaatcaatgcttgccgtaaagctaaaccagcttgttacgtaaaagtaaatgcgtttgataactcacgtggtgtagaaagctgctgcttatcttttatcgttcaacgtcct
+CAa1-103     atgaaactaacacaagga---------------------gctttctcatttcttcctgacttaactgatgcgcaagtaactaagcaaatccagtacgctttaaataagagttgggctatttcgattgaatatactgatgatccg------------------------------------------------------cacccacgtaacagttactgggagatgtggggccttcctctattcgatgttaaggatccagctgcgattcttttcgaaatcaacatggctcgtaaggctaagcctaactactaccttaaaatagcttgttttgataacacacgtggtatcgaaagttgtgtactttctttcattgtacaacgtcct
+CAb1-103     gtgagagttacacaagga---------------------acattttcttttctaccagacctgacaaatgatcaaatcagaaaacaaattcaatatgccataaataaaggatgggcattgagtgtagaatatacagatgaccct------------------------------------------------------cacccacggaattcttactgggaaatgtggggactgcctttatttgatgtcaaagaccctgcggcaattatgtttgaagttgaagcttgtcgaaaagagaaaagcaactattatattaagctattagcttttgattcaaccaaaggagttgaaagtacagcaatgtcctttatggtcaataggcct
+SI-1-103     atgagagttacacaagga---------------------tgtttttcgtttttaccagatttaagtgatgatcaaattaaacaacaagtttcttacgctatgagcaaaggttgggcggttagtgtagaatggacagatgatcca------------------------------------------------------catccacgtaactcatattgggaattatggggtcttcctttatttgatgttaaagatccagctgcagttatgtatgaacttgctgaatgtagaaaagttaacccagaaggttatattaaaattaatgctttcgatgctagtattggtacagaaagttgtgtaatgtcttttattgtacaacgtcct
+LU-1-103     gtgagacttacacaagga---------------------gctttttcttatttaccagatttaactgatgcacaaatcatcaaacaaattgactactgcttaagcagaggttggtctgttggtgttgaatggactgatgatcca------------------------------------------------------cacccacgtaacgcttactgggaactatggggtcttccattatttgacgtaaaagattcttcagcaattttatacgaagttaatgaatgtcgtcgtttaaaccctgaaggttacattaaattagttgctttcaacgcagcacgtggtactgaaagtagtgcatctgcttttattgtacaacgtcca
+SU-1-103     gtgagaataactcaaggt---------------------accttttcttttttgccggacttgactgatgaacaaatcaaaaaacaaattgattatatgatatctaaaaaattagctataggtattgaatatactaacgacata------------------------------------------------------catcctagaaattcattttgggaaatgtggggattacctctatttgaggtcacagatccagctccagtattatttgaaattaatgcttgtcgtaaagcaaaaagtaatttctatatcaaggtagtaggattttcttctgaaagaggtatagaaagtacaataatttcatttattgtaaatagacca
+RP-56-175    atgcaggtgtggccaccagttggcaagaagaagtttgagaccctttcataccttccacccctcactgatgagcaattgcttaaggaagtagagtatcttctaaggaagggatgggttccatgtgttgaatttgagttggagaaa------------------ggatttgtccaccgtcagtacaacagttcaccaggatactatgatggacgttactggacaatgtggaggttgccattgtttggaaccactgatgctgctcaggtgttgaaggaagttgctgaatgtaaagcagaatacccagaagctttcatccgtatcatcggatttgacaacgttcgt------caagtgcaatgcattagtttcattgcaagcacaccc
+A-14-133     atgcaggtgtggcctccaattggaaagaagaagtttgagactctttcctatttgccaccattgacgagagatcaattgttgaaagaagttgaataccttctgaggaagggatgggttccatgcttggaatttgagttgctcaaa------------------ggatttgtgtacggtgagcacaacaagtcaccaagatactatgatggaagatactggacaatgtggaagcttcctatgtttggcaccactgatcctgctcaagtcgtgaaggaggttgatgaagttgttgccgcttaccccgaagctttcgttcgtgtcatcggtttcaacaacgttcgt------caagttcaatgcatcagtttcattgcacacacacca
+PR-57-176    atgcaggtgtggccaccacgtaatttgaagaagtttgagaccctatcataccttccaactctttccgaggagtcattgttgaaggagatcaactaccttctaatcaagggatgggttccttgccttgagttcgaagttggaccg------------------gcacatgtataccgtgagaacaacaagtcaccaggatactatgacggaaggtactggacaatgtggaagctacccatgttcggatgcactgacgcatcccaagttgcagctgaggtggtcgagtgcaagaacgcttaccctgatgcccacgtcagaatcattggattcgacaacaagcgt------caagtccagtgcatcagtttcattgcctacaaacct
+PY-61-180    atgcaggtgtggcctccactcggactgaagaagttcgagaccctctcttaccttcctcccctttcttccgagtccttggccaaggaagttgactacctcctccgcaagaactgggttccctgcttggaatttgagttggagact------------------ggattcgtgtaccgtgagaaccacaggtccccaggatactatgatggaaggtactggacaatgtggaagctgcccatgttcggatgcaccgactcttcccaggtgttgaaggagctggaagaggccaagaaggcttacccccagtccttcatccgtatcatcggattcgacaatgtccgt------caagtgcagtgcatcagtttcatcgcttacaagcct
+MGI-58-176   atgcaggtgtggccgccggagggcctgaagaagttcgagaccctctcctacctcccccctctctccgtcgaggacctcgccaaggaggtggactacctcctccgcaacgactgggttccctgcatcgagttctccaaggaa---------------------gggttcgtgtaccgcgagaaccacgcgtcgcccgggtactacgacgggcggtactggacgatgtggaagctgcccatgttcggctgcaccgacgccagccaggtgatcgccgaggtggaggaggccaagaaggcctaccccgagtacttcgtcagaatcatcggcttcgacaacaagcgc------caagtccagtgcatcagcttcatcgcctacaagccc
+SCR-58-177   tgcatggtgtggccaccactaggaatgaagaagtttgagactctgtcttacctgccccctctatccgaagagtcattgttgaaggaggtccaataccttctcaacaatggatgggttccctgcttggaattcgagcccactcac------------------ggatttgtgtaccgtgagcacggaaacacaccaggatactacgatggacgttactggacaatgtggaagttgcccatgttcggttgcactgacccatcccaggttgttgctgagctcgaggaggccaagaaggcttaccctgaggccttcatccgtatcataggattcgacaacgtgcgt------caagtccagtgtgtcagtttcatcgcctacaagccc
+SA-60-179    atgaaggtgtggccaccacttggattgaggaagttcgagactctttcttacctgcctgatatgagtaacgaacaattgtcaaaggaatgtgactaccttctcaggaatggatgggttccctgcgttgaattcgacatcggaagc------------------ggattcgtgtaccgtgagaaccacaggtcaccaggattctacgatggacgttactggaccatgtggaagctccctatgtttggctgcaccgactcatctcaggtgattcaggagattgaggaggctaagaaggaataccccgacgcattcatcagggttattggctttgacaacgtccgt------caagtccagtgcatcagtttcatcgcctacaagccc
+BR-60-179    atgcaggtatggccaccacgtgggaagaagttctacgagactctctcataccttccaccccttacaagggagcaattggccaaggaagttgaataccttcttcgcaagggatgggttccttgcttggaattcgagttggagcat------------------ggaaccgtgtaccgtgagtaccacagatcaccagggtactatgatggtcgttactggaccatgtggaagctgcccatgtttggttgcacagatgcagtgcaggtgttgcaggagcttgatgagatgattaaagcttacccagattgctatggtaggatcattggtttcgacaatgttcgc------caagtccagtgcattagtttccttgcctacaagcct
+CPL-58-177   atgcaggtgtggccaccaattaacaagaagaagtacgagactctctcatacctccctgatttgagccaagagcaattgcttagcgaaattgagtaccttttgaaaagtggatgggttccttgcttggaattcgaaactgagcgc------------------ggatttgtctaccgtgaacaccaccattcaccaggatactatgacggcaggtactggaccatgtggaagctacctatgttcggatgcactgatgccacccaagtgttggctgaggtggaagaggcgaagaaggcatacccacaggcctgggtccgtattattggattcgacaacgtgcgt------caagtgcagtgcatcagtttcattgcctacaagcca
+LTU-59-178   atgcaggtgtggccaccaattaacatgaagaaatacgagacattgtcataccttcctgacttgtccgatgagcaattgctcaaggaagttgagtaccttttgaaaaatggatgggttccttgcttggaattcgagactgagcac------------------ggatttgtgtaccgtgagcacaacagctcaccaggatactacgatggtagatactggaccatgtggaagttgcctatgtttgggtgcactgacggaacccaggtgttggctgaggttcaagaggccaagaatgcgtacccacaggcctggatccgtattatcggattcgacaacgttcgt------caagtgcagtgcatcagtttcattgcctacaagcca
+TSP-58-177   atgcaggtgtggcccccatatggcaagaagaagtacgagactctctcataccttcctgatttaaccgacgagcaattgctcaaggagattgagtaccttttgaacaagggatgggttccttgcttggaatttgagactgagcac------------------ggatttgtctaccgtgaataccacgcctcacctagatactatgatggaaggtactggaccatgtggaagttgcccatgtttgggtgcactgatgcaactcaggtgttgggtgagctccaagaggccaagaaggcttaccctaatgcatggatcagaatcatcggattcgacaacgtccgt------caagtgcaatgcatcagtttcattgcctacaagcca
+YBN-56-175   atgcaggtgtggccaccagttggcaagaagaagtttgagactctttcctacctgccagaccttgatgatgcacaattggcaaaggaagtagaataccttcttaggaagggatggattccttgcttggaattcgagttggagcac------------------ggtttcgtgtaccgtgagcacaacaggtcactaggatactacgatggacgctactggaccatgtggaagctgcctatgtttggttgcactgatgcttctcaggtgttgaaggagcttcaagaggctaagactgcataccccaacggcttcatccgtatcatcggattcgacaacgttcgc------caagtgcagtgcatcagcttcatcgcctacaagccc
+AN-56-175    atgaaggtgtggccaccacttggattgaagaagtacgagactctctcatacttaccaccactaactgaaactcagttggctaaggaagtcgactacttgctccgcaaaaaatgggttccttgtttggaattcgagttggagcac------------------ggttttgtctaccgtgagaacgccagatcccccggatactatgacggaagatactggacaatgtggaaattgcctatgttcggttgcaccgactcagcccaagtgatgaaggagcttgctgaatgcaagaaggagtacccccaggcctggatccgtatcatcggatttgacaatgttcgt------caagttcaatgtatcatgttcattgcttccaggcca
+HI-60-179    atgcaggtgtggcctcctcttgggaagaagaagttcgagacactctcatacctccccgatcttacacccgtacagttggctaaggaagtagattaccttcttcgctctaaatggattccttgcttggaattcgaattagaggag------------------ggattcgtgcaccgtaagtactcgagcttacccacgtactacgatggacgctactggaccatgtggaaactgcccatgtttgggtgcactgactcggctcaggtgttggaggagcttgagaattgcaagaaggaataccccaatgcattcattagaatcattgggttcgacaacgttcgt------caagtgcagtgcattagtttcattgcctacaagcct
+ANA-56-175   atgaaggtgtggccaccagttggaaagaagaagtttgagaccctctcttaccttcctgaccttaccgaagttgaattgggtaaggaagtcgactaccttctccgcaacaagtggattccttgtgttgaattcgagttggagcac------------------gggtttgtttaccgtgagcacggaagcacccccggatactacgatggccgttactggacaatgtggaagcttcccttgttcggatgcactgactctgctcaagtgttgaaggaagtccaagaatgcaaaacggagtaccctaacgctttcatcaggatcatcggattcgacaacaaccgt------caggtccagtgcatcagtttcatcgcctacaagcca
+ZE-48-166    atgcaggtgtggccggcctacggcaacaagaagttcgagacgctgtcgtacctgccgccgctgtcgacggacgacctgctgaagcaggtggactacctgctgcgcaacggctggataccctgcctcgagttcagcaaggtc---------------------ggcttcgtgtaccgcgagaactccacctccccgtgctactacgacggccgctactggaccatgtggaagctgcccatgttcggctgcaacgacgccacccaggtgtacaaggagctgcaggaggccatcaaatcctacccggacgccttccaccgcgtcatcggcttcgacaacatcaag------cagacgcagtgcgtcagcttcatcgcctacaagccc
+EAT-48-166   atgcaggtgtggccaattgagggcatcaagaagttcgagaccctgtcttacttgccacccctctccacggaggccctcttgaagcaggtcgactacttgatccgctccaagtgggtgccctgcctcgagttcagcaaggtt---------------------ggcttcgtcttccgtgagcacaacagctcccccgggtactacgacggtcgatactggacaatgtggaagctgcctatgttcgggtgcaccgacgccacacaggtgctcaacgaggtggaggaggttaagaaggagtaccctgatgcgtatgtccgcgtcatcggtttcgacaacatgcgc------caggtgcaatgcgtcagcttcattgccttcaggcca
+YSA-46-164   atgcaggtgtggccgattgagggcatcaagaagttcgagaccctctcctacctgccaccgctcaccgtggaggacctcctgaagcagatcgagtacctagctccgttccaagtggtgccctgcctcgagttcagcaaggtc---------------------ggatttgtctaccgtgagaaccacaagtcccctggatactacgacggcaggtactggaccatgtggaagctgcccatgttcgggtgcaccgacgccacccaggtcgtcaaggagctcgaggaggccaagaaggcgtaccctgatgcattcgtccgtatcatcggcttcgacaacgttagg------caggtgcagctcatcagcttcatcgcctacaacccg
+TH-52-170    atgcaggtgtggcctccattcggaaaccccaagtttgagactctgtcctacctccctacgctaaccgaggagcagctggtgaaggaggttgagtacttgttgaggaacaagtgggtgccttgtctagagtttgatctggaa---------------------ggatccatctcgaggaagtataataggagcccggggtactacgatgggagatactgggtgatgtggaagttgccgatgtttgggtgcacagaggcatctcaggtgataaacgaggtgagagagtgtgccaaggcataccccaaagccttcatccgtgtcattggctttgacaacgtccgc------caagtgcagtgcatctccttcatcgtccacaagccc
+LA-68-186    atgcaggtgtggcctccttacgcgaataaaaagtttgagactctgtcgtatctccctcgcttgaccccggagcaactggtgaaggaggtggagtacctgctgaagaacaagtgggtgccctgcctggaattcgaggaggat---------------------ggtgaaataaagagagtgtatgggaatagcccagggtactacgacgggagatactgggtgatgtggaagctgcctatgttcggatgcacagaggcatcgcaggtgttgaacgaggtgaacgagtgtgcgaaggcataccccaacgccttcatccgcgtcatcggattcgacaacgtccgc------caagtgcagtgcatctccttcatcgtccacaagcct
+GR-854-978   atgaaggtgtggaaccccgtcaacaacaagaagttcgagaccttctcctacctgccccccctgtctgacgcccagatcgccaagcaggtggacatgatcattgccaaggggctctccccctgcctggagttcgccgccccggagaacagcttcatcgccaatgacaacactgtgcgcttcagcggcaccgctgcaggctactatgacaaccggtactggaccatgtggaagctgcccatgttcggctgcacggacgccagccaggtgctgcgtgagatctccgagtgccgcagggcctacccccagtgctacgtccgc---ctggccttcgactccgtcaag------caggtgcaggtgatctcgttcgtggtgcagcgcccc
+MO-29-154    ttcaaggtctggcagcccgtgaacaacaagcagtacgagaccttctcctacctgccccccctgaccaaccagaagatcggccgtcaggtcgactacatcatcaacaacggctggaccccctgcttggagttcgctgacccctccacctccttcgtcagcaacgcgaacgccgtgcgcctccagggtgtctccgctggctactacgacaacaggtactggaccatgtggaagctgcccatgttcggctgcactgaccccagccaggtgctgcgcgaggtgtccgcctgccaggtggccttccccaacgtgtacatccgcctggttgccttcgacaacgtcaag------caggtgcagtgcatgggcttcctagtgcagcgcccc
+OE-36-161    atgatggtatggtagccctttaacaataagttctttgagaccttctcgtacttgccccctctcactgacgaccaaatcaccaagcaagtggactacatcttgagaaacaattggactccttgtctggagtttgcgggatccgaccaagcgtatgtgacccacgacaacacggtaagaatgggagattgtgcatccacttatcaggacaacagatattggaccatgtggaaattgcctatgttcggttgcattgatggatcgcaagtgttgaccgaaatttcagcttgcactaaggcctttcctgatgcctacatccgtttggtgtgttttgatgcaaatagg------caagtccaaatttccggctttttggtacataggccc
+EME-43-168   atgatggtttggtagcccttcaacaacaaaatgtttgaaactttttccttcttgcctcccttgactgatgaacaaattagcaaacaagtggactacatcttggccaactcctggaccccctgtcttgaatttgcagcttctgatcaagcttatgctggcaatgaaaattgcatcagaatgggacctgtggcttctacctaccaagacaatagatattggacaatgtggaagctacctatgtttggatgcacagacggctctcaagtgttgagcgagatccaagcatgcacaaatgctttccccgatgcttacatcagattggtttgttttgacgcaaacaga------taggtgtaaatttctggatttttggtgcacagacct
+LRE-46-171   atgatggtctggaccccggtcaacaacaagatgttcgagaccttctcctacctgccccccctgagcgacgagcagatcgccgcccaggtcgactacattgtcgccaacggctggatcccctgcctggagttcgctgagtcggacaaggcctacgtgtccaacgagtcggccatccgcttcggcagcgtgtcttgcctgtactacgacaaccgctactggaccatgtggaagctgcccatgttcggctgccgcgaccccatgcaggtgctgcgcgagatcgtcgcctgcaccaaggccttccccgatgcctacgtgcgcctggtggccttcgacaaccagaag------caggtgcagatcatgggcttcctggtccagcgcccc
+P6-2-107     atgaaaactctgcccaaa------gagcgtcgtttcgagactttctcgtacctgcctcccctcagcgatcgccaaatcgctgcacaaatcgagtacatgatcgagcaaggcttccaccccttgatcgagttcaacgagcac------------------------------------------------------tcgaatccggaagagttctactggacgatgtggaagctccccctgtttgactgcaagagccctcagcaagtcctcgatgaagtgcgtgagtgccgcagcgaatacggtgattgctacatccgtgtcgctggcttcgacaacatcaag------cagtgccaaaccgtgagcttcatcgttcatcgtccc
+HO-1-106     atgaaaactctgcccaaa------gagcgtcgctacgaaaccctttcctacctgccccccctgagcgatcagcaaattgctcgccagattgagtacatggtgcgcgaaggctatattcccgccgtggaattcaacgaagat------------------------------------------------------tccgacgcgaccacctgctactggaccatgtggaagttgcccctgttccacgccacttctacccaagaagtgttgggcgaagtgcgcgagtgccgcaccgaataccccaactgctacatccgcgtagttggtttcgacaacatcaag------cagtgtcagtccgtgagcttcatcgttcacaagccc
+SP-1-106     atgcaaaccttaccaaaa------gagcgtcgttacgaaaccctttcttacttaccccccctcaccgacgttcaaatcgaaaagcaagtccagtacattctgagccaaggctacattccagccgttgagttcaacgaagtt------------------------------------------------------tctgaacctaccgaactttattggacactgtggaagctacctttgtttggtgctaaaacatcccgtgaagtattggcagaagttcaatcttgccgttctcaatatcctggtcactacatccgtgttgtaggatttgacaatattaag------cagtgccaaatcctgagcttcatcgttcacaaaccc
+PA-1-105     ---atgcaacttagagta------gaacgtaagttcgaaactttttcttatttaccaccattaaacgaccaacagattgcgcgtcaattacaatacgcactttccaatggttatagcccagcaatcgaattcagttttaca------------------------------------------------------ggtaaagctgaagacttagtatggactttatggaaattacctttatttggtgcacaatctcctgaagaagtacttagcgaaattcaagcttgtaaacaacagttccctaatgcttacattcgtgttgtagcatttgactctatcaga------caagttcaaactttaatgttcttagtttacaaacca
+NE-2-109     gctgaaatgcaggattacaagcaaagcctcaaatatgagactttctcttatcttccacccatgaacgcggaacgcatccgcgctcaaatcaagtacgcaattgctcaaggctggagccccggcattgagcacgtagaagtgaaa------------------------------------------------------aactccatgaaccaatattggtacatgtggaaacttcccttcttcggcgaacaaaatgtcgacaacgtgttggctgaaattgaagcgtgtcgtagtgcgtatccaacacaccaggtcaaactggtggcttatgacaactatgcg------caaagcttaggtctggccttcgtggtctaccgcggc
+IFE-2-109    gctgacattcaggactacaactcaacacccaagtacgaaaccttctcttatttgccggcaatgggaccggaaaaaatgcgccgtcagatcgcctatctcatcaatcagggctggaaccccggcatcgagcatgtggaacctgaa------------------------------------------------------cgcgcatcaacatactactggtacatgtggaagttacccatgttcggcgaacagtcggtggacaccgtgatcatggagttggaagcatgccatcgcgctcaccccggccatcacgtgcgcttggtcgggtatgacaattactcg------cagagccagggcagcgcttttgtggtgtttcgcggg
+HS-9-115     ---tcgagcgtcagcgatccgtcgagccgcaagttcgagaccttctcctacctgcccgaactcggcgtggaaaagatccgcaagcaggtcgagtacatcgtcagcaagggctggaacccggccgtcgagcacaccgagccggag------------------------------------------------------aacgccttcgaccactactggtacatgtggaagctgccgatgttcggcgaaaccgacgtggacgccatcctggccgaggccgaggcatgccacaaggcgcatccctcgcatcacgtgcgcctgatcggctacgacaactatgcc------cagtcgcaaggcactgccatggtgatcttccgcggc
+RVI-7-114    agttccagcctcgaagacgtcaacagccgcaagttcgagaccttctcctacctgccgcgcatggatgccgaccgcatccgcaagcaggtcgagtacatcgtctccaagggctggaacccggccatcgagcacaccgagccggaa------------------------------------------------------aacgccttcgatcactactggtacatgtggaagctgccgatgttcggcgagaccgacatcgacaccatcctcaaggaggccgaagcctgccacaaggcgcaccccaacaatcacgtgcgtctgatcggcttcgacaactatgcc------cagtccaagggcgccgagatggtggtctatcgcggc
+IFE-8-115    aaatcccgtctctccgacccggcgagcgcgaagttcgagacactgtcttacctgcccgccctgaccgcggacgagatccgtcaacaggttgcgtatattgtttccaagggctggaatccggcggtagaacataccgaaccggaa------------------------------------------------------aacgccttcggcaactactggtatatgtggaagttgcccatgttcggcgaaacggacgtggacaccattctgaaagaagcggaacgctgccataagcggaatccccataaccacgtccgtatcgtcggctatgataacttcaag------cagtcccagggtacttccctggtagtctatcggggc
+RVI-5-112    agcagcatgggcgatcacgccaccatcggccgctacgagaccttttcctatctgccgccgctcaaccgcgaggagatcctggagcagatcctctacatcctcgacaacggctggaacgcctcgctggagcacgagcatccggat------------------------------------------------------cgcgccttcgagtattactggccgatgtggaagatgcccttcttcggcgaacaggatccgaacgtgatcctgaccgagatcgagtcctgccggcgcagctatccggaccatcacgtccggctggtcggctacgacacctacgcc------cagagcaagggacattccttcctggcgcaccgcccg
+
diff --git a/test_scripts/test_data/prot_M126_27_269.phy b/test_scripts/test_data/prot_M126_27_269.phy
new file mode 100644
index 0000000..7e754be
--- /dev/null
+++ b/test_scripts/test_data/prot_M126_27_269.phy
@@ -0,0 +1,28 @@
+27 269
+Acrasis_rosea              STTTGHLIYKCGGIDKRVIEKFKEAAEIGKSFKYAWVMDKLKAEKERGITIDISLWKFQSAKYDFTIIDAPGHRDFIKNMITGTSQADVAILMIDSTREHALLAQTLGVKQMIVCLNKFDNFSQARYDEIFIPLSGFQGYNMPWYPCLLEALDPKRPTDKPLRLPLQDVGRVETGLLKGMNVTFAPGNKTTEVKSVMHAEPGDNVGFNISNGYSPVLDCHTAHIACKFETIKSLIDKGLKRGDSGIVKLVPMCVESYTEYPPLGRFAVR
+Arabidopsis_thaliana       STTTGHLIYKLGGIDKRVIERFKEAAEMNKSFKYAWVLDKLKAERDRGITIDIALWKFETTKYYCTVIDAPGHRDFIKNMITGTSQADCAVLIIDSTREHALLAFTLGVKQMICCCNKMDKYSKARYDEIFVPISGFEGDNMDWYPTLLEALDPKRPSDKPLRLPLQDVGRVETGMIKGMVVTFAPTGLTTEVKSVMHALPGDNVGFNIGNGYAPVLDCHTSHIAVKFSEILTKIDKFLKNGDAGMVKMTPMVVETFSEYPPLGRFAVR
+Blastocystis_hominis       STTTGHLIYACGGIDKRTIERFEGGQRIGKSFKYAWVLDKMKAERERGITIDISLWKFQTEKYFFTIIDAPGHRDFIKNMITGTSQADVAILIIAATREHALLANTLGVKQMICCVNKMDNYSEARYKEIFIPISGFNGDNMPWYPTLIEALDPKRPVDKPLRLPLQDVGRVETGILKGMTVTFAPVGVTTEVKSVMHALPGDNVGFNIRPGYCPVMDCHTAHIACKFEKIMSEMDDIVKNGKSMMAELVPLCVESFQDYPPLGRFAVR
+Caenorhabditis_elegans     STTTGHLIYKCGGIDKRTIEKFKEAQEMGKSFKYAWVLDKLKAERERGITIDIALWKFETAKYYITIIDAPGHRDFIKNMITGTSQADCAVLVVACTREHALLAQTLGVKQLIVACNKMDPFSEARFTEIFVPISGFNGDNMPWFKTLLEALDPQRPTDRPLRLPLQDVGRVETGIIKGMVVTFAPQNVTTEVKSVMHAVPGDNVGFNISNGYTPVLDCHTAHIACKFNELKEKVDKFLKSGDAGIVELIPLCVESFTDYAPLGRFAVR
+Cryptosporidium_parvum     STTTGHLIYKLGGIDKRTIEKFKESSEMGKSFKYAWVLDKLKAERERGITIDIALWQFETPKYHYTVIDAPGHRDFIKNMITGTSQADVALLVVPATREHALLAFTLGVRQMIVGINKMDEYKQSRFDEIFVAISGFVGDNMPWYKTLVEALDPKRPTDKPLRLPLQDVGRVETGIIRGMNVTFAPAGVTTEVKSVMHAVPGDNVGFNIKNGYSPVVDCHTAHISCKFQTITAKMDKLIKSGDAALVVMQPLCVEAFTDYPPLGRFAVR
+Cyanophora_paradoxa        STTTGHLIYKCGGIDKRTIEKFKEAAEIGKSFKYAWVLDKLKAERERGITIDIALWKFETPKYYVTIIDAPGHRDFIKNMITGTSQADCAVLVIPATREHALLAYTLGVKQMIVAVNKMDNYGQPRFEEIFVPISGFNGDNMGWYPTLVEALDPKRPSEKPLRLPLQDVGRVETGVIKGMTVVFAPSAVTTEVKSVMHALPGDNVGFNIGAGYSPVVDCHTAHIACKFSELITKIDKFVKSGDACMARLIPMCVEAFTNYPPLGRFAVR
+Dictyostelium_discoideum   STTTGHLIYKCGGIDKRVIEKYKEASEMGKSFKYAWVMDKLKAERERGITIDIALWKFETSKYYFTIIDAPGHRDFIKNMITGTSQADCAVLVIASTREHALLAYTLGVKQMIVAINKMDNYSQARYDEIFVPISGWNGDNMEWYPTLLEALDPKRPHDKPLRIPLQDVGRVETGIIKGMVVTFAPAGLSTEVKSVMHARPGDNVGFNIHAGYSPVLDCHTAHIACKFTEIVDKVDVVLKNGDAAMVELTPMCVESFTEYPPLGRFAVR
+Dinenympha_exilis          STTTGHLIYKCGGIDERTIKKFQESEAMGKSFKYAWVLDKLKAERERGITIDIALWKFETNKYYFTIIDAPGHRDFIKNMITGTSQADAAILVVAATREHALLAYTLGVEQMIVCVNKMDNWAESRYNEIMIPISGFNGDNMPWYPILFDALDPKRPSDKPLRLPIQDVGRVETGILTGQVITIAPCMITTEVKSVMHAVPGDNVGFNIQNGYTPVLDCHTSHIACKFKEIQSKIDKFIKTGDSAIVEMQPMVVETFVEYPPLGRFAVR
+Entamoeba_histolytica      STTTGHLIYKCGGIDQRTIEKFKESAEMGKSFKYAWVLDNLKAERERGITIDISLWKFETSKYYFTIIDAPGHRDFIKNMITGTSQADVAILIVAATREHILLSYTLGVKQMIVGVNKMDQYKQERYEEIFVPISGFQGDNMPWYPTLIGALDPERPVDKPLRLPLQDVGRVETGILKGTIVQFAPSGVSSECKSIMHAIPGDNVGFNIRKGYTPVLDCHTSHIACKFEELLSKIDEYIKNGDSALVKIVPLCVEEFAKFPPLGRFAVR
+Euglena_gracilis           STTTGHLIYKCGGIDKRTIEKFKEASEMGKSFKYAWVLDKLKAERERCITIDIALWKFETAKSVFTIIDAPGHRDFIKNMITGTSQADAAVLVIDSTREHALLAYTLGVKQMIVATNKFDKYSQARYEEIFIPISGWNGDNMGWYLTLIGALDPKRPSDKPLRLPLQDVGRVETGVLKGDVVTFAPNNLTTEVKSVMHAVPGDNVGFNIGNGYAPVLDCHTCHIACKFATIQTKIDKFIKSGDAAIVLMKPMCVESFTDYPPLG-VSCG
+Giardia_lamblia            STLTGHLIYKCGGIDQRTIDEYKRATEMGKSFKYAWVLDQLKDERERGITINIALWKFETKKYIVTIIDAPGHRDFIKNMITGTSQADVAILVVAATREHATLANTLGIKTMIICVNKMDKYSKERYDEIYIPTSGWTGDNIPWYPCLIDAIDPKRPTDKPLRLPIQDVGRVETGELAGMKVVFAPTSQVSEVKSVMHAGPGDNVGFNIQPGYTPVIDCHTAHIACQFQLFLQKLDPDAGRGDCIIVKMVPLCCETFNDYAPLGPFAVR
+Hexamita_inflata           STLTGHLIYKCGGIDQRTLEDYKKANEIGKSFKYAWVLDQLKDERERGITINIALWKFETKKFIVTIIDAPGHRDFIKNMITGTSQADVAILVVAATREHATLANTLGIKTMIVAVNKMDNYSEARYTEIFVPLSGWTGDNIPWYKCLIECIDPKRPNDKPLRLPIQDVGRVESGELIGMMVVFAPAGEKTEVKSVMHAGPGDNVGFNINPGYTPVLDCHTSHLAWKFDKFLAKLNTEAVRGECVLMQIVPLCVESFEQYPALGRFAVR
+Homo_sapiens               STTTGHLIYKCGGIDKRTIEKFKEAAEMGKSFKYAWVLDKLKAERERGITIDISLWKFETSKYYVTIIDAPGHRDFIKNMITGTSQADCAVLIVAATREHALLAYTLGVKQLIVGVNKMDPYSQKRYEEIFVPISGWNGDNMPWFTTLLEALDPTRPTDKPLRLPLQDVGRVETGVLKGMVVTFAPVNVTTEVKSVMHALPGDNVGFNISAGYAPVLDCHTAHIACKFAELKEKIDKFLKSGDAAIVDMVPMCVESFSDYPPLGRFAVR
+Naegleria_andersoni        STTTGHLIYKCGGIDKRVIEKFKEAAEMGKSFKYAWVLDKLKAERERGITIDIALWKFESKKYVFTIIDAPGHRDFIKNMITGTSQADVAILVVDSTREHALLAYTLGIKQMIVCMNKFDSYKEDRYNEIFVPISGWTGDNMPWYPCLLDALDPVRPTDKPLRLPFQDVGRVETGKLKGMMIHFAPGQADTEVKSVMHAGPGDNVGFNIRAGYTPVLDCHTSHIACKFDKLIDKIDEKIKKGDSAIVQVIPMCVEGFTEYPPLGRFAVR
+Nicotiana_tabacum          STTTGHLIYKLGGIDKRVIERFKEAAEMNKSFKYAWVLDKLKAERERGITIDIALWKFETTKYYCTVIDAPGHRDFIKNMITGTSQADCAVLIIDSTREHALLAFTLGVKQMICCCNKMDKYSKARYDEIFVPISGFEGDNMDWYPTLLEALDPKRPSDKPLRLPLQDVGRVETGVLKGMVVTFGPTGLTTEVKSVMHALPGDNVGFNIGNGYAPVLDCHTSHIAVKFAEILTKIDKFLKNGDAGMVKMIPMVVETFSEYPPLGRFAVR
+Physarum_polycephalum      STTTGHLIYKCGGIDKRTIEKFKEAAEMGKSFKYAWVLDKLKSERERGITIDIALWKFETAKYYITIIDAPGHRDFIKNMITGTSQADAAVLVIASTREHALLAYTLGVKQMIVAINKMDNWSQARYDEIFVPISGWNGDNMPWYPTLLEALDPKRPTDKPLRVPLQDVGRVETGILKGMIVTFAPANLSTEVKSVMHAVPGDNVGFNIHAGYAPVLDCHTAHIACKFTEILSKVDKNIKNGDAAIVKLTPMCVESFTDFPPLGRFAVR
+Phytophthora_capcisi       STTTGHLIYKCGGIDKRTIEKFKEAAELGKSFKYAWVLDNLKAERERGITIDIALWKFESPKYFFTVIDAPGHRDFIKNMITGTSQADCAILVVASTREHALLAFTLGVKQMVVAINKMDMYGQARYEEIFVPISGWEGDNMPWYPFLLEALDPKRPSDKPLRLPLQDVGRVETGVIKGMVATFGPVGLSTEVKSVMHAVPGDNVGFNIGNGYSPVLDCHTAHVACKFKEITEKMDKFVKSGDACMVILEPMTVESFQEYPPLGRFAVR
+Plasmodium_falciparum      STTTGHIIYKLGGIDRRTIEKFKESAEMGKSFKYAWVLDKLKAERERGITIDIALWKFETPRYFFTVIDAPGHKDFIKNMITGTSQADVALLVVPATKEHALLAFTLGVKQIVVGVNKMDKYSEDRYEEIFIPISGFEGDNLPWYRTLIEALDPKRPYDKPLRIPLQGVGRVETGILKGMVLNFAPSAVVSECKSVMHARPGDNIGFNIKNGYSPVLDCHTAHISCKFLNIDSKIDKSIKSGDSALVSLEPMVVETFTEYPPLGRFAIR
+Podospora_anserina         STTTGHLIYKCGGIDKRTIEKFKEAAELGKSFKYAWVLDKLKAERERGITIDIALWKFETPKYYVTVIDAPGHRDFIKNMITGTSQADCAILIIAATREHALLAYTLGVKQLIVAINKMDKWSEARFNEIFVPISGFNGDNMPWYKTLLEAIDPKRPTDKPLRLPLQDVGRIETGILKGMVVTFAPSNVTTEVKSVMHGVPGDNVGFNVGAGYAPVLDCHTAHIACKFSELLQKIDKFIKSGDAAIVKMVPMCVEAFTEYPPLGRFAVR
+Porphyra_purpurea          STTTGHLIYKCGGIDKRAIEKFKEAAEMGKSFKYAWVLDKLKAERERGITIDIALWKFETDKYNFTIIDAPGHRDFIKNMITGTSQADLAILVIASTREHALLAYTLGVKQMIVACNKMDNWSKERYEEVKVPTSGWTGENLKWYPCLLEALDPKRPVDKPLRLPLQDVGRVETGVIKGMVVTFAPSGLSTEVKSVMHAGPGDNVGFNIHAGYAPVLDCHTAHIACKFSELILKMDKMIKSGDAAMVKMVPMCVEAFTSYPPLGRFAVR
+Schizosaccharomyces_pombe  STTTGHLIYKCGGIDKRTIEKFKEATELGKSFKYAWVLDKLKAERERGITIDIALWKFETPKYNVTVIDAPGHRDFIKNMITGTSQADCAVLIIGGTREHALRAYTLGVKQLIVAVNKMDGWSQARFEEIFVPVSGFQGDNMPWYKTLLEAIDPARPTDKPLRLPLQDVGRVETGVIKGMIVTFAPAGVTTEVKSVMHGLPGDNVGFNISAGYSPVLDCHTAHIACKFAELIEKIDKFVKSGDACIAKMVPMCVEAFTDYAPLGRFAVR
+Spironucleus_barkhanus     STLTGHLIYKCGGIDQRTLDEYKRANEMGKSFKYAWVLDQLKDERERGITINIALWKFETKKFTVTIIDAPGHRDFIKNMITGTSQADVAILVIASTREHATLAHTLGIKTLIVCVNKMDNYSEARYKEIFIPTSGWTGDSIPWYPCLIDAIDPKRPTDKPLRLPIQDVGRVESGLLINMTVVFAPSTTTAEVKSVMHAGPGDNVGFNIQPGYSPVIDCHTAHIACKFDAFLQKLNTEASRGECIVVRMVPLSCESFNDYAALGRFAVR
+Stylonychia_lemnae         STSTGHLIYKCGGIDKRTIEKFKEAAEMGKSFKYAWVLDKLKAERERGITIDIALWNFETAKSVFTIIDAPGHRDFIKNMITGTSQADAAILIIASTREHALLAFTMGVKQMIVAVNKMDNWDQGRFIEIFIPISGWHGDNMPWFSTLIDALDPKRPKDKPLRLPLQDVGRVETGLLKGMVLTFAPMNITTECKSVMHAEPGDNVGFTIQKGYAPVLDCHTAHIACKFDEIESKVDKFIKSGEAALVRMVPMCVEAFNQYPPLGRFAVR
+Tetrahymena_pyriformis     STTTGHLIYKCGGIDKRVIEKFKESAEQGKSFKYAWVLDKLKAERERGITIDISLWKFETAKYHFTIIDAPGHRDFIKNMITGTSQADVAILMIASTREHALLAFTLGVKQMIVCLNKMDNFSEERYQEIFIPISGFNGDNMPWYPILVEALDPKRPVDKPLRLPLQDVGRVETGVIKGMSIQFAPNKVIAECKSVMHAVPGDNVGFNIQAGYTPVLDCHTAHIACKFETIHDKIDKFIKNGDAALVTLIALCVEVFQEYPPLGRYAVR
+Trichomonas_vaginalis      STTTGHLIYKCGGLDKRKLAAIKEAEQLGKSFKYAFVMDSLKAERERGITIDISLWKFEGQKFSFTIIDAPGHRDFIKNMITGTSQADAAILVIDSTREHALLAFTLGIKQVIVAVNKMDNYNKARFDEIFVPISGWAGDNMPWYPYLLEALDPKRPFDKPLRLPLQDVGRVESGTMKGMIVNFAPSTVTAEVKSIMHALPGDNIGFNIHAGYQPVFDCHTAHIACKFDKLIQRIDEYIQKDDAAIVEVVPLVVESFQEYPPLGRFAIR
+Trypanosoma_brucei         STATGHLIYKCGGIDKRTIEKFKEAADIGKSFKYAWVLDKLKAERERGITIDIALWKFESPKSVFTIIDAPGHRDFIKNMITGTSQADAAILIIASTREHALLAFTLGVKQMVVCCNKMDNYGQERYDEIFVPISGWQGDNMPWYPTLLEALDPVRPSDKPLRLPLQTCGRVETGVMKGDVVTFAPANVTTEVKSIMHATPGDNVGFNIGNGYAPVLDCHTSHIACKFAEIESKIDKSIKSGDAAIVRMVPMCVEVFNDYAPLGRFAVR
+oxymonad                   STTTRHLIYKCGGIDQRTLDRFKESEAMGKSFKYAWVLDKLKAERERGITIDIALWKFETGKYYFTIIDAPGHRDFIKNMITGTSQADGAILVVAATREHALLAYTLGVRQMIVCVNKMDNWAESRYNEVMIPISGFNGDNMPWYPVLFEALDPKRPSEKPLRLPIQDVGRVETGILTGQVITIAPCMITTEVKSVMHAVPGDNVGFNIQNGYTPVLDCHTSHIACKFKEIQSKIDKSIKSGDSAFVEMQPMVVETFVEYPPLGRFAVR
diff --git a/tools.cpp b/tools.cpp
index 41a42d6..3281686 100644
--- a/tools.cpp
+++ b/tools.cpp
@@ -20,7 +20,7 @@
 
 
 
-#if (defined(__GNUC__) || defined(__clang__)) && !defined(WIN32)
+#if (defined(__GNUC__) || defined(__clang__)) && !defined(WIN32) && !defined(__CYGWIN__)
 #include <execinfo.h>
 #include <cxxabi.h>
 #endif
@@ -288,7 +288,7 @@ double convert_double(const char *str, int &end_pos) throw (string) {
 	return d;
 }
 
-void convert_double_vec(const char *str, DoubleVector &vec) throw (string) {
+void convert_double_vec(const char *str, DoubleVector &vec, char separator) throw (string) {
     char *beginptr = (char*)str, *endptr;
     vec.clear();
     do {
@@ -301,7 +301,7 @@ void convert_double_vec(const char *str, DoubleVector &vec) throw (string) {
 			throw err;
 		}
 		vec.push_back(d);
-		if (*endptr == ',') endptr++;
+		if (*endptr == separator) endptr++;
 		beginptr = endptr;
     } while (*endptr != 0);
 }
@@ -613,7 +613,9 @@ void parseArg(int argc, char *argv[], Params &params) {
     params.user_file = NULL;
     params.fai = false;
     params.testAlpha = false;
-    params.testAlphaEps = 100.0;
+    params.testAlphaEpsAdaptive = false;
+    params.randomAlpha = false;
+    params.testAlphaEps = 0.1;
     params.exh_ai = false;
     params.alpha_invar_file = NULL;
     params.out_prefix = NULL;
@@ -676,6 +678,7 @@ void parseArg(int argc, char *argv[], Params &params) {
     params.tree_burnin = 0;
     params.tree_max_count = 1000000;
     params.split_threshold = 0.0;
+    params.split_threshold_str = NULL;
     params.split_weight_threshold = -1000;
     params.split_weight_summary = SW_SUM;
     params.gurobi_format = true;
@@ -726,6 +729,7 @@ void parseArg(int argc, char *argv[], Params &params) {
     params.stop_confidence = 0.95;
     params.model_name = "";
     params.model_set = NULL;
+    params.model_extra_set = NULL;
     params.model_subset = NULL;
     params.state_freq_set = NULL;
     params.ratehet_set = NULL;
@@ -745,12 +749,14 @@ void parseArg(int argc, char *argv[], Params &params) {
     params.p_invar_sites = -1.0;
     params.optimize_model_rate_joint = false;
     params.optimize_by_newton = true;
-    params.optimize_alg = "2-BFGS-B";
+    params.optimize_alg = "2-BFGS-B,EM";
     params.fixed_branch_length = false;
+    params.min_branch_length = 0.0; // this is now adjusted later based on alignment length
+    params.max_branch_length = 100.0;
     params.iqp_assess_quartet = IQP_DISTANCE;
     params.iqp = false;
     params.write_intermediate_trees = 0;
-    params.avoid_duplicated_trees = false;
+//    params.avoid_duplicated_trees = false;
     params.rf_dist_mode = 0;
     params.mvh_site_rate = false;
     params.rate_mh_type = true;
@@ -758,10 +764,12 @@ void parseArg(int argc, char *argv[], Params &params) {
     params.mean_rate = 1.0;
     params.aLRT_threshold = 101;
     params.aLRT_replicates = 0;
+    params.aLRT_test = false;
+    params.aBayes_test = false;
     params.localbp_replicates = 0;
     params.SSE = LK_EIGEN_SSE;
     params.lk_no_avx = false;
-    params.print_site_lh = 0;
+    params.print_site_lh = WSL_NONE;
     params.print_site_rate = false;
     params.print_site_posterior = 0;
     params.print_tree_lh = false;
@@ -809,8 +817,8 @@ void parseArg(int argc, char *argv[], Params &params) {
     params.online_bootstrap = true;
     params.min_correlation = 0.99;
     params.step_iterations = 100;
-    params.store_candidate_trees = false;
-	params.print_ufboot_trees = false;
+//    params.store_candidate_trees = false;
+	params.print_ufboot_trees = 0;
     //const double INF_NNI_CUTOFF = -1000000.0;
     params.nni_cutoff = -1000000.0;
     params.estimate_nni_cutoff = false;
@@ -855,7 +863,7 @@ void parseArg(int argc, char *argv[], Params &params) {
     params.num_threads = 1;
 #endif
     params.model_test_criterion = MTC_BIC;
-    params.model_test_stop_rule = MTC_ALL;
+//    params.model_test_stop_rule = MTC_ALL;
     params.model_test_sample_size = 0;
     params.root_state = NULL;
     params.print_bootaln = false;
@@ -873,6 +881,8 @@ void parseArg(int argc, char *argv[], Params &params) {
     params.freq_const_patterns = NULL;
     params.no_rescale_gamma_invar = false;
     params.compute_seq_identity_along_tree = false;
+    params.link_alpha = false;
+
 
 	if (params.nni5) {
 	    params.nni_type = NNI5;
@@ -1420,6 +1430,13 @@ void parseArg(int argc, char *argv[], Params &params) {
 					throw "Split threshold must be between 0 and 1";
 				continue;
 			}
+			if (strcmp(argv[cnt], "-minsupnew") == 0) {
+				cnt++;
+				if (cnt >= argc)
+					throw "Use -minsupnew <split_threshold_1/.../split_threshold_k>";
+				params.split_threshold_str = argv[cnt];
+				continue;
+			}
 			if (strcmp(argv[cnt], "-tw") == 0) {
 				cnt++;
 				if (cnt >= argc)
@@ -1678,7 +1695,7 @@ void parseArg(int argc, char *argv[], Params &params) {
 					throw "Use -nb <#bootstrap_replicates>";
 				params.min_iterations = convert_int(argv[cnt]);
 				params.iqp_assess_quartet = IQP_BOOTSTRAP;
-				params.avoid_duplicated_trees = true;
+//				params.avoid_duplicated_trees = true;
 				continue;
 			}
 			if (strcmp(argv[cnt], "-mod") == 0
@@ -1696,6 +1713,13 @@ void parseArg(int argc, char *argv[], Params &params) {
 				params.model_set = argv[cnt];
 				continue;
 			}
+			if (strcmp(argv[cnt], "-madd") == 0) {
+				cnt++;
+				if (cnt >= argc)
+					throw "Use -madd <extra_model_set>";
+				params.model_extra_set = argv[cnt];
+				continue;
+			}
 			if (strcmp(argv[cnt], "-msub") == 0) {
 				cnt++;
 				if (cnt >= argc)
@@ -1905,10 +1929,33 @@ void parseArg(int argc, char *argv[], Params &params) {
 				params.optimize_model_rate_joint = false;
 				continue;
 			}
-			if (strcmp(argv[cnt], "-fixbr") == 0) {
+			if (strcmp(argv[cnt], "-fixbr") == 0 || strcmp(argv[cnt], "-blfix") == 0) {
 				params.fixed_branch_length = true;
 				continue;
 			}
+			if (strcmp(argv[cnt], "-blmin") == 0) {
+				cnt++;
+				if (cnt >= argc)
+					throw "Use -blmin <min_branch_length>";
+				params.min_branch_length = convert_double(argv[cnt]);
+				if (params.min_branch_length < 0.0)
+					outError("Negative -blmin not allowed!");
+				if (params.min_branch_length == 0.0)
+					outError("Zero -blmin is not allowed due to numerical problems");
+				if (params.min_branch_length > 0.1)
+					outError("-blmin must be < 0.1");
+
+				continue;
+			}
+			if (strcmp(argv[cnt], "-blmax") == 0) {
+				cnt++;
+				if (cnt >= argc)
+					throw "Use -blmax <max_branch_length>";
+				params.max_branch_length = convert_double(argv[cnt]);
+				if (params.max_branch_length < 0.5)
+					outError("-blmax smaller than 0.5 is not allowed");
+				continue;
+			}
 			if (strcmp(argv[cnt], "-sr") == 0) {
 				params.stop_condition = SC_WEIBULL;
 				cnt++;
@@ -2009,13 +2056,13 @@ void parseArg(int argc, char *argv[], Params &params) {
 			}
 			if (strcmp(argv[cnt], "-wt2") == 0) {
 				params.write_intermediate_trees = 2;
-				params.avoid_duplicated_trees = true;
+//				params.avoid_duplicated_trees = true;
 				params.print_tree_lh = true;
 				continue;
 			}
 			if (strcmp(argv[cnt], "-wt3") == 0) {
 				params.write_intermediate_trees = 3;
-				params.avoid_duplicated_trees = true;
+//				params.avoid_duplicated_trees = true;
 				params.print_tree_lh = true;
 				continue;
 			}
@@ -2027,10 +2074,10 @@ void parseArg(int argc, char *argv[], Params &params) {
                 params.write_init_tree = true;
                 continue;
             }
-			if (strcmp(argv[cnt], "-nodup") == 0) {
-				params.avoid_duplicated_trees = true;
-				continue;
-			}
+//			if (strcmp(argv[cnt], "-nodup") == 0) {
+//				params.avoid_duplicated_trees = true;
+//				continue;
+//			}
 			if (strcmp(argv[cnt], "-rf_all") == 0) {
 				params.rf_dist_mode = RF_ALL_PAIR;
 				continue;
@@ -2071,10 +2118,18 @@ void parseArg(int argc, char *argv[], Params &params) {
 			}
 			if (strcmp(argv[cnt], "-alrt") == 0) {
 				cnt++;
-				params.aLRT_replicates = convert_int(argv[cnt]);
-				if (params.aLRT_replicates < 1000
-						&& params.aLRT_replicates != 0)
-					throw "aLRT replicates must be at least 1000";
+                int reps = convert_int(argv[cnt]);
+                if (reps == 0)
+                    params.aLRT_test = true;
+                else {
+                    params.aLRT_replicates = reps;
+                    if (params.aLRT_replicates < 1000)
+                        throw "aLRT replicates must be at least 1000";
+                }
+				continue;
+			}
+			if (strcmp(argv[cnt], "-abayes") == 0) {
+				params.aBayes_test = true;
 				continue;
 			}
 			if (strcmp(argv[cnt], "-lbp") == 0) {
@@ -2086,11 +2141,19 @@ void parseArg(int argc, char *argv[], Params &params) {
 				continue;
 			}
 			if (strcmp(argv[cnt], "-wsl") == 0) {
-				params.print_site_lh = 1;
+				params.print_site_lh = WSL_SITE;
+				continue;
+			}
+			if (strcmp(argv[cnt], "-wslg") == 0 || strcmp(argv[cnt], "-wslr") == 0) {
+				params.print_site_lh = WSL_RATECAT;
+				continue;
+			}
+			if (strcmp(argv[cnt], "-wslm") == 0) {
+				params.print_site_lh = WSL_MIXTURE;
 				continue;
 			}
-			if (strcmp(argv[cnt], "-wslg") == 0) {
-				params.print_site_lh = 2;
+			if (strcmp(argv[cnt], "-wslmr") == 0 || strcmp(argv[cnt], "-wslrm") == 0) {
+				params.print_site_lh = WSL_MIXTURE_RATECAT;
 				continue;
 			}
 			if (strcmp(argv[cnt], "-wsr") == 0) {
@@ -2278,7 +2341,7 @@ void parseArg(int argc, char *argv[], Params &params) {
 				if (cnt >= argc)
 					throw "Use -bb <#replicates>";
 				params.gbo_replicates = convert_int(argv[cnt]);
-				params.avoid_duplicated_trees = true;
+//				params.avoid_duplicated_trees = true;
 				if (params.gbo_replicates < 1000)
 					throw "#replicates must be >= 1000";
 				params.consensus_type = CT_CONSENSUS_TREE;
@@ -2296,7 +2359,12 @@ void parseArg(int argc, char *argv[], Params &params) {
 				continue;
 			}
 			if (strcmp(argv[cnt], "-wbt") == 0) {
-				params.print_ufboot_trees = true;
+				params.print_ufboot_trees = 1;
+				continue;
+			}
+			if (strcmp(argv[cnt], "-wbtl") == 0) {
+                // print ufboot trees with branch lengths
+				params.print_ufboot_trees = 2;
 				continue;
 			}
 			if (strcmp(argv[cnt], "-bs") == 0) {
@@ -2335,11 +2403,11 @@ void parseArg(int argc, char *argv[], Params &params) {
 				params.online_bootstrap = false;
 				continue;
 			}
-			if (strcmp(argv[cnt], "-nostore") == 0
-					|| strcmp(argv[cnt], "-memsave") == 0) {
-				params.store_candidate_trees = false;
-				continue;
-			}
+//			if (strcmp(argv[cnt], "-nostore") == 0
+//					|| strcmp(argv[cnt], "-memsave") == 0) {
+//				params.store_candidate_trees = false;
+//				continue;
+//			}
 			if (strcmp(argv[cnt], "-lhmemsave") == 0) {
 				params.lh_mem_save = LM_PER_NODE;
 				continue;
@@ -2348,10 +2416,10 @@ void parseArg(int argc, char *argv[], Params &params) {
 				params.lh_mem_save = LM_ALL_BRANCH;
 				continue;
 			}
-			if (strcmp(argv[cnt], "-storetrees") == 0) {
-				params.store_candidate_trees = true;
-				continue;
-			}
+//			if (strcmp(argv[cnt], "-storetrees") == 0) {
+//				params.store_candidate_trees = true;
+//				continue;
+//			}
 			if (strcmp(argv[cnt], "-nodiff") == 0) {
 				params.distinct_trees = false;
 				continue;
@@ -2441,6 +2509,14 @@ void parseArg(int argc, char *argv[], Params &params) {
 				params.testAlpha = true;
 				continue;
 			}
+            if (strcmp(argv[cnt], "--adaptive-eps") == 0) {
+                params.testAlphaEpsAdaptive = true;
+                continue;
+            }
+            if (strcmp(argv[cnt], "--rand-alpha") == 0) {
+                params.randomAlpha = true;
+                continue;
+            }
             if (strcmp(argv[cnt], "--test-alpha-eps") == 0) {
                 cnt++;
                 if (cnt >= argc)
@@ -2665,11 +2741,11 @@ void parseArg(int argc, char *argv[], Params &params) {
 				if (cnt >= argc)
 					throw "Use -merit AIC|AICC|BIC";
                 if (strcmp(argv[cnt], "AIC") == 0)
-                    params.model_test_stop_rule = MTC_AIC;
+                    params.model_test_criterion = MTC_AIC;
                 else if (strcmp(argv[cnt], "AICc") == 0 || strcmp(argv[cnt], "AICC") == 0)
-                    params.model_test_stop_rule = MTC_AICC;
+                    params.model_test_criterion = MTC_AICC;
                 else if (strcmp(argv[cnt], "BIC") == 0)
-                    params.model_test_stop_rule = MTC_BIC;
+                    params.model_test_criterion = MTC_BIC;
                 else throw "Use -merit AIC|AICC|BIC";
 				continue;
 			}
@@ -2738,6 +2814,11 @@ void parseArg(int argc, char *argv[], Params &params) {
 				continue;
 			}
             
+			if (strcmp(argv[cnt], "--link-alpha") == 0) {
+				params.link_alpha = true;
+				continue;
+			}
+
 			if (argv[cnt][0] == '-') {
                 string err = "Invalid \"";
                 err += argv[cnt];
@@ -2876,8 +2957,7 @@ void usage_iqtree(char* argv[], bool full_command) {
             << "  -sp <partition_file> Edge-unlinked partition model (like -M option of RAxML)" << endl
             << "  -t <start_tree_file> | BIONJ | RANDOM" << endl
             << "                       Starting tree (default: 100 parsimony trees and BIONJ)" << endl
-            << "  -te <user_tree_file> Evaluating a fixed user tree (no tree search performed)" << endl
-            << "  -z <trees_file>      Evaluating user trees at the end (can be used with -t, -te)" << endl
+            << "  -te <user_tree_file> Like -t but fixing user tree (no tree search performed)" << endl
             << "  -o <outgroup_taxon>  Outgroup taxon name for writing .treefile" << endl
             << "  -pre <PREFIX>        Using <PREFIX> for output files (default: aln/partition)" << endl
 #ifdef _OPENMP
@@ -2900,6 +2980,7 @@ void usage_iqtree(char* argv[], bool full_command) {
             << endl << "ULTRAFAST BOOTSTRAP:" << endl
             << "  -bb <#replicates>    Ultrafast bootstrap (>=1000)" << endl
             << "  -wbt                 Write bootstrap trees to .ufboot file (default: none)" << endl
+            << "  -wbtl                Like -wbt but also writing branch lengths" << endl
 //            << "  -n <#iterations>     Minimum number of iterations (default: 100)" << endl
             << "  -nm <#iterations>    Maximum number of iterations (default: 1000)" << endl
 			<< "  -nstep <#iterations> #Iterations for UFBoot stopping rule (default: 100)" << endl
@@ -2909,17 +2990,21 @@ void usage_iqtree(char* argv[], bool full_command) {
             << "  -b <#replicates>     Bootstrap + ML tree + consensus tree (>=100)" << endl
             << "  -bc <#replicates>    Bootstrap + consensus tree" << endl
             << "  -bo <#replicates>    Bootstrap only" << endl
-            << "  -t <threshold>       Minimum bootstrap support [0...1) for consensus tree" << endl
+//            << "  -t <threshold>       Minimum bootstrap support [0...1) for consensus tree" << endl
             << endl << "SINGLE BRANCH TEST:" << endl
             << "  -alrt <#replicates>  SH-like approximate likelihood ratio test (SH-aLRT)" << endl
+            << "  -alrt 0              Parametric aLRT test (Anisimova and Gascuel 2006)" << endl
+            << "  -abayes              approximate Bayes test (Anisimova et al. 2011)" << endl
             << "  -lbp <#replicates>   Fast local bootstrap probabilities" << endl
             << endl << "AUTOMATIC MODEL SELECTION:" << endl
             << "  -m TESTONLY          Standard model selection (like jModelTest, ProtTest)" << endl
             << "  -m TEST              Like -m TESTONLY but followed by tree reconstruction" << endl
-            << "  -m TESTNEWONLY       New model selection with FreeRate model replacing I+G" << endl
+            << "  -m TESTNEWONLY       New model selection including FreeRate (+R) heterogeneity" << endl
             << "  -m TESTNEW           Like -m TESTNEWONLY but followed by tree reconstruction" << endl
             << "  -m TESTMERGEONLY     Select best-fit partition scheme (like PartitionFinder)" << endl
             << "  -m TESTMERGE         Like -m TESTMERGEONLY but followed by tree reconstruction" << endl
+            << "  -m TESTNEWMERGEONLY  Like -m TESTMERGEONLY but includes FreeRate heterogeneity" << endl
+            << "  -m TESTNEWMERGE      Like -m TESTNEWMERGEONLY followed by tree reconstruction" << endl
             << "  -rcluster <percent>  Percentage of partition pairs (relaxed clustering alg.)" << endl
             << "  -mset program        Restrict search to models supported by other programs" << endl
             << "                       (i.e., raxml, phyml or mrbayes)" << endl
@@ -2937,6 +3022,7 @@ void usage_iqtree(char* argv[], bool full_command) {
 //            << "  -msep                Perform model selection and then rate selection" << endl
             << "  -mtree               Performing full tree search for each model considered" << endl
             << "  -mredo               Ignoring model results computed earlier (default: no)" << endl
+            << "  -madd mx1,...,mxk    List of mixture models to also consider" << endl
             << "  -mdef <nexus_file>   A model definition NEXUS file (see Manual)" << endl
 
             << endl << "SUBSTITUTION MODEL:" << endl
@@ -2971,6 +3057,7 @@ void usage_iqtree(char* argv[], bool full_command) {
             << "                       number of categories (default: n=4)" << endl
             << "  -a <Gamma_shape>     Gamma shape parameter for site rates (default: estimate)" << endl
             << "  -gmedian             Computing mean for Gamma rate category (default: mean)" << endl
+            << "  --test-alpha         More thorough estimation for +I+G model parameters" << endl
             << "  -i <p_invar>         Proportion of invariable sites (default: estimate)" << endl
             << "  -mh                  Computing site-specific rates to .mhrate file using" << endl
             << "                       Meyer & von Haeseler (2003) method" << endl
@@ -3009,7 +3096,8 @@ void usage_iqtree(char* argv[], bool full_command) {
             << "                       stored in <treefile> and <treefile2>" << endl
             << "  -rf_adj              Computing RF distances of adjacent trees in <treefile>" << endl
             << endl << "TREE TOPOLOGY TEST:" << endl
-            << "  -zb <#replicates>    BP,KH,SH,ELW tests with RELL for trees passed via -z" << endl
+            << "  -z <trees_file>      Evaluating a set of user trees" << endl
+            << "  -zb <#replicates>    Performing BP,KH,SH,ELW tests for trees passed via -z" << endl
             << "  -zw                  Also performing weighted-KH and weighted-SH tests" << endl
             << endl;
 
@@ -3024,10 +3112,13 @@ void usage_iqtree(char* argv[], bool full_command) {
 
 			cout << endl << "MISCELLANEOUS:" << endl
 		    << "  -wt                  Write locally optimal trees into .treels file" << endl
-			<< "  -fixbr               Fix branch lengths of <treefile>." << endl
-            << "                       Used with -n 0 to compute log-likelihood of <treefile>" << endl
-			<< "  -wsl                 Writing site log-likelihoods to .sitelh file" << endl
-            << "  -wslg                Writing site log-likelihoods per Gamma category" << endl
+			<< "  -blfix               Fix branch lengths of user tree passed via -te" << endl
+			<< "  -blmin               Min branch length for optimization (default 0.000001)" << endl
+			<< "  -blmax               Max branch length for optimization (default 100)" << endl
+			<< "  -wsl                 Write site log-likelihoods to .sitelh file" << endl
+            << "  -wslr                Write site log-likelihoods per rate category" << endl
+            << "  -wslm                Write site log-likelihoods per mixture class" << endl
+            << "  -wslmr               Write site log-likelihoods per mixture+rate class" << endl
             << "  -fconst f1,...,fN    Add constant patterns into alignment (N=#nstates)" << endl;
 //            << "  -d <file>            Reading genetic distances from file (default: JC)" << endl
 //			<< "  -d <outfile>         Calculate the distance matrix inferred from tree" << endl
@@ -3652,3 +3743,9 @@ void print_stacktrace(ostream &out, unsigned int max_frames)
 }
 
 #endif // WIN32
+
+bool memcmpcpy(void * destination, const void * source, size_t num) { // overwrite destination with source and report whether it changed
+    bool diff = (memcmp(destination, source, num) != 0); // compare first: the copy below makes both buffers identical
+    memcpy(destination, source, num);
+    return diff; // true if the first num bytes differed before the copy
+}
diff --git a/tools.h b/tools.h
index b1a1d02..b7808f3 100644
--- a/tools.h
+++ b/tools.h
@@ -402,6 +402,10 @@ enum LhMemSave {
 	LM_DETECT, LM_ALL_BRANCH, LM_PER_NODE
 };
 
+enum SiteLoglType { // granularity of site log-likelihood output (type of Params::print_site_lh)
+    WSL_NONE, WSL_SITE, WSL_RATECAT, WSL_MIXTURE, WSL_MIXTURE_RATECAT // none, per site, per rate category, per mixture class, per mixture class and rate category
+};
+
 /** maximum number of newton-raphson steps for NNI branch evaluation */
 extern int NNI_MAX_NR_STEP;
 
@@ -435,6 +439,16 @@ public:
 	bool testAlpha;
 
     /**
+     *  Automatically adjust the log-likelihood epsilon using some heuristic
+     */
+    bool testAlphaEpsAdaptive;
+
+    /**
+     *  Use random starting points for alpha
+     */
+    bool randomAlpha;
+
+    /**
      *  Logl epsilon to test for initial alpha and pinvar values.
      *  This does not need to be small (default value = 100)
      */
@@ -1043,6 +1057,11 @@ public:
     double split_threshold;
 
     /**
+        thresholds of split frequency, separated by '/' (as given to -minsupnew)
+     */
+    char* split_threshold_str;
+
+    /**
             threshold of split weight, splits with weight less than or equal to threshold will be discarded
      */
     double split_weight_threshold;
@@ -1108,6 +1127,9 @@ public:
     /** set of models for testing */
     char *model_set;
 
+    /** set of models to be added into default set */
+    char *model_extra_set;
+
     /** subset of models for testing, e.g. viral, mitochondrial */
     char *model_subset;
 
@@ -1193,6 +1215,13 @@ public:
      */
     bool fixed_branch_length;
 
+    /** minimum branch length for optimization, default 0.000001 */
+    double min_branch_length;
+
+    /** maximum branch length for optimization, default 100 */
+    double max_branch_length;
+
+
     /**
             criterion to assess important quartet
      */
@@ -1239,7 +1268,7 @@ public:
     /**
         TRUE to avoid duplicated trees while writing intermediate trees
      */
-    bool avoid_duplicated_trees;
+//    bool avoid_duplicated_trees;
 
     /**
             Robinson-Foulds distance computation mode: RF_ADJACENT PAIR, RF_ALL_PAIR
@@ -1278,6 +1307,12 @@ public:
      */
     int aLRT_replicates;
 
+    /** true to perform aLRT branch test of Anisimova & Gascuel (2006) */
+    bool aLRT_test;
+
+    /** true to perform aBayes branch test of Anisimova et al (2011) */
+    bool aBayes_test;
+
     /**
             number of replicates for local bootstrap probabilities method of Adachi & Hasegawa (1996) in MOLPHY
      */
@@ -1292,11 +1327,13 @@ public:
     bool lk_no_avx;
 
     /**
-     	 	0: do not print anything
-            1: print site log-likelihood
-            2: print site log-likelihood per Gamma category
+     	 	WSL_NONE: do not print anything
+            WSL_SITE: print site log-likelihood
+            WSL_RATECAT: print site log-likelihood per rate category
+            WSL_MIXTURE: print site log-likelihood per mixture class
+            WSL_MIXTURE_RATECAT: print site log-likelihood per mixture class per rate category
      */
-    int print_site_lh;
+    SiteLoglType print_site_lh;
 
     /** TRUE to print site-specific rates, default: FALSE */
     bool print_site_rate;
@@ -1519,10 +1556,10 @@ public:
     int step_iterations;
 
     /** TRUE to store all candidate trees in memory */
-    bool store_candidate_trees;
+//    bool store_candidate_trees;
 
 	/** true to print all UFBoot trees to a file */
-	bool print_ufboot_trees;
+	int print_ufboot_trees;
 
     /****** variables for NNI cutoff heuristics ******/
 
@@ -1588,7 +1625,7 @@ public:
     ModelTestCriterion model_test_criterion;
 
     /** either MTC_AIC, MTC_AICc, MTC_BIC, or MTC_ALL to stop +R increasing categories */
-    ModelTestCriterion model_test_stop_rule;
+//    ModelTestCriterion model_test_stop_rule;
 
     /** sample size for AICc and BIC */
     int model_test_sample_size;
@@ -1610,6 +1647,9 @@ public:
 	/** TRUE to print concatenated alignment, default: false */
 	bool print_conaln;
 
+	/** TRUE to link the alpha parameter of the Gamma model across partitions */
+	bool link_alpha;
+
 	/** true to count all distinct trees visited during tree search */
 	bool count_trees;
 
@@ -1856,8 +1896,9 @@ double convert_double(const char *str, int &end_pos) throw (string);
         convert comma-separated string to integer vector, with error checking
         @param str original string with integers separated by comma
         @param vec (OUT) integer vector
+        @param separator char separating elements
  */
-void convert_double_vec(const char *str, DoubleVector &vec) throw (string);
+void convert_double_vec(const char *str, DoubleVector &vec, char separator = ',') throw (string);
 
 /**
  * Convert seconds to hour, minute, second
@@ -2243,4 +2284,13 @@ inline uint32_t popcount_lauradoux(unsigned *buf, int n) {
   return bitCount;
 }
 
+/**
+ * combination of memcmp and memcpy.
+ * @param destination destination memory to copy to
+ * @param source source memory to copy from
+ * @param num number of bytes to copy
+ * @return TRUE if the memory contents are different, FALSE if identical
+ */
+bool memcmpcpy(void * destination, const void * source, size_t num);
+
 #endif
diff --git a/vectorclass/changelog.txt b/vectorclass/changelog.txt
index dd30ddb..a27dad3 100755
--- a/vectorclass/changelog.txt
+++ b/vectorclass/changelog.txt
@@ -1,7 +1,28 @@
 change log for vectorclass.zip
 ------------------------------
+version 1.20
+  * round functions: suppress precision exception under SSE4.1 and higher
 
-2015-10-24 version 1.16
+
+2015-11-14 version 1.19
+  * fix various problems with Clang compiler
+
+
+2015-09-25 version 1.18
+  * fix compiler error for Vec8s divide_by_i(Vec8s const & x) under Clang compiler
+  * fix error in Vec4d::size() in vectorf256e.h
+
+
+2015-07-31 version 1.17
+  * improved operator > for Vec4uq
+  * more special cases in blend4q
+  * nan_code functions made static inline
+  * template parameter BTYPE renamed to BVTYPE in mathematical functions to avoid clash
+    with macro named BTYPE in winnt.h
+  * fixed bug in Vec4db constructor
+
+
+2014-10-24 version 1.16
   * workaround for problem in Clang compiler extended to version 3.09 because not fixed yet by Clang
     (vectorf128.h line 134)
   * recognize problem with Apple version of Clang reporting wrong version number
@@ -11,7 +32,7 @@ change log for vectorclass.zip
   * various minor changes
 
 
-2015-10-17 version 1.15
+2014-10-17 version 1.15
   * added files ranvec1.h and ranvec1.cpp for random number generator
   * constructors to make boolean vectors from their elements
   * constructors and = operators to broadcast boolean scalar into boolean vectors
diff --git a/vectorclass/special.zip b/vectorclass/special.zip
index dfb1e13..3f3ce57 100755
Binary files a/vectorclass/special.zip and b/vectorclass/special.zip differ
diff --git a/vectorclass/vectorclass.h b/vectorclass/vectorclass.h
index 6509bca..0368ef8 100755
--- a/vectorclass/vectorclass.h
+++ b/vectorclass/vectorclass.h
@@ -1,8 +1,8 @@
 /****************************  vectorclass.h   ********************************
 * Author:        Agner Fog
 * Date created:  2012-05-30
-* Last modified: 2014-10-24
-* Version:       1.16
+* Last modified: 2015-11-07
+* Version:       1.19
 * Project:       vector classes
 * Description:
 * Header file defining vector classes as interface to intrinsic functions 
@@ -22,7 +22,7 @@
 *
 * For detailed instructions, see VectorClass.pdf
 *
-* (c) Copyright 2012 - 2014 GNU General Public License www.gnu.org/licenses
+* (c) Copyright 2012 - 2015 GNU General Public License www.gnu.org/licenses
 ******************************************************************************/
 #ifndef VECTORCLASS_H
 #define VECTORCLASS_H  116
diff --git a/vectorclass/vectorclass.pdf b/vectorclass/vectorclass.pdf
index 87bcdfc..91e66c8 100755
Binary files a/vectorclass/vectorclass.pdf and b/vectorclass/vectorclass.pdf differ
diff --git a/vectorclass/vectorf128.h b/vectorclass/vectorf128.h
index 0c12ece..86fca47 100755
--- a/vectorclass/vectorf128.h
+++ b/vectorclass/vectorf128.h
@@ -1,8 +1,8 @@
 /****************************  vectorf128.h   *******************************
 * Author:        Agner Fog
 * Date created:  2012-05-30
-* Last modified: 2014-10-24
-* Version:       1.16
+* Last modified: 2015-11-27
+* Version:       1.20
 * Project:       vector classes
 * Description:
 * Header file defining floating point vector classes as interface to 
@@ -30,7 +30,7 @@
 *
 * For detailed instructions, see VectorClass.pdf
 *
-* (c) Copyright 2012 - 2014 GNU General Public License http://www.gnu.org/licenses
+* (c) Copyright 2012 - 2015 GNU General Public License http://www.gnu.org/licenses
 *****************************************************************************/
 #ifndef VECTORF128_H
 #define VECTORF128_H
@@ -940,7 +940,7 @@ static inline Vec4f round(Vec4f const & a) __attribute__ ((optimize("-fno-unsafe
 // function round: round to nearest integer (even). (result as float vector)
 static inline Vec4f round(Vec4f const & a) {
 #if INSTRSET >= 5   // SSE4.1 supported
-    return _mm_round_ps(a, 0);
+    return _mm_round_ps(a, 8);
 #else // SSE2. Use magic number method
     // Note: assume MXCSR control register is set to rounding
     // (don't use conversion to int, it will limit the value to +/- 2^31)
@@ -960,7 +960,7 @@ static inline Vec4f round(Vec4f const & a) {
 // function truncate: round towards zero. (result as float vector)
 static inline Vec4f truncate(Vec4f const & a) {
 #if INSTRSET >= 5   // SSE4.1 supported
-    return _mm_round_ps(a, 3);
+    return _mm_round_ps(a, 3+8);
 #else  // SSE2. Use magic number method (conversion to int would limit the value to 2^31)
     uint32_t t1 = _mm_getcsr();        // MXCSR
     uint32_t t2 = t1 | (3 << 13);      // bit 13-14 = 11
@@ -974,7 +974,7 @@ static inline Vec4f truncate(Vec4f const & a) {
 // function floor: round towards minus infinity. (result as float vector)
 static inline Vec4f floor(Vec4f const & a) {
 #if INSTRSET >= 5   // SSE4.1 supported
-    return _mm_round_ps(a, 1);
+    return _mm_round_ps(a, 1+8);
 #else  // SSE2. Use magic number method (conversion to int would limit the value to 2^31)
     uint32_t t1 = _mm_getcsr();        // MXCSR
     uint32_t t2 = t1 | (1 << 13);      // bit 13-14 = 01
@@ -988,7 +988,7 @@ static inline Vec4f floor(Vec4f const & a) {
 // function ceil: round towards plus infinity. (result as float vector)
 static inline Vec4f ceil(Vec4f const & a) {
 #if INSTRSET >= 5   // SSE4.1 supported
-    return _mm_round_ps(a, 2);
+    return _mm_round_ps(a, 2+8);
 #else  // SSE2. Use magic number method (conversion to int would limit the value to 2^31)
     uint32_t t1 = _mm_getcsr();        // MXCSR
     uint32_t t2 = t1 | (2 << 13);      // bit 13-14 = 10
@@ -1099,7 +1099,8 @@ static inline Vec4i exponent(Vec4f const & a) {
 
 // Extract the fraction part of a floating point number
 // a = 2^exponent(a) * fraction(a), except for a = 0
-// fraction(1.0f) = 1.0f, fraction(5.0f) = 1.25f 
+// fraction(1.0f) = 1.0f, fraction(5.0f) = 1.25f
+// NOTE: The name fraction clashes with an ENUM in MAC XCode CarbonCore script.h !
 static inline Vec4f fraction(Vec4f const & a) {
     Vec4ui t1 = _mm_castps_si128(a);   // reinterpret as 32-bit integer
     Vec4ui t2 = Vec4ui((t1 & 0x007FFFFF) | 0x3F800000); // set exponent to 0 + bias
@@ -1436,7 +1437,7 @@ DOZERO:
 template <int i0, int i1, int i2, int i3>
 static inline Vec4f change_sign(Vec4f const & a) {
     if ((i0 | i1 | i2 | i3) == 0) return a;
-    __m128i mask = constant4i<i0 ? 0x80000000 : 0, i1 ? 0x80000000 : 0, i2 ? 0x80000000 : 0, i3 ? 0x80000000 : 0>();
+    __m128i mask = constant4i<i0 ? (int)0x80000000 : 0, i1 ? (int)0x80000000 : 0, i2 ? (int)0x80000000 : 0, i3 ? (int)0x80000000 : 0>();
     return  _mm_xor_ps(a, _mm_castsi128_ps(mask));     // flip sign bits
 }
 
@@ -1901,7 +1902,7 @@ static inline Vec4f round(Vec4f const & a) __attribute__ ((optimize("-fno-unsafe
 // function round: round to nearest integer (even). (result as double vector)
 static inline Vec2d round(Vec2d const & a) {
 #if INSTRSET >= 5   // SSE4.1 supported
-    return _mm_round_pd(a, 0);
+    return _mm_round_pd(a, 0+8);
 #else // SSE2. Use magic number method
     // Note: assume MXCSR control register is set to rounding
     // (don't use conversion to int, it will limit the value to +/- 2^31)
@@ -1920,7 +1921,7 @@ static inline Vec2d round(Vec2d const & a) {
 static inline Vec2d truncate(Vec2d const & a) {
 // (note: may fail on MS Visual Studio 2008, works in later versions)
 #if INSTRSET >= 5   // SSE4.1 supported
-    return _mm_round_pd(a, 3);
+    return _mm_round_pd(a, 3+8);
 #else  // SSE2. Use magic number method (conversion to int would limit the value to 2^31)
     uint32_t t1 = _mm_getcsr();        // MXCSR
     uint32_t t2 = t1 | (3 << 13);      // bit 13-14 = 11
@@ -1935,7 +1936,7 @@ static inline Vec2d truncate(Vec2d const & a) {
 // (note: may fail on MS Visual Studio 2008, works in later versions)
 static inline Vec2d floor(Vec2d const & a) {
 #if INSTRSET >= 5   // SSE4.1 supported
-    return _mm_round_pd(a, 1);
+    return _mm_round_pd(a, 1+8);
 #else  // SSE2. Use magic number method (conversion to int would limit the value to 2^31)
     uint32_t t1 = _mm_getcsr();        // MXCSR
     uint32_t t2 = t1 | (1 << 13);      // bit 13-14 = 01
@@ -1949,7 +1950,7 @@ static inline Vec2d floor(Vec2d const & a) {
 // function ceil: round towards plus infinity. (result as double vector)
 static inline Vec2d ceil(Vec2d const & a) {
 #if INSTRSET >= 5   // SSE4.1 supported
-    return _mm_round_pd(a, 2);
+    return _mm_round_pd(a, 2+8);
 #else  // SSE2. Use magic number method (conversion to int would limit the value to 2^31)
     uint32_t t1 = _mm_getcsr();        // MXCSR
     uint32_t t2 = t1 | (2 << 13);      // bit 13-14 = 10
@@ -2125,7 +2126,8 @@ static inline Vec2q exponent(Vec2d const & a) {
 
 // Extract the fraction part of a floating point number
 // a = 2^exponent(a) * fraction(a), except for a = 0
-// fraction(1.0) = 1.0, fraction(5.0) = 1.25 
+// fraction(1.0) = 1.0, fraction(5.0) = 1.25
+// NOTE: The name fraction clashes with an ENUM in MAC XCode CarbonCore script.h !
 static inline Vec2d fraction(Vec2d const & a) {
     Vec2uq t1 = _mm_castpd_si128(a);   // reinterpret as 64-bit integer
     Vec2uq t2 = Vec2uq((t1 & 0x000FFFFFFFFFFFFFll) | 0x3FF0000000000000ll); // set exponent to 0 + bias
@@ -2371,7 +2373,7 @@ static inline Vec2d blend2d(Vec2d const & a, Vec2d const & b) {
 template <int i0, int i1>
 static inline Vec2d change_sign(Vec2d const & a) {
     if ((i0 | i1) == 0) return a;
-    __m128i mask = constant4i<0, i0 ? 0x80000000 : 0, 0, i1 ? 0x80000000 : 0> ();
+    __m128i mask = constant4i<0, i0 ? (int)0x80000000 : 0, 0, i1 ? (int)0x80000000 : 0> ();
     return  _mm_xor_pd(a, _mm_castsi128_pd(mask));     // flip sign bits
 }
 
diff --git a/vectorclass/vectorf256.h b/vectorclass/vectorf256.h
index 75bc267..dc126a0 100755
--- a/vectorclass/vectorf256.h
+++ b/vectorclass/vectorf256.h
@@ -1,8 +1,8 @@
 /****************************  vectorf256.h   *******************************
 * Author:        Agner Fog
 * Date created:  2012-05-30
-* Last modified: 2014-10-22
-* Version:       1.16
+* Last modified: 2015-11-27
+* Version:       1.20
 * Project:       vector classes
 * Description:
 * Header file defining 256-bit floating point vector classes as interface
@@ -27,7 +27,7 @@
 *
 * For detailed instructions, see VectorClass.pdf
 *
-* (c) Copyright 2012 - 2014 GNU General Public License http://www.gnu.org/licenses
+* (c) Copyright 2012 - 2015 GNU General Public License http://www.gnu.org/licenses
 *****************************************************************************/
 
 // check combination of header files
@@ -330,7 +330,7 @@ public:
 #else
         __m128 blo = _mm_castsi128_ps(_mm_setr_epi32(-(int)b0, -(int)b0, -(int)b1, -(int)b1));
         __m128 bhi = _mm_castsi128_ps(_mm_setr_epi32(-(int)b2, -(int)b2, -(int)b3, -(int)b3));
-        ymm = _mm256_castps_pd(set_m128r(bhi,blo));
+        ymm = _mm256_castps_pd(set_m128r(blo, bhi));
 #endif
     }
     // Constructor to build from two Vec2db:
@@ -1001,22 +1001,22 @@ static inline Vec8f pow(Vec8f const & a, Const_int_t<n>) {
 
 // function round: round to nearest integer (even). (result as float vector)
 static inline Vec8f round(Vec8f const & a) {
-    return _mm256_round_ps(a, 0);
+    return _mm256_round_ps(a, 0+8);
 }
 
 // function truncate: round towards zero. (result as float vector)
 static inline Vec8f truncate(Vec8f const & a) {
-    return _mm256_round_ps(a, 3);
+    return _mm256_round_ps(a, 3+8);
 }
 
 // function floor: round towards minus infinity. (result as float vector)
 static inline Vec8f floor(Vec8f const & a) {
-    return _mm256_round_ps(a, 1);
+    return _mm256_round_ps(a, 1+8);
 }
 
 // function ceil: round towards plus infinity. (result as float vector)
 static inline Vec8f ceil(Vec8f const & a) {
-    return _mm256_round_ps(a, 2);
+    return _mm256_round_ps(a, 2+8);
 }
 
 #ifdef VECTORI256_H  // 256 bit integer vectors are available
@@ -1292,8 +1292,8 @@ static inline Vec8f nan8f(int n = 0x10) {
 template <int i0, int i1, int i2, int i3, int i4, int i5, int i6, int i7>
 static inline Vec8f change_sign(Vec8f const & a) {
     if ((i0 | i1 | i2 | i3 | i4 | i5 | i6 | i7) == 0) return a;
-    __m256 mask = constant8f<i0 ? 0x80000000 : 0, i1 ? 0x80000000 : 0, i2 ? 0x80000000 : 0, i3 ? 0x80000000 : 0,
-        i4 ? 0x80000000 : 0, i5 ? 0x80000000 : 0, i6 ? 0x80000000 : 0, i7 ? 0x80000000 : 0> ();
+    __m256 mask = constant8f<i0 ? (int)0x80000000 : 0, i1 ? (int)0x80000000 : 0, i2 ? (int)0x80000000 : 0, i3 ? (int)0x80000000 : 0,
+        i4 ? (int)0x80000000 : 0, i5 ? (int)0x80000000 : 0, i6 ? (int)0x80000000 : 0, i7 ? (int)0x80000000 : 0> ();
     return _mm256_xor_ps(a, mask);
 }
 
@@ -1755,22 +1755,22 @@ static inline Vec4d pow(Vec4d const & a, Const_int_t<n>) {
 
 // function round: round to nearest integer (even). (result as double vector)
 static inline Vec4d round(Vec4d const & a) {
-    return _mm256_round_pd(a, 0);
+    return _mm256_round_pd(a, 0+8);
 }
 
 // function truncate: round towards zero. (result as double vector)
 static inline Vec4d truncate(Vec4d const & a) {
-    return _mm256_round_pd(a, 3);
+    return _mm256_round_pd(a, 3+8);
 }
 
 // function floor: round towards minus infinity. (result as double vector)
 static inline Vec4d floor(Vec4d const & a) {
-    return _mm256_round_pd(a, 1);
+    return _mm256_round_pd(a, 1+8);
 }
 
 // function ceil: round towards plus infinity. (result as double vector)
 static inline Vec4d ceil(Vec4d const & a) {
-    return _mm256_round_pd(a, 2);
+    return _mm256_round_pd(a, 2+8);
 }
 
 // function round_to_int: round to nearest integer (even). (result as integer vector)
@@ -2085,7 +2085,7 @@ static inline Vec4d nan4d(int n = 0x10) {
 template <int i0, int i1, int i2, int i3>
 static inline Vec4d change_sign(Vec4d const & a) {
     if ((i0 | i1 | i2 | i3) == 0) return a;
-    __m256 mask = constant8f<0, i0 ? 0x80000000 : 0, 0, i1 ? 0x80000000 : 0, 0, i2 ? 0x80000000 : 0, 0, i3 ? 0x80000000 : 0> ();
+    __m256 mask = constant8f<0, i0 ? (int)0x80000000 : 0, 0, i1 ? (int)0x80000000 : 0, 0, i2 ? (int)0x80000000 : 0, 0, i3 ? (int)0x80000000 : 0> ();
     return _mm256_xor_pd(a, _mm256_castps_pd(mask));
 }
 
diff --git a/vectorclass/vectorf256e.h b/vectorclass/vectorf256e.h
index 6c9f4b7..39c4410 100755
--- a/vectorclass/vectorf256e.h
+++ b/vectorclass/vectorf256e.h
@@ -1,8 +1,8 @@
 /****************************  vectorf256e.h   *******************************
 * Author:        Agner Fog
 * Date created:  2012-05-30
-* Last modified: 2014-10-22
-* Version:       1.16
+* Last modified: 2015-08-25
+* Version:       1.18
 * Project:       vector classes
 * Description:
 * Header file defining 256-bit floating point vector classes as interface
@@ -16,7 +16,7 @@
 *
 * For detailed instructions, see VectorClass.pdf
 *
-* (c) Copyright 2012 - 2014 GNU General Public License http://www.gnu.org/licenses
+* (c) Copyright 2012 - 2015 GNU General Public License http://www.gnu.org/licenses
 *****************************************************************************/
 
 // check combination of header files
@@ -1199,7 +1199,7 @@ public:
         return y1;
     }
     static int size () {
-        return 2;
+        return 4;
     }
 };
 
diff --git a/vectorclass/vectorf512.h b/vectorclass/vectorf512.h
index 66c1263..5fab837 100755
--- a/vectorclass/vectorf512.h
+++ b/vectorclass/vectorf512.h
@@ -1,8 +1,8 @@
 /****************************  vectorf512.h   *******************************
 * Author:        Agner Fog
 * Date created:  2014-07-23
-* Last modified: 2014-10-22
-* Version:       1.16
+* Last modified: 2015-11-27
+* Version:       1.20
 * Project:       vector classes
 * Description:
 * Header file defining floating point vector classes as interface to intrinsic 
@@ -23,7 +23,7 @@
 *
 * For detailed instructions, see VectorClass.pdf
 *
-* (c) Copyright 2014 GNU General Public License http://www.gnu.org/licenses
+* (c) Copyright 2015 GNU General Public License http://www.gnu.org/licenses
 *****************************************************************************/
 
 // check combination of header files
@@ -767,27 +767,26 @@ static inline Vec16f pow(Vec16f const & a, Const_int_t<n>) {
 
 // function round: round to nearest integer (even). (result as float vector)
 static inline Vec16f round(Vec16f const & a) {
-    return _mm512_roundscale_ps(a, 0);
+    return _mm512_roundscale_ps(a, 0+8);
 }
 
 // function truncate: round towards zero. (result as float vector)
 static inline Vec16f truncate(Vec16f const & a) {
-    return _mm512_roundscale_ps(a, 3);
+    return _mm512_roundscale_ps(a, 3+8);
 }
 
 // function floor: round towards minus infinity. (result as float vector)
 static inline Vec16f floor(Vec16f const & a) {
-    return _mm512_roundscale_ps(a, 1);
+    return _mm512_roundscale_ps(a, 1+8);
 }
 
 // function ceil: round towards plus infinity. (result as float vector)
 static inline Vec16f ceil(Vec16f const & a) {
-    return _mm512_roundscale_ps(a, 2);
+    return _mm512_roundscale_ps(a, 2+8);
 }
 
 // function round_to_int: round to nearest integer (even). (result as integer vector)
 static inline Vec16i round_to_int(Vec16f const & a) {
-    // Note: assume MXCSR control register is set to rounding
     return _mm512_cvt_roundps_epi32(a, _MM_FROUND_NO_EXC);
 }
 
@@ -1438,8 +1437,8 @@ static inline Vec8d ceil(Vec8d const & a) {
 
 // function round_to_int: round to nearest integer (even). (result as integer vector)
 static inline Vec8i round_to_int(Vec8d const & a) {
-    // Note: assume MXCSR control register is set to rounding
-    return _mm512_cvtpd_epi32(a);
+    //return _mm512_cvtpd_epi32(a);
+    return _mm512_cvt_roundpd_epi32(a, __MM_FROUND_NO_EXC);
 }
 
 // function truncate_to_int: round towards zero. (result as integer vector)
@@ -1473,11 +1472,11 @@ static inline Vec8q round_to_int64(Vec8d const & a) {
 // function round_to_int64_limited: round to nearest integer (even)
 // result as 64-bit integer vector, but with limited range
 static inline Vec8q round_to_int64_limited(Vec8d const & a) {
-    // Note: assume MXCSR control register is set to rounding
-    Vec4q   b = _mm512_cvtpd_epi32(a);                     // round to 32-bit integers
-    __m512i c = permute8q<0,-256,1,-256,2,-256,3,-256>(Vec8q(b,b));      // get bits 64-127 to position 128-191, etc.
-    __m512i s = _mm512_srai_epi32(c, 31);                  // sign extension bits
-    return      _mm512_unpacklo_epi32(c, s);               // interleave with sign extensions
+    //Vec4q   b = _mm512_cvtpd_epi32(a);                             // round to 32-bit integers
+    Vec4q   b = _mm512_cvt_roundpd_epi32(a, __MM_FROUND_NO_EXC);     // round to 32-bit integers   
+    __m512i c = permute8q<0,-256,1,-256,2,-256,3,-256>(Vec8q(b,b));  // get bits 64-127 to position 128-191, etc.
+    __m512i s = _mm512_srai_epi32(c, 31);                            // sign extension bits
+    return      _mm512_unpacklo_epi32(c, s);                         // interleave with sign extensions
 }
 
 // function to_double: convert integer vector elements to double vector (inefficient)
diff --git a/vectorclass/vectori128.h b/vectorclass/vectori128.h
index 8ec5df0..73f53d5 100755
--- a/vectorclass/vectori128.h
+++ b/vectorclass/vectori128.h
@@ -1,8 +1,8 @@
 /****************************  vectori128.h   *******************************
 * Author:        Agner Fog
 * Date created:  2012-05-30
-* Last modified: 2014-10-24
-* Version:       1.16
+* Last modified: 2015-11-07
+* Version:       1.19
 * Project:       vector classes
 * Description:
 * Header file defining integer vector classes as interface to intrinsic 
@@ -39,7 +39,7 @@
 *
 * For detailed instructions, see VectorClass.pdf
 *
-* (c) Copyright 2012 - 2013 GNU General Public License http://www.gnu.org/licenses
+* (c) Copyright 2012 - 2015 GNU General Public License http://www.gnu.org/licenses
 *****************************************************************************/
 #ifndef VECTORI128_H
 #define VECTORI128_H
@@ -864,25 +864,25 @@ class Vec16uc : public Vec16c {
 public:
     // Default constructor:
     Vec16uc() {
-    };
+    }
     // Constructor to broadcast the same value into all elements:
     Vec16uc(uint32_t i) {
         xmm = _mm_set1_epi8((char)i);
-    };
+    }
     // Constructor to build from all elements:
     Vec16uc(uint8_t i0, uint8_t i1, uint8_t i2, uint8_t i3, uint8_t i4, uint8_t i5, uint8_t i6, uint8_t i7,
         uint8_t i8, uint8_t i9, uint8_t i10, uint8_t i11, uint8_t i12, uint8_t i13, uint8_t i14, uint8_t i15) {
         xmm = _mm_setr_epi8(i0, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, i11, i12, i13, i14, i15);
-    };
+    }
     // Constructor to convert from type __m128i used in intrinsics:
     Vec16uc(__m128i const & x) {
         xmm = x;
-    };
+    }
     // Assignment operator to convert from type __m128i used in intrinsics:
     Vec16uc & operator = (__m128i const & x) {
         xmm = x;
         return *this;
-    };
+    }
     // Member function to load from array (unaligned)
     Vec16uc & load(void const * p) {
         xmm = _mm_loadu_si128((__m128i const*)p);
@@ -1080,28 +1080,28 @@ class Vec8s : public Vec128b {
 public:
     // Default constructor:
     Vec8s() {
-    };
+    }
     // Constructor to broadcast the same value into all elements:
     Vec8s(int i) {
         xmm = _mm_set1_epi16((int16_t)i);
-    };
+    }
     // Constructor to build from all elements:
     Vec8s(int16_t i0, int16_t i1, int16_t i2, int16_t i3, int16_t i4, int16_t i5, int16_t i6, int16_t i7) {
         xmm = _mm_setr_epi16(i0, i1, i2, i3, i4, i5, i6, i7);
-    };
+    }
     // Constructor to convert from type __m128i used in intrinsics:
     Vec8s(__m128i const & x) {
         xmm = x;
-    };
+    }
     // Assignment operator to convert from type __m128i used in intrinsics:
     Vec8s & operator = (__m128i const & x) {
         xmm = x;
         return *this;
-    };
+    }
     // Type cast operator to convert to __m128i used in intrinsics
     operator __m128i() const {
         return xmm;
-    };
+    }
     // Member function to load from array (unaligned)
     Vec8s & load(void const * p) {
         xmm = _mm_loadu_si128((__m128i const*)p);
@@ -1184,7 +1184,7 @@ public:
             xmm = _mm_insert_epi16(xmm,value,7);  break;
         }
         return *this;
-    };
+    }
     // Member function extract a single element from vector
     // Note: This function is inefficient. Use store function if extracting more than one element
     int16_t extract(uint32_t index) const {
@@ -1426,40 +1426,40 @@ static inline Vec8s & operator >>= (Vec8s & a, int b) {
 }
 
 // vector operator == : returns true for elements for which a == b
-static inline Vec8s operator == (Vec8s const & a, Vec8s const & b) {
+static inline Vec8sb operator == (Vec8s const & a, Vec8s const & b) {
     return _mm_cmpeq_epi16(a, b);
 }
 
 // vector operator != : returns true for elements for which a != b
-static inline Vec8s operator != (Vec8s const & a, Vec8s const & b) {
+static inline Vec8sb operator != (Vec8s const & a, Vec8s const & b) {
 #ifdef __XOP__  // AMD XOP instruction set
     return _mm_comneq_epi16(a,b);
 #else  // SSE2 instruction set
-    return Vec8s (~(a == b));
+    return Vec8sb (~(a == b));
 #endif
 }
 
 // vector operator > : returns true for elements for which a > b
-static inline Vec8s operator > (Vec8s const & a, Vec8s const & b) {
+static inline Vec8sb operator > (Vec8s const & a, Vec8s const & b) {
     return _mm_cmpgt_epi16(a, b);
 }
 
 // vector operator < : returns true for elements for which a < b
-static inline Vec8s operator < (Vec8s const & a, Vec8s const & b) {
+static inline Vec8sb operator < (Vec8s const & a, Vec8s const & b) {
     return b > a;
 }
 
 // vector operator >= : returns true for elements for which a >= b (signed)
-static inline Vec8s operator >= (Vec8s const & a, Vec8s const & b) {
+static inline Vec8sb operator >= (Vec8s const & a, Vec8s const & b) {
 #ifdef __XOP__  // AMD XOP instruction set
     return _mm_comge_epi16(a,b);
 #else  // SSE2 instruction set
-    return Vec8s (~(b > a));
+    return Vec8sb (~(b > a));
 #endif
 }
 
 // vector operator <= : returns true for elements for which a <= b (signed)
-static inline Vec8s operator <= (Vec8s const & a, Vec8s const & b) {
+static inline Vec8sb operator <= (Vec8s const & a, Vec8s const & b) {
     return b >= a;
 }
 
@@ -1641,24 +1641,24 @@ class Vec8us : public Vec8s {
 public:
     // Default constructor:
     Vec8us() {
-    };
+    }
     // Constructor to broadcast the same value into all elements:
     Vec8us(uint32_t i) {
         xmm = _mm_set1_epi16((int16_t)i);
-    };
+    }
     // Constructor to build from all elements:
     Vec8us(uint16_t i0, uint16_t i1, uint16_t i2, uint16_t i3, uint16_t i4, uint16_t i5, uint16_t i6, uint16_t i7) {
         xmm = _mm_setr_epi16(i0, i1, i2, i3, i4, i5, i6, i7);
-    };
+    }
     // Constructor to convert from type __m128i used in intrinsics:
     Vec8us(__m128i const & x) {
         xmm = x;
-    };
+    }
     // Assignment operator to convert from type __m128i used in intrinsics:
     Vec8us & operator = (__m128i const & x) {
         xmm = x;
         return *this;
-    };
+    }
     // Member function to load from array (unaligned)
     Vec8us & load(void const * p) {
         xmm = _mm_loadu_si128((__m128i const*)p);
@@ -1674,7 +1674,7 @@ public:
     Vec8us const & insert(uint32_t index, uint16_t value) {
         Vec8s::insert(index, value);
         return *this;
-    };
+    }
     // Member function extract a single element from vector
     uint16_t extract(uint32_t index) const {
         return Vec8s::extract(index);
@@ -1740,9 +1740,8 @@ static inline Vec8s operator >= (Vec8us const & a, Vec8us const & b) {
     __m128i max_ab = _mm_max_epu16(a,b);                   // max(a,b), unsigned
     return _mm_cmpeq_epi16(a,max_ab);                      // a == max(a,b)
 #else  // SSE2 instruction set
-    __m128i sub1 = _mm_sub_epi16(a,b);                     // a-b, wraparound
-    __m128i sub2 = _mm_subs_epu16(a,b);                    // a-b, saturated
-    return  _mm_cmpeq_epi16(sub1,sub2);                    // sub1 == sub2 if no carry
+    __m128i s = _mm_subs_epu16(b,a);                       // b-a, saturated
+    return  _mm_cmpeq_epi16(s, _mm_setzero_si128());       // s == 0 
 #endif
 }
 
@@ -2463,24 +2462,24 @@ class Vec4ui : public Vec4i {
 public:
     // Default constructor:
     Vec4ui() {
-    };
+    }
     // Constructor to broadcast the same value into all elements:
     Vec4ui(uint32_t i) {
         xmm = _mm_set1_epi32(i);
-    };
+    }
     // Constructor to build from all elements:
     Vec4ui(uint32_t i0, uint32_t i1, uint32_t i2, uint32_t i3) {
         xmm = _mm_setr_epi32(i0, i1, i2, i3);
-    };
+    }
     // Constructor to convert from type __m128i used in intrinsics:
     Vec4ui(__m128i const & x) {
         xmm = x;
-    };
+    }
     // Assignment operator to convert from type __m128i used in intrinsics:
     Vec4ui & operator = (__m128i const & x) {
         xmm = x;
         return *this;
-    };
+    }
     // Member function to load from array (unaligned)
     Vec4ui & load(void const * p) {
         xmm = _mm_loadu_si128((__m128i const*)p);
@@ -3296,24 +3295,24 @@ class Vec2uq : public Vec2q {
 public:
     // Default constructor:
     Vec2uq() {
-    };
+    }
     // Constructor to broadcast the same value into all elements:
     Vec2uq(uint64_t i) {
         xmm = Vec2q(i);
-    };
+    }
     // Constructor to build from all elements:
     Vec2uq(uint64_t i0, uint64_t i1) {
         xmm = Vec2q(i0, i1);
-    };
+    }
     // Constructor to convert from type __m128i used in intrinsics:
     Vec2uq(__m128i const & x) {
         xmm = x;
-    };
+    }
     // Assignment operator to convert from type __m128i used in intrinsics:
     Vec2uq & operator = (__m128i const & x) {
         xmm = x;
         return *this;
-    };
+    }
     // Member function to load from array (unaligned)
     Vec2uq & load(void const * p) {
         xmm = _mm_loadu_si128((__m128i const*)p);
@@ -3386,8 +3385,14 @@ static inline Vec2uq operator << (Vec2uq const & a, int32_t b) {
 
 // vector operator > : returns true for elements for which a > b (unsigned)
 static inline Vec2qb operator > (Vec2uq const & a, Vec2uq const & b) {
-#ifdef __XOP__  // AMD XOP instruction set
+#if defined ( __XOP__ ) // AMD XOP instruction set
     return Vec2q(_mm_comgt_epu64(a,b));
+#elif INSTRSET >= 6 // SSE4.2
+    __m128i sign64 = constant4i<0,(int32_t)0x80000000,0,(int32_t)0x80000000>();
+    __m128i aflip  = _mm_xor_si128(a, sign64);
+    __m128i bflip  = _mm_xor_si128(b, sign64);
+    Vec2q   cmp    = _mm_cmpgt_epi64(aflip,bflip);
+    return Vec2qb(cmp);
 #else  // SSE2 instruction set
     __m128i sign32  = _mm_set1_epi32(0x80000000);          // sign bit of each dword
     __m128i aflip   = _mm_xor_si128(a,sign32);             // a with sign bits flipped
@@ -3557,16 +3562,16 @@ template <int i0, int i1, int i2, int i3>
 static inline Vec4i permute4i(Vec4i const & a) {
 
     // Combine all the indexes into a single bitfield, with 4 bits for each
-    const int m1 = (i0&3) | (i1&3)<<4 | (i2&3)<<8 | (i3&3)<<12; 
+    const uint32_t m1 = (i0&3) | (i1&3)<<4 | (i2&3)<<8 | (i3&3)<<12; 
 
     // Mask to zero out negative indexes
-    const int mz = (i0<0?0:0xF) | (i1<0?0:0xF)<<4 | (i2<0?0:0xF)<<8 | (i3<0?0:0xF)<<12;
+    const uint32_t mz = (i0<0?0:0xF) | (i1<0?0:0xF)<<4 | (i2<0?0:0xF)<<8 | (i3<0?0:0xF)<<12;
 
     // Mask indicating required zeroing of all indexes, with 4 bits for each, 0 for index = -1, 0xF for index >= 0 or -256
-    const int ssz = ((i0 & 0x80) ? 0 : 0xF) | ((i1 & 0x80) ? 0 : 0xF) << 4 | ((i2 & 0x80) ? 0 : 0xF) << 8 | ((i3 & 0x80) ? 0 : 0xF) << 12;
+    const uint32_t ssz = ((i0 & 0x80) ? 0 : 0xF) | ((i1 & 0x80) ? 0 : 0xF) << 4 | ((i2 & 0x80) ? 0 : 0xF) << 8 | ((i3 & 0x80) ? 0 : 0xF) << 12;
 
     // Mask indicating 0 for don't care, 0xF for non-negative value of required zeroing
-    const int md = mz | ~ ssz;
+    const uint32_t md = mz | ~ ssz;
 
     // Test if permutation needed
     const bool do_shuffle = ((m1 ^ 0x00003210) & mz) != 0;
@@ -5210,7 +5215,7 @@ static inline uint32_t vml_popcnt (uint32_t a) {
 
 // Define bit-scan-forward function. Gives index to lowest set bit
 #if defined (__GNUC__) || defined(__clang__)
-static inline uint32_t bit_scan_reverse (uint32_t a) __attribute__ ((pure));
+static inline uint32_t bit_scan_forward (uint32_t a) __attribute__ ((pure));
 static inline uint32_t bit_scan_forward (uint32_t a) {	
     uint32_t r;
     __asm("bsfl %1, %0" : "=r"(r) : "r"(a) : );
@@ -5832,7 +5837,7 @@ static inline Vec8s divide_by_i(Vec8s const & x) {
     Static_error_check<(d0 != 0)> Dividing_by_zero;                  // Error message if dividing by zero
     if (d0 ==  1) return  x;                                         // divide by  1
     if (d0 == -1) return -x;                                         // divide by -1
-    if (uint16_t(d0) == 0x8000u) return (x == Vec8s(0x8000)) & 1;    // prevent overflow when changing sign
+    if (uint16_t(d0) == 0x8000u) return Vec8s(x == Vec8s(0x8000)) & 1;// prevent overflow when changing sign
     // if (d > 0x7FFF || d < -0x8000) return 0;                      // not relevant when d truncated to 16 bits
     const uint16_t d1 = d0 > 0 ? d0 : -d0;                           // compile-time abs(d0)
     if ((d1 & (d1-1)) == 0) {
diff --git a/vectorclass/vectori256.h b/vectorclass/vectori256.h
index be0c869..47da1ea 100755
--- a/vectorclass/vectori256.h
+++ b/vectorclass/vectori256.h
@@ -1,8 +1,8 @@
 /****************************  vectori256.h   *******************************
 * Author:        Agner Fog
 * Date created:  2012-05-30
-* Last modified: 2014-10-16
-* Version:       1.16
+* Last modified: 2015-11-08
+* Version:       1.19
 * Project:       vector classes
 * Description:
 * Header file defining integer vector classes as interface to intrinsic 
@@ -36,7 +36,7 @@
 *
 * For detailed instructions, see VectorClass.pdf
 *
-* (c) Copyright 2012 - 2013 GNU General Public License http://www.gnu.org/licenses
+* (c) Copyright 2012 - 2015 GNU General Public License http://www.gnu.org/licenses
 *****************************************************************************/
 
 // check combination of header files
@@ -78,7 +78,7 @@ protected:
 public:
     // Default constructor:
     Vec256b() {
-    };
+    }
     // Constructor to broadcast the same value into all elements
     // Removed because of undesired implicit conversions
     //Vec256b(int i) {
@@ -91,12 +91,12 @@ public:
     // Constructor to convert from type __m256i used in intrinsics:
     Vec256b(__m256i const & x) {
         ymm = x;
-    };
+    }
     // Assignment operator to convert from type __m256i used in intrinsics:
     Vec256b & operator = (__m256i const & x) {
         ymm = x;
         return *this;
-    };
+    }
     // Type cast operator to convert to __m256i used in intrinsics
     operator __m256i() const {
         return ymm;
@@ -285,11 +285,11 @@ class Vec32c : public Vec256b {
 public:
     // Default constructor:
     Vec32c(){
-    };
+    }
     // Constructor to broadcast the same value into all elements:
     Vec32c(int i) {
         ymm = _mm256_set1_epi8((char)i);
-    };
+    }
     // Constructor to build from all elements:
     Vec32c(int8_t i0, int8_t i1, int8_t i2, int8_t i3, int8_t i4, int8_t i5, int8_t i6, int8_t i7,
         int8_t i8, int8_t i9, int8_t i10, int8_t i11, int8_t i12, int8_t i13, int8_t i14, int8_t i15,        
@@ -297,7 +297,7 @@ public:
         int8_t i24, int8_t i25, int8_t i26, int8_t i27, int8_t i28, int8_t i29, int8_t i30, int8_t i31) {
         ymm = _mm256_setr_epi8(i0, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, i11, i12, i13, i14, i15,
             i16, i17, i18, i19, i20, i21, i22, i23, i24, i25, i26, i27, i28, i29, i30, i31);
-    };
+    }
     // Constructor to build from two Vec16c:
     Vec32c(Vec16c const & a0, Vec16c const & a1) {
         ymm = set_m128ir(a0, a1);
@@ -305,12 +305,12 @@ public:
     // Constructor to convert from type __m256i used in intrinsics:
     Vec32c(__m256i const & x) {
         ymm = x;
-    };
+    }
     // Assignment operator to convert from type __m256i used in intrinsics:
     Vec32c & operator = (__m256i const & x) {
         ymm = x;
         return *this;
-    };
+    }
     // Type cast operator to convert to __m256i used in intrinsics
     operator __m256i() const {
         return ymm;
@@ -643,11 +643,7 @@ static inline Vec32cb operator == (Vec32c const & a, Vec32c const & b) {
 
 // vector operator != : returns true for elements for which a != b
 static inline Vec32cb operator != (Vec32c const & a, Vec32c const & b) {
-#ifdef __XOP2__  // Possible future 256-bit XOP extension ?
-    return _mm256_comneq_epi8(a,b);
-#else  // AVX2 instruction set
     return Vec32cb(Vec32c(~(a == b)));
-#endif
 }
 
 // vector operator > : returns true for elements for which a > b (signed)
@@ -662,11 +658,7 @@ static inline Vec32cb operator < (Vec32c const & a, Vec32c const & b) {
 
 // vector operator >= : returns true for elements for which a >= b (signed)
 static inline Vec32cb operator >= (Vec32c const & a, Vec32c const & b) {
-#ifdef __XOP2__  // // Possible future 256-bit XOP extension ?
-    return _mm256_comge_epi8(a,b);
-#else  // SSE2 instruction set
     return Vec32cb(Vec32c(~(b > a)));
-#endif
 }
 
 // vector operator <= : returns true for elements for which a <= b (signed)
@@ -805,9 +797,6 @@ static inline Vec32c abs_saturated(Vec32c const & a) {
 // function rotate_left all elements
 // Use negative count to rotate right
 static inline Vec32c rotate_left(Vec32c const & a, int b) {
-#ifdef __XOP2__      // Possible future 256-bit XOP extension ?
-    return _mm256_rot_epi8(a,_mm256_set1_epi8(b));
-#else  // SSE2 instruction set
     __m128i bb        = _mm_cvtsi32_si128(b & 7);             // b modulo 8
     __m128i mbb       = _mm_cvtsi32_si128((8-b) & 7);         // 8-b modulo 8
     __m256i maskeven  = _mm256_set1_epi32(0x00FF00FF);        // mask for even numbered bytes
@@ -821,7 +810,6 @@ static inline Vec32c rotate_left(Vec32c const & a, int b) {
     __m256i oddrot    = _mm256_or_si256(oddleft,oddright);    // odd  bytes of a rotated
     __m256i allrot    = selectb(maskeven,evenrot,oddrot);     // all  bytes rotated
     return  allrot;
-#endif
 }
 
 
@@ -836,11 +824,11 @@ class Vec32uc : public Vec32c {
 public:
     // Default constructor:
     Vec32uc(){
-    };
+    }
     // Constructor to broadcast the same value into all elements:
     Vec32uc(uint32_t i) {
         ymm = _mm256_set1_epi8((char)i);
-    };
+    }
     // Constructor to build from all elements:
     Vec32uc(uint8_t i0, uint8_t i1, uint8_t i2, uint8_t i3, uint8_t i4, uint8_t i5, uint8_t i6, uint8_t i7,
         uint8_t i8, uint8_t i9, uint8_t i10, uint8_t i11, uint8_t i12, uint8_t i13, uint8_t i14, uint8_t i15,        
@@ -848,7 +836,7 @@ public:
         uint8_t i24, uint8_t i25, uint8_t i26, uint8_t i27, uint8_t i28, uint8_t i29, uint8_t i30, uint8_t i31) {
         ymm = _mm256_setr_epi8(i0, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, i11, i12, i13, i14, i15,
             i16, i17, i18, i19, i20, i21, i22, i23, i24, i25, i26, i27, i28, i29, i30, i31);
-    };
+    }
     // Constructor to build from two Vec16uc:
     Vec32uc(Vec16uc const & a0, Vec16uc const & a1) {
         ymm = set_m128ir(a0, a1);
@@ -856,12 +844,12 @@ public:
     // Constructor to convert from type __m256i used in intrinsics:
     Vec32uc(__m256i const & x) {
         ymm = x;
-    };
+    }
     // Assignment operator to convert from type __m256i used in intrinsics:
     Vec32uc & operator = (__m256i const & x) {
         ymm = x;
         return *this;
-    };
+    }
     // Member function to load from array (unaligned)
     Vec32uc & load(void const * p) {
         ymm = _mm256_loadu_si256((__m256i const*)p);
@@ -947,11 +935,7 @@ static inline Vec32uc & operator >>= (Vec32uc & a, uint32_t b) {
 
 // vector operator >= : returns true for elements for which a >= b (unsigned)
 static inline Vec32cb operator >= (Vec32uc const & a, Vec32uc const & b) {
-#ifdef __XOP2__  // Possible future 256-bit XOP extension ?
-    return _mm256_comge_epu8(a,b);
-#else 
     return _mm256_cmpeq_epi8(_mm256_max_epu8(a,b), a); // a == max(a,b)
-#endif
 }
 
 // vector operator <= : returns true for elements for which a <= b (unsigned)
@@ -961,11 +945,7 @@ static inline Vec32cb operator <= (Vec32uc const & a, Vec32uc const & b) {
 
 // vector operator > : returns true for elements for which a > b (unsigned)
 static inline Vec32cb operator > (Vec32uc const & a, Vec32uc const & b) {
-#ifdef __XOP2__  // Possible future 256-bit XOP extension ?
-    return _mm256_comgt_epu8(a,b);
-#else  // SSE2 instruction set
     return Vec32cb(Vec32c(~(b >= a)));
-#endif
 }
 
 // vector operator < : returns true for elements for which a < b (unsigned)
@@ -1078,16 +1058,16 @@ class Vec16s : public Vec256b {
 public:
     // Default constructor:
     Vec16s() {
-    };
+    }
     // Constructor to broadcast the same value into all elements:
     Vec16s(int i) {
         ymm = _mm256_set1_epi16((int16_t)i);
-    };
+    }
     // Constructor to build from all elements:
     Vec16s(int16_t i0, int16_t i1, int16_t i2,  int16_t i3,  int16_t i4,  int16_t i5,  int16_t i6,  int16_t i7,
            int16_t i8, int16_t i9, int16_t i10, int16_t i11, int16_t i12, int16_t i13, int16_t i14, int16_t i15) {
         ymm = _mm256_setr_epi16(i0, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, i11, i12, i13, i14, i15 );
-    };
+    }
     // Constructor to build from two Vec8s:
     Vec16s(Vec8s const & a0, Vec8s const & a1) {
         ymm = set_m128ir(a0, a1);
@@ -1095,16 +1075,16 @@ public:
     // Constructor to convert from type __m256i used in intrinsics:
     Vec16s(__m256i const & x) {
         ymm = x;
-    };
+    }
     // Assignment operator to convert from type __m256i used in intrinsics:
     Vec16s & operator = (__m256i const & x) {
         ymm = x;
         return *this;
-    };
+    }
     // Type cast operator to convert to __m256i used in intrinsics
     operator __m256i() const {
         return ymm;
-    };
+    }
     // Member function to load from array (unaligned)
     Vec16s & load(void const * p) {
         ymm = _mm256_loadu_si256((__m256i const*)p);
@@ -1160,7 +1140,7 @@ public:
         __m256i broad = _mm256_set1_epi16(value);
         ymm = selectb(mask, broad, ymm);
         return *this;
-    };
+    }
     // Member function extract a single element from vector
     int16_t extract(uint32_t index) const {
         int16_t x[16];
@@ -1405,11 +1385,7 @@ static inline Vec16sb operator == (Vec16s const & a, Vec16s const & b) {
 
 // vector operator != : returns true for elements for which a != b
 static inline Vec16sb operator != (Vec16s const & a, Vec16s const & b) {
-#ifdef __XOP2__  // Possible future 256-bit XOP extension ?
-    return _mm256_comneq_epi16(a,b);
-#else  // SSE2 instruction set
     return Vec16sb(Vec16s(~(a == b)));
-#endif
 }
 
 // vector operator > : returns true for elements for which a > b
@@ -1424,11 +1400,7 @@ static inline Vec16sb operator < (Vec16s const & a, Vec16s const & b) {
 
 // vector operator >= : returns true for elements for which a >= b (signed)
 static inline Vec16sb operator >= (Vec16s const & a, Vec16s const & b) {
-#ifdef __XOP2__  // Possible future 256-bit XOP extension ?
-    return _mm256_comge_epi16(a,b);
-#else  // SSE2 instruction set
     return Vec16sb(Vec16s(~(b > a)));
-#endif
 }
 
 // vector operator <= : returns true for elements for which a <= b (signed)
@@ -1500,7 +1472,6 @@ static inline Vec16s if_add (Vec16sb const & f, Vec16s const & a, Vec16s const &
 // Horizontal add: Calculates the sum of all vector elements.
 // Overflow will wrap around
 static inline int32_t horizontal_add (Vec16s const & a) {
-// #ifdef __XOP2__  // Possible future 256-bit XOP extension ?
     __m256i sum1  = _mm256_hadd_epi16(a,a);                           // horizontally add 2x8 elements in 3 steps
     __m256i sum2  = _mm256_hadd_epi16(sum1,sum1);
     __m256i sum3  = _mm256_hadd_epi16(sum2,sum2); 
@@ -1567,14 +1538,10 @@ static inline Vec16s abs_saturated(Vec16s const & a) {
 // function rotate_left all elements
 // Use negative count to rotate right
 static inline Vec16s rotate_left(Vec16s const & a, int b) {
-#ifdef __XOP2__  // Possible future 256-bit XOP extension ?
-    return _mm256_rot_epi16(a,_mm256_set1_epi16(b));
-#else  // SSE2 instruction set
     __m256i left  = _mm256_sll_epi16(a,_mm_cvtsi32_si128(b & 0x0F));      // a << b 
     __m256i right = _mm256_srl_epi16(a,_mm_cvtsi32_si128((16-b) & 0x0F)); // a >> (16 - b)
     __m256i rot   = _mm256_or_si256(left,right);                          // or
     return  rot;
-#endif
 }
 
 
@@ -1588,16 +1555,16 @@ class Vec16us : public Vec16s {
 public:
     // Default constructor:
     Vec16us(){
-    };
+    }
     // Constructor to broadcast the same value into all elements:
     Vec16us(uint32_t i) {
         ymm = _mm256_set1_epi16((int16_t)i);
-    };
+    }
     // Constructor to build from all elements:
     Vec16us(uint16_t i0, uint16_t i1, uint16_t i2,  uint16_t i3,  uint16_t i4,  uint16_t i5,  uint16_t i6,  uint16_t i7,
             uint16_t i8, uint16_t i9, uint16_t i10, uint16_t i11, uint16_t i12, uint16_t i13, uint16_t i14, uint16_t i15) {
         ymm = _mm256_setr_epi16(i0, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, i11, i12, i13, i14, i15 );
-    };
+    }
     // Constructor to build from two Vec8us:
     Vec16us(Vec8us const & a0, Vec8us const & a1) {
         ymm = set_m128ir(a0, a1);
@@ -1605,12 +1572,12 @@ public:
     // Constructor to convert from type __m256i used in intrinsics:
     Vec16us(__m256i const & x) {
         ymm = x;
-    };
+    }
     // Assignment operator to convert from type __m256i used in intrinsics:
     Vec16us & operator = (__m256i const & x) {
         ymm = x;
         return *this;
-    };
+    }
     // Member function to load from array (unaligned)
     Vec16us & load(void const * p) {
         ymm = _mm256_loadu_si256((__m256i const*)p);
@@ -1626,7 +1593,7 @@ public:
     Vec16us const & insert(uint32_t index, uint16_t value) {
         Vec16s::insert(index, value);
         return *this;
-    };
+    }
     // Member function extract a single element from vector
     uint16_t extract(uint32_t index) const {
         return Vec16s::extract(index);
@@ -1693,12 +1660,8 @@ static inline Vec16us operator << (Vec16us const & a, int32_t b) {
 
 // vector operator >= : returns true for elements for which a >= b (unsigned)
 static inline Vec16sb operator >= (Vec16us const & a, Vec16us const & b) {
-#ifdef __XOP2__  // Possible future 256-bit XOP extension ?
-    return _mm256_comge_epu16(a,b);
-#else
     __m256i max_ab = _mm256_max_epu16(a,b);                   // max(a,b), unsigned
     return _mm256_cmpeq_epi16(a,max_ab);                      // a == max(a,b)
-#endif
 }
 
 // vector operator <= : returns true for elements for which a <= b (unsigned)
@@ -1708,11 +1671,7 @@ static inline Vec16sb operator <= (Vec16us const & a, Vec16us const & b) {
 
 // vector operator > : returns true for elements for which a > b (unsigned)
 static inline Vec16sb operator > (Vec16us const & a, Vec16us const & b) {
-#ifdef __XOP2__  // Possible future 256-bit XOP extension ?
-    return _mm256_comgt_epu16(a,b);
-#else  // SSE2 instruction set
     return Vec16sb(Vec16s(~(b >= a)));
-#endif
 }
 
 // vector operator < : returns true for elements for which a < b (unsigned)
@@ -1764,7 +1723,6 @@ static inline Vec16us if_add (Vec16sb const & f, Vec16us const & a, Vec16us cons
 // Horizontal add: Calculates the sum of all vector elements.
 // Overflow will wrap around
 static inline uint32_t horizontal_add (Vec16us const & a) {
-//#ifdef __XOP2__  // Possible future 256-bit XOP extension ?
     __m256i sum1  = _mm256_hadd_epi16(a,a);                           // horizontally add 2x8 elements in 3 steps
     __m256i sum2  = _mm256_hadd_epi16(sum1,sum1);
     __m256i sum3  = _mm256_hadd_epi16(sum2,sum2);
@@ -2151,11 +2109,7 @@ static inline Vec8ib operator == (Vec8i const & a, Vec8i const & b) {
 
 // vector operator != : returns true for elements for which a != b
 static inline Vec8ib operator != (Vec8i const & a, Vec8i const & b) {
-#ifdef __XOP2__  // Possible future 256-bit XOP extension ?
-    return _mm256_comneq_epi32(a,b);
-#else  // SSE2 instruction set
     return Vec8ib(Vec8i(~(a == b)));
-#endif
 }
   
 // vector operator > : returns true for elements for which a > b
@@ -2170,11 +2124,7 @@ static inline Vec8ib operator < (Vec8i const & a, Vec8i const & b) {
 
 // vector operator >= : returns true for elements for which a >= b (signed)
 static inline Vec8ib operator >= (Vec8i const & a, Vec8i const & b) {
-#ifdef __XOP2__  // Possible future 256-bit XOP extension ?
-    return _mm256_comge_epi32(a,b);
-#else  // SSE2 instruction set
     return Vec8ib(Vec8i(~(b > a)));
-#endif
 }
 
 // vector operator <= : returns true for elements for which a <= b (signed)
@@ -2246,7 +2196,6 @@ static inline Vec8i if_add (Vec8ib const & f, Vec8i const & a, Vec8i const & b)
 // Horizontal add: Calculates the sum of all vector elements.
 // Overflow will wrap around
 static inline int32_t horizontal_add (Vec8i const & a) {
-//#ifdef __XOP2__  // Possible future 256-bit XOP extension ?
     __m256i sum1  = _mm256_hadd_epi32(a,a);                           // horizontally add 2x4 elements in 2 steps
     __m256i sum2  = _mm256_hadd_epi32(sum1,sum1);
 #if defined (_MSC_VER) && _MSC_VER <= 1700 && ! defined(__INTEL_COMPILER)
@@ -2313,14 +2262,10 @@ static inline Vec8i abs_saturated(Vec8i const & a) {
 // function rotate_left all elements
 // Use negative count to rotate right
 static inline Vec8i rotate_left(Vec8i const & a, int b) {
-#ifdef __XOP2__  // Possible future 256-bit XOP extension ?
-    return _mm256_rot_epi32(a,_mm_set1_epi32(b));
-#else  // SSE2 instruction set
     __m256i left  = _mm256_sll_epi32(a,_mm_cvtsi32_si128(b & 0x1F));      // a << b 
     __m256i right = _mm256_srl_epi32(a,_mm_cvtsi32_si128((32-b) & 0x1F)); // a >> (32 - b)
     __m256i rot   = _mm256_or_si256(left,right);                          // or
     return  rot;
-#endif
 }
 
 
@@ -2334,15 +2279,15 @@ class Vec8ui : public Vec8i {
 public:
     // Default constructor:
     Vec8ui() {
-    };
+    }
     // Constructor to broadcast the same value into all elements:
     Vec8ui(uint32_t i) {
         ymm = _mm256_set1_epi32(i);
-    };
+    }
     // Constructor to build from all elements:
     Vec8ui(uint32_t i0, uint32_t i1, uint32_t i2, uint32_t i3, uint32_t i4, uint32_t i5, uint32_t i6, uint32_t i7) {
         ymm = _mm256_setr_epi32(i0, i1, i2, i3, i4, i5, i6, i7);
-    };
+    }
     // Constructor to build from two Vec4ui:
     Vec8ui(Vec4ui const & a0, Vec4ui const & a1) {
         ymm = set_m128ir(a0, a1);
@@ -2350,12 +2295,12 @@ public:
     // Constructor to convert from type __m256i used in intrinsics:
     Vec8ui(__m256i const & x) {
         ymm = x;
-    };
+    }
     // Assignment operator to convert from type __m256i used in intrinsics:
     Vec8ui & operator = (__m256i const & x) {
         ymm = x;
         return *this;
-    };
+    }
     // Member function to load from array (unaligned)
     Vec8ui & load(void const * p) {
         ymm = _mm256_loadu_si256((__m256i const*)p);
@@ -2438,14 +2383,10 @@ static inline Vec8ui operator << (Vec8ui const & a, int32_t b) {
 
 // vector operator > : returns true for elements for which a > b (unsigned)
 static inline Vec8ib operator > (Vec8ui const & a, Vec8ui const & b) {
-#ifdef __XOP2__  // Possible future 256-bit XOP extension ?
-    return _mm256_comgt_epu32(a,b);
-#else  // AVX2 instruction set
     __m256i signbit = _mm256_set1_epi32(0x80000000);
     __m256i a1      = _mm256_xor_si256(a,signbit);
     __m256i b1      = _mm256_xor_si256(b,signbit);
     return _mm256_cmpgt_epi32(a1,b1);                         // signed compare
-#endif
 }
 
 // vector operator < : returns true for elements for which a < b (unsigned)
@@ -2455,12 +2396,8 @@ static inline Vec8ib operator < (Vec8ui const & a, Vec8ui const & b) {
 
 // vector operator >= : returns true for elements for which a >= b (unsigned)
 static inline Vec8ib operator >= (Vec8ui const & a, Vec8ui const & b) {
-#ifdef __XOP2__  // Possible future 256-bit XOP extension ?
-    return _mm256_comge_epu32(a,b);
-#else
     __m256i max_ab = _mm256_max_epu32(a,b);                   // max(a,b), unsigned
     return _mm256_cmpeq_epi32(a,max_ab);                      // a == max(a,b)
-#endif
 }
 
 // vector operator <= : returns true for elements for which a <= b (unsigned)
@@ -2736,7 +2673,7 @@ public:
     Vec4qb & insert (int index, bool a) {
         Vec4q::insert(index, -(int64_t)a);
         return *this;
-    };    
+    }    
     // Member function extract a single element from vector
     bool extract(uint32_t index) const {
         return Vec4q::extract(index) != 0;
@@ -2931,11 +2868,7 @@ static inline Vec4qb operator == (Vec4q const & a, Vec4q const & b) {
 
 // vector operator != : returns true for elements for which a != b
 static inline Vec4qb operator != (Vec4q const & a, Vec4q const & b) {
-#ifdef __XOP2__  // Possible future 256-bit XOP extension ?
-    return _mm256_comneq_epi64(a,b);
-#else 
     return Vec4qb(Vec4q(~(a == b)));
-#endif
 }
   
 // vector operator < : returns true for elements for which a < b
@@ -2950,11 +2883,7 @@ static inline Vec4qb operator > (Vec4q const & a, Vec4q const & b) {
 
 // vector operator >= : returns true for elements for which a >= b (signed)
 static inline Vec4qb operator >= (Vec4q const & a, Vec4q const & b) {
-#ifdef __XOP2__  // Possible future 256-bit XOP extension ?
-    return _mm256_comge_epi64(a,b);
-#else  // SSE2 instruction set
     return Vec4qb(Vec4q(~(a < b)));
-#endif
 }
 
 // vector operator <= : returns true for elements for which a <= b (signed)
@@ -3073,14 +3002,10 @@ static inline Vec4q abs_saturated(Vec4q const & a) {
 // function rotate_left all elements
 // Use negative count to rotate right
 static inline Vec4q rotate_left(Vec4q const & a, int b) {
-#ifdef __XOP2__  // Possible future 256-bit XOP extension ?
-    return _mm256_rot_epi64(a,Vec4q(b));
-#else  // SSE2 instruction set
     __m256i left  = _mm256_sll_epi64(a,_mm_cvtsi32_si128(b & 0x3F));      // a << b 
     __m256i right = _mm256_srl_epi64(a,_mm_cvtsi32_si128((64-b) & 0x3F)); // a >> (64 - b)
     __m256i rot   = _mm256_or_si256(left, right);                         // or
     return  rot;
-#endif
 }
 
 
@@ -3094,15 +3019,15 @@ class Vec4uq : public Vec4q {
 public:
     // Default constructor:
     Vec4uq() {
-    };
+    }
     // Constructor to broadcast the same value into all elements:
     Vec4uq(uint64_t i) {
         ymm = Vec4q(i);
-    };
+    }
     // Constructor to build from all elements:
     Vec4uq(uint64_t i0, uint64_t i1, uint64_t i2, uint64_t i3) {
         ymm = Vec4q(i0, i1, i2, i3);
-    };
+    }
     // Constructor to build from two Vec2uq:
     Vec4uq(Vec2uq const & a0, Vec2uq const & a1) {
         ymm = set_m128ir(a0, a1);
@@ -3110,12 +3035,12 @@ public:
     // Constructor to convert from type __m256i used in intrinsics:
     Vec4uq(__m256i const & x) {
         ymm = x;
-    };
+    }
     // Assignment operator to convert from type __m256i used in intrinsics:
     Vec4uq & operator = (__m256i const & x) {
         ymm = x;
         return *this;
-    };
+    }
     // Member function to load from array (unaligned)
     Vec4uq & load(void const * p) {
         ymm = _mm256_loadu_si256((__m256i const*)p);
@@ -3195,20 +3120,12 @@ static inline Vec4uq operator << (Vec4uq const & a, int32_t b) {
 
 // vector operator > : returns true for elements for which a > b (unsigned)
 static inline Vec4qb operator > (Vec4uq const & a, Vec4uq const & b) {
-#ifdef __XOP2__  // Possible future 256-bit XOP extension ?
-    return _mm256_comgt_epu64(a,b);
-#else  // SSE2 instruction set
-    __m256i sign32  = _mm256_set1_epi32(0x80000000);          // sign bit of each dword
-    __m256i aflip   = _mm256_xor_si256(a,sign32);             // a with sign bits flipped
-    __m256i bflip   = _mm256_xor_si256(b,sign32);             // b with sign bits flipped
-    __m256i equal   = _mm256_cmpeq_epi32(a,b);                // a == b, dwords
-    __m256i bigger  = _mm256_cmpgt_epi32(aflip,bflip);        // a > b, dwords
-    __m256i biggerl = _mm256_shuffle_epi32(bigger,0xA0);      // a > b, low dwords copied to high dwords
-    __m256i eqbig   = _mm256_and_si256(equal,biggerl);        // high part equal and low part bigger
-    __m256i hibig   = _mm256_or_si256(bigger,eqbig);          // high part bigger or high part equal and low part bigger
-    __m256i big     = _mm256_shuffle_epi32(hibig,0xF5);       // result copied to low part
-    return  big;
-#endif
+//#if defined ( __XOP__ ) // AMD XOP instruction set
+    __m256i sign64 = Vec4uq(0x8000000000000000);
+    __m256i aflip  = _mm256_xor_si256(a, sign64);
+    __m256i bflip  = _mm256_xor_si256(b, sign64);
+    Vec4q   cmp    = _mm256_cmpgt_epi64(aflip,bflip);
+    return Vec4qb(cmp);
 }
 
 // vector operator < : returns true for elements for which a < b (unsigned)
@@ -3218,11 +3135,7 @@ static inline Vec4qb operator < (Vec4uq const & a, Vec4uq const & b) {
 
 // vector operator >= : returns true for elements for which a >= b (unsigned)
 static inline Vec4qb operator >= (Vec4uq const & a, Vec4uq const & b) {
-#ifdef __XOP2__  // Possible future 256-bit XOP extension ?
-    return _mm256_comge_epu64(a,b);
-#else  // SSE2 instruction set
     return  Vec4qb(Vec4q(~(b > a)));
-#endif
 }
 
 // vector operator <= : returns true for elements for which a <= b (unsigned)
@@ -3556,14 +3469,14 @@ static inline Vec16s permute16s(Vec16s const & a) {
             if (dozero) {
                 // zero some elements
                 const __m256i maskz = constant8i<
-                    (i0 <0?0:0xFFFF) | (i1 <0?0:0xFFFF0000),
-                    (i2 <0?0:0xFFFF) | (i3 <0?0:0xFFFF0000),
-                    (i4 <0?0:0xFFFF) | (i5 <0?0:0xFFFF0000),
-                    (i6 <0?0:0xFFFF) | (i7 <0?0:0xFFFF0000),
-                    (i8 <0?0:0xFFFF) | (i9 <0?0:0xFFFF0000),
-                    (i10<0?0:0xFFFF) | (i11<0?0:0xFFFF0000),
-                    (i12<0?0:0xFFFF) | (i13<0?0:0xFFFF0000),
-                    (i14<0?0:0xFFFF) | (i15<0?0:0xFFFF0000) > ();                    
+                    int((i0 <0?0:0xFFFF) | (i1 <0?0:0xFFFF0000)),
+                    int((i2 <0?0:0xFFFF) | (i3 <0?0:0xFFFF0000)),
+                    int((i4 <0?0:0xFFFF) | (i5 <0?0:0xFFFF0000)),
+                    int((i6 <0?0:0xFFFF) | (i7 <0?0:0xFFFF0000)),
+                    int((i8 <0?0:0xFFFF) | (i9 <0?0:0xFFFF0000)),
+                    int((i10<0?0:0xFFFF) | (i11<0?0:0xFFFF0000)),
+                    int((i12<0?0:0xFFFF) | (i13<0?0:0xFFFF0000)),
+                    int((i14<0?0:0xFFFF) | (i15<0?0:0xFFFF0000)) > ();                    
                 return _mm256_and_si256(a, maskz);
             }
             return a;                                 // do nothing
@@ -3598,14 +3511,14 @@ static inline Vec16s permute16s(Vec16s const & a) {
         }
         // need more zeroing
         mask = constant8i<
-            (i0 <0?0:0xFFFF) | (i1 <0?0:0xFFFF0000),
-            (i2 <0?0:0xFFFF) | (i3 <0?0:0xFFFF0000),
-            (i4 <0?0:0xFFFF) | (i5 <0?0:0xFFFF0000),
-            (i6 <0?0:0xFFFF) | (i7 <0?0:0xFFFF0000),
-            (i8 <0?0:0xFFFF) | (i9 <0?0:0xFFFF0000),
-            (i10<0?0:0xFFFF) | (i11<0?0:0xFFFF0000),
-            (i12<0?0:0xFFFF) | (i13<0?0:0xFFFF0000),
-            (i14<0?0:0xFFFF) | (i15<0?0:0xFFFF0000) > ();                    
+            int((i0 <0?0:0xFFFF) | (i1 <0?0:0xFFFF0000)),
+            int((i2 <0?0:0xFFFF) | (i3 <0?0:0xFFFF0000)),
+            int((i4 <0?0:0xFFFF) | (i5 <0?0:0xFFFF0000)),
+            int((i6 <0?0:0xFFFF) | (i7 <0?0:0xFFFF0000)),
+            int((i8 <0?0:0xFFFF) | (i9 <0?0:0xFFFF0000)),
+            int((i10<0?0:0xFFFF) | (i11<0?0:0xFFFF0000)),
+            int((i12<0?0:0xFFFF) | (i13<0?0:0xFFFF0000)),
+            int((i14<0?0:0xFFFF) | (i15<0?0:0xFFFF0000)) > ();                    
         return _mm256_and_si256(t1, mask);
     }
 
@@ -3749,14 +3662,14 @@ static inline Vec32c permute32c(Vec32c const & a) {
         if (dozero) {
             // zero some elements
             mask = constant8i <
-                (i0 <0?0:0xFF) | (i1 <0?0:0xFF00) | (i2 <0?0:0xFF0000) | (i3 <0?0:0xFF000000),
-                (i4 <0?0:0xFF) | (i5 <0?0:0xFF00) | (i6 <0?0:0xFF0000) | (i7 <0?0:0xFF000000),
-                (i8 <0?0:0xFF) | (i9 <0?0:0xFF00) | (i10<0?0:0xFF0000) | (i11<0?0:0xFF000000),
-                (i12<0?0:0xFF) | (i13<0?0:0xFF00) | (i14<0?0:0xFF0000) | (i15<0?0:0xFF000000),
-                (i16<0?0:0xFF) | (i17<0?0:0xFF00) | (i18<0?0:0xFF0000) | (i19<0?0:0xFF000000),
-                (i20<0?0:0xFF) | (i21<0?0:0xFF00) | (i22<0?0:0xFF0000) | (i23<0?0:0xFF000000),
-                (i24<0?0:0xFF) | (i25<0?0:0xFF00) | (i26<0?0:0xFF0000) | (i27<0?0:0xFF000000),
-                (i28<0?0:0xFF) | (i29<0?0:0xFF00) | (i30<0?0:0xFF0000) | (i31<0?0:0xFF000000) > ();
+                int((i0 <0?0:0xFF) | (i1 <0?0:0xFF00) | (i2 <0?0:0xFF0000) | (i3 <0?0:0xFF000000)),
+                int((i4 <0?0:0xFF) | (i5 <0?0:0xFF00) | (i6 <0?0:0xFF0000) | (i7 <0?0:0xFF000000)),
+                int((i8 <0?0:0xFF) | (i9 <0?0:0xFF00) | (i10<0?0:0xFF0000) | (i11<0?0:0xFF000000)),
+                int((i12<0?0:0xFF) | (i13<0?0:0xFF00) | (i14<0?0:0xFF0000) | (i15<0?0:0xFF000000)),
+                int((i16<0?0:0xFF) | (i17<0?0:0xFF00) | (i18<0?0:0xFF0000) | (i19<0?0:0xFF000000)),
+                int((i20<0?0:0xFF) | (i21<0?0:0xFF00) | (i22<0?0:0xFF0000) | (i23<0?0:0xFF000000)),
+                int((i24<0?0:0xFF) | (i25<0?0:0xFF00) | (i26<0?0:0xFF0000) | (i27<0?0:0xFF000000)),
+                int((i28<0?0:0xFF) | (i29<0?0:0xFF00) | (i30<0?0:0xFF0000) | (i31<0?0:0xFF000000)) > ();
             return _mm256_and_si256(a, mask);
         }
         return a; // do nothing
@@ -3812,14 +3725,14 @@ static inline Vec32c permute32c(Vec32c const & a) {
         }
         // need more zeroing
         mask = constant8i <
-            (i0 <0?0:0xFF) | (i1 <0?0:0xFF00) | (i2 <0?0:0xFF0000) | (i3 <0?0:0xFF000000),
-            (i4 <0?0:0xFF) | (i5 <0?0:0xFF00) | (i6 <0?0:0xFF0000) | (i7 <0?0:0xFF000000),
-            (i8 <0?0:0xFF) | (i9 <0?0:0xFF00) | (i10<0?0:0xFF0000) | (i11<0?0:0xFF000000),
-            (i12<0?0:0xFF) | (i13<0?0:0xFF00) | (i14<0?0:0xFF0000) | (i15<0?0:0xFF000000),
-            (i16<0?0:0xFF) | (i17<0?0:0xFF00) | (i18<0?0:0xFF0000) | (i19<0?0:0xFF000000),
-            (i20<0?0:0xFF) | (i21<0?0:0xFF00) | (i22<0?0:0xFF0000) | (i23<0?0:0xFF000000),
-            (i24<0?0:0xFF) | (i25<0?0:0xFF00) | (i26<0?0:0xFF0000) | (i27<0?0:0xFF000000),
-            (i28<0?0:0xFF) | (i29<0?0:0xFF00) | (i30<0?0:0xFF0000) | (i31<0?0:0xFF000000) > ();
+            int((i0 <0?0:0xFF) | (i1 <0?0:0xFF00) | (i2 <0?0:0xFF0000) | (i3 <0?0:0xFF000000)),
+            int((i4 <0?0:0xFF) | (i5 <0?0:0xFF00) | (i6 <0?0:0xFF0000) | (i7 <0?0:0xFF000000)),
+            int((i8 <0?0:0xFF) | (i9 <0?0:0xFF00) | (i10<0?0:0xFF0000) | (i11<0?0:0xFF000000)),
+            int((i12<0?0:0xFF) | (i13<0?0:0xFF00) | (i14<0?0:0xFF0000) | (i15<0?0:0xFF000000)),
+            int((i16<0?0:0xFF) | (i17<0?0:0xFF00) | (i18<0?0:0xFF0000) | (i19<0?0:0xFF000000)),
+            int((i20<0?0:0xFF) | (i21<0?0:0xFF00) | (i22<0?0:0xFF0000) | (i23<0?0:0xFF000000)),
+            int((i24<0?0:0xFF) | (i25<0?0:0xFF00) | (i26<0?0:0xFF0000) | (i27<0?0:0xFF000000)),
+            int((i28<0?0:0xFF) | (i29<0?0:0xFF00) | (i30<0?0:0xFF0000) | (i31<0?0:0xFF000000)) > ();
         return _mm256_and_si256(a, mask);
     } 
 
@@ -3965,51 +3878,53 @@ static inline Vec4q blend4q(Vec4q const & a, Vec4q const & b) {
 
     // special case: 128 bit blend/permute
     if (((m1 ^ 0x01000100) & 0x01010101 & mz) == 0 && (((m1 + 0x00010001) ^ (m1 >> 8)) & 0x00FF00FF & mz & mz >> 8) == 0) {
-        const int j0 = i0 >= 0 ? i0 / 2 : i1 >= 0 ? i1 / 2 : 4;  // index for low 128 bits
-        const int j1 = i2 >= 0 ? i2 / 2 : i3 >= 0 ? i3 / 2 : 4;  // index for high 128 bits
-        const bool partialzero = int((i0 ^ i1) | (i2 ^ i3)) < 0; // part of a 128-bit block is zeroed
-        __m256i t1;
-
-        switch (j0 | j1 << 4) {
-        case 0x00:
-            t1 = _mm256_inserti128_si256(a, _mm256_castsi256_si128(a), 1);  break;
-        case 0x02:
-            t1 = _mm256_inserti128_si256(b, _mm256_castsi256_si128(a), 1);  break;
-        case 0x04:
-            if (dozero && !partialzero) return _mm256_inserti128_si256(_mm256_setzero_si256(), _mm256_castsi256_si128(a), 1);
-            t1 = _mm256_inserti128_si256(a, _mm256_castsi256_si128(a), 1);  break;
-        case 0x12:
-            t1 = _mm256_inserti128_si256(a, _mm256_castsi256_si128(b), 0);  break;
-        case 0x14:
-            if (dozero && !partialzero) return _mm256_inserti128_si256(a,_mm_setzero_si128(), 0);
-            t1 = a;  break;
-        case 0x01: case 0x10: case 0x11: // all from a
-            return permute4q <i0, i1, i2, i3> (a);
-        case 0x20:
-            t1 = _mm256_inserti128_si256(a, _mm256_castsi256_si128(b), 1);  break;
-        case 0x22:
-            t1 = _mm256_inserti128_si256(b, _mm256_castsi256_si128(b), 1);  break;
-        case 0x24:
-            if (dozero && !partialzero) return _mm256_inserti128_si256(_mm256_setzero_si256(), _mm256_castsi256_si128(b), 1);
-            t1 = _mm256_inserti128_si256(b, _mm256_castsi256_si128(b), 1);  break;
-        case 0x30:
-            t1 = _mm256_inserti128_si256(b, _mm256_castsi256_si128(a), 0);  break;
-        case 0x34:
-            if (dozero && !partialzero) return _mm256_inserti128_si256(b,_mm_setzero_si128(), 0);
-            t1 = b;  break;
-        case 0x23: case 0x32: case 0x33:  // all from b
-            return permute4q <i0^4, i1^4, i2^4, i3^4> (b);
-        case 0x40:
-            if (dozero && !partialzero) return _mm256_castsi128_si256(_mm_and_si128(_mm256_castsi256_si128(a),_mm256_castsi256_si128(a)));
-            t1 = a;  break;
-        case 0x42:
-            if (dozero && !partialzero) return _mm256_castsi128_si256(_mm_and_si128(_mm256_castsi256_si128(b),_mm256_castsi256_si128(b)));
-            t1 = b;  break;
-        case 0x44:
-            return _mm256_setzero_si256();
-        default:
-            t1 = _mm256_permute2x128_si256(a, b, (j0&0x0F) | (j1&0x0F) << 4);
+        {
+            const int j0 = i0 >= 0 ? i0 / 2 : i1 >= 0 ? i1 / 2 : 4;  // index for low 128 bits
+            const int j1 = i2 >= 0 ? i2 / 2 : i3 >= 0 ? i3 / 2 : 4;  // index for high 128 bits
+            const bool partialzero = int((i0 ^ i1) | (i2 ^ i3)) < 0; // part of a 128-bit block is zeroed
+
+            switch (j0 | j1 << 4) {
+            case 0x00:
+                t1 = _mm256_inserti128_si256(a, _mm256_castsi256_si128(a), 1);  break;
+            case 0x02:
+                t1 = _mm256_inserti128_si256(b, _mm256_castsi256_si128(a), 1);  break;
+            case 0x04:
+                if (dozero && !partialzero) return _mm256_inserti128_si256(_mm256_setzero_si256(), _mm256_castsi256_si128(a), 1);
+                t1 = _mm256_inserti128_si256(a, _mm256_castsi256_si128(a), 1);  break;
+            case 0x12:
+                t1 = _mm256_inserti128_si256(a, _mm256_castsi256_si128(b), 0);  break;
+            case 0x14:
+                if (dozero && !partialzero) return _mm256_inserti128_si256(a,_mm_setzero_si128(), 0);
+                t1 = a;  break;
+            case 0x01: case 0x10: case 0x11: // all from a
+                return permute4q <i0, i1, i2, i3> (a);
+            case 0x20:
+                t1 = _mm256_inserti128_si256(a, _mm256_castsi256_si128(b), 1);  break;
+            case 0x22:
+                t1 = _mm256_inserti128_si256(b, _mm256_castsi256_si128(b), 1);  break;
+            case 0x24:
+                if (dozero && !partialzero) return _mm256_inserti128_si256(_mm256_setzero_si256(), _mm256_castsi256_si128(b), 1);
+                t1 = _mm256_inserti128_si256(b, _mm256_castsi256_si128(b), 1);  break;
+            case 0x30:
+                t1 = _mm256_inserti128_si256(b, _mm256_castsi256_si128(a), 0);  break;
+            case 0x34:
+                if (dozero && !partialzero) return _mm256_inserti128_si256(b,_mm_setzero_si128(), 0);
+                t1 = b;  break;
+            case 0x23: case 0x32: case 0x33:  // all from b
+                return permute4q <i0^4, i1^4, i2^4, i3^4> (b);
+            case 0x40:
+                if (dozero && !partialzero) return _mm256_castsi128_si256(_mm_and_si128(_mm256_castsi256_si128(a),_mm256_castsi256_si128(a)));
+                t1 = a;  break;
+            case 0x42:
+                if (dozero && !partialzero) return _mm256_castsi128_si256(_mm_and_si128(_mm256_castsi256_si128(b),_mm256_castsi256_si128(b)));
+                t1 = b;  break;
+            case 0x44:
+                return _mm256_setzero_si256();
+            default:
+                t1 = _mm256_permute2x128_si256(a, b, (j0&0x0F) | (j1&0x0F) << 4);
+            }
         }
+        RETURNORZERO:
         if (dozero) {
             // zero some elements
             const __m256i maskz = constant8i <
@@ -4032,21 +3947,11 @@ static inline Vec4q blend4q(Vec4q const & a, Vec4q const & b) {
 
     // special case: blend without permute
     if (((m1 ^ 0x03020100) & 0xFBFBFBFB & mz) == 0) {
-
         mask = constant8i <
             (i0 & 4) ? -1 : 0, (i0 & 4) ? -1 : 0, (i1 & 4) ? -1 : 0, (i1 & 4) ? -1 : 0, 
             (i2 & 4) ? -1 : 0, (i2 & 4) ? -1 : 0, (i3 & 4) ? -1 : 0, (i3 & 4) ? -1 : 0 > ();
-
         t1 = _mm256_blendv_epi8(a, b, mask);  // blend
-
-        if (dozero) {
-            // zero some elements
-            const __m256i maskz = constant8i <
-                i0 < 0 ? 0 : -1, i0 < 0 ? 0 : -1, i1 < 0 ? 0 : -1, i1 < 0 ? 0 : -1, 
-                i2 < 0 ? 0 : -1, i2 < 0 ? 0 : -1, i3 < 0 ? 0 : -1, i3 < 0 ? 0 : -1 > ();
-            return _mm256_and_si256(t1, maskz);
-        }
-        return t1;
+        goto RETURNORZERO;
     } 
 
     // special case: shift left
@@ -4061,6 +3966,26 @@ static inline Vec4q blend4q(Vec4q const & a, Vec4q const & b) {
         if (i0 < 6) return _mm256_alignr_epi8(t1, b, (i0 & 1) * 8);
         else        return _mm256_alignr_epi8(a, t1, (i0 & 1) * 8);
     }
+    // special case: unpack low
+    if (((m1 ^ 0x06020400) & mz) == 0) {
+        t1 = _mm256_unpacklo_epi64(a, b);
+        goto RETURNORZERO;
+    }
+    // special case: unpack low, operands swapped (b, a)
+    if (((m1 ^ 0x02060004) & mz) == 0) {
+        t1 = _mm256_unpacklo_epi64(b, a);
+        goto RETURNORZERO;
+    }
+    // special case: unpack high
+    if (((m1 ^ 0x07030501) & mz) == 0) {
+        t1 = _mm256_unpackhi_epi64(a, b);
+        goto RETURNORZERO;
+    }
+    // special case: unpack high, operands swapped (b, a)
+    if (((m1 ^ 0x03070105) & mz) == 0) {
+        t1 = _mm256_unpackhi_epi64(b, a);
+        goto RETURNORZERO;
+    }
 
     // general case: permute and blend and possibly zero
     const int blank = dozero ? -1 : -0x100;  // ignore or zero
@@ -4202,8 +4127,6 @@ static inline Vec8ui blend8ui(Vec8ui const & a, Vec8ui const & b) {
 template <int i0,  int i1,  int i2,  int i3,  int i4,  int i5,  int i6,  int i7, 
           int i8,  int i9,  int i10, int i11, int i12, int i13, int i14, int i15 > 
 static inline Vec16s blend16s(Vec16s const & a, Vec16s const & b) {  
-    //  #ifdef __XOP2__  // Possible future 256-bit XOP extension ?
-
     // collect bit 4 of each index
     const int m1 = 
         (i0 &16)>>4  | (i1 &16)>>3  | (i2 &16)>>2  | (i3 &16)>>1  | (i4 &16)     | (i5 &16)<<1  | (i6 &16)<<2  | (i7 &16)<<3  | 
@@ -4236,28 +4159,28 @@ static inline Vec16s blend16s(Vec16s const & a, Vec16s const & b) {
         (i12<0||(i12&15)==12) && (i13<0||(i13&15)==13) && (i14<0||(i14&15)==14) && (i15<0||(i15&15)==15)) {
 
         mask = constant8i <
-            ((i0 & 16) ? 0xFFFF : 0) | ((i1 & 16) ? 0xFFFF0000 : 0),
-            ((i2 & 16) ? 0xFFFF : 0) | ((i3 & 16) ? 0xFFFF0000 : 0),
-            ((i4 & 16) ? 0xFFFF : 0) | ((i5 & 16) ? 0xFFFF0000 : 0),
-            ((i6 & 16) ? 0xFFFF : 0) | ((i7 & 16) ? 0xFFFF0000 : 0),
-            ((i8 & 16) ? 0xFFFF : 0) | ((i9 & 16) ? 0xFFFF0000 : 0),
-            ((i10& 16) ? 0xFFFF : 0) | ((i11& 16) ? 0xFFFF0000 : 0),
-            ((i12& 16) ? 0xFFFF : 0) | ((i13& 16) ? 0xFFFF0000 : 0),
-            ((i14& 16) ? 0xFFFF : 0) | ((i15& 16) ? 0xFFFF0000 : 0) > ();
+            int(((i0 & 16) ? 0xFFFF : 0) | ((i1 & 16) ? 0xFFFF0000 : 0)),
+            int(((i2 & 16) ? 0xFFFF : 0) | ((i3 & 16) ? 0xFFFF0000 : 0)),
+            int(((i4 & 16) ? 0xFFFF : 0) | ((i5 & 16) ? 0xFFFF0000 : 0)),
+            int(((i6 & 16) ? 0xFFFF : 0) | ((i7 & 16) ? 0xFFFF0000 : 0)),
+            int(((i8 & 16) ? 0xFFFF : 0) | ((i9 & 16) ? 0xFFFF0000 : 0)),
+            int(((i10& 16) ? 0xFFFF : 0) | ((i11& 16) ? 0xFFFF0000 : 0)),
+            int(((i12& 16) ? 0xFFFF : 0) | ((i13& 16) ? 0xFFFF0000 : 0)),
+            int(((i14& 16) ? 0xFFFF : 0) | ((i15& 16) ? 0xFFFF0000 : 0)) > ();
 
         t1 = _mm256_blendv_epi8(a, b, mask);  // blend
 
         if (mz != 0xFFFF) {
             // zero some elements
             mask = constant8i <
-                (i0  < 0 ? 0 : 0xFFFF) | (i1  < 0 ? 0 : 0xFFFF0000),
-                (i2  < 0 ? 0 : 0xFFFF) | (i3  < 0 ? 0 : 0xFFFF0000),
-                (i4  < 0 ? 0 : 0xFFFF) | (i5  < 0 ? 0 : 0xFFFF0000),
-                (i6  < 0 ? 0 : 0xFFFF) | (i7  < 0 ? 0 : 0xFFFF0000),
-                (i8  < 0 ? 0 : 0xFFFF) | (i9  < 0 ? 0 : 0xFFFF0000),
-                (i10 < 0 ? 0 : 0xFFFF) | (i11 < 0 ? 0 : 0xFFFF0000),
-                (i12 < 0 ? 0 : 0xFFFF) | (i13 < 0 ? 0 : 0xFFFF0000),
-                (i14 < 0 ? 0 : 0xFFFF) | (i15 < 0 ? 0 : 0xFFFF0000) > ();
+                int((i0  < 0 ? 0 : 0xFFFF) | (i1  < 0 ? 0 : 0xFFFF0000)),
+                int((i2  < 0 ? 0 : 0xFFFF) | (i3  < 0 ? 0 : 0xFFFF0000)),
+                int((i4  < 0 ? 0 : 0xFFFF) | (i5  < 0 ? 0 : 0xFFFF0000)),
+                int((i6  < 0 ? 0 : 0xFFFF) | (i7  < 0 ? 0 : 0xFFFF0000)),
+                int((i8  < 0 ? 0 : 0xFFFF) | (i9  < 0 ? 0 : 0xFFFF0000)),
+                int((i10 < 0 ? 0 : 0xFFFF) | (i11 < 0 ? 0 : 0xFFFF0000)),
+                int((i12 < 0 ? 0 : 0xFFFF) | (i13 < 0 ? 0 : 0xFFFF0000)),
+                int((i14 < 0 ? 0 : 0xFFFF) | (i15 < 0 ? 0 : 0xFFFF0000)) > ();
             return _mm256_and_si256(t1, mask);
         }
         return t1;
@@ -4274,14 +4197,14 @@ static inline Vec16s blend16s(Vec16s const & a, Vec16s const & b) {
         if (mz != 0xFFFF) {
             // zero some elements
             mask = constant8i <
-                (i0  < 0 ? 0 : 0xFFFF) | (i1  < 0 ? 0 : 0xFFFF0000),
-                (i2  < 0 ? 0 : 0xFFFF) | (i3  < 0 ? 0 : 0xFFFF0000),
-                (i4  < 0 ? 0 : 0xFFFF) | (i5  < 0 ? 0 : 0xFFFF0000),
-                (i6  < 0 ? 0 : 0xFFFF) | (i7  < 0 ? 0 : 0xFFFF0000),
-                (i8  < 0 ? 0 : 0xFFFF) | (i9  < 0 ? 0 : 0xFFFF0000),
-                (i10 < 0 ? 0 : 0xFFFF) | (i11 < 0 ? 0 : 0xFFFF0000),
-                (i12 < 0 ? 0 : 0xFFFF) | (i13 < 0 ? 0 : 0xFFFF0000),
-                (i14 < 0 ? 0 : 0xFFFF) | (i15 < 0 ? 0 : 0xFFFF0000) > ();
+                int((i0  < 0 ? 0 : 0xFFFF) | (i1  < 0 ? 0 : 0xFFFF0000)),
+                int((i2  < 0 ? 0 : 0xFFFF) | (i3  < 0 ? 0 : 0xFFFF0000)),
+                int((i4  < 0 ? 0 : 0xFFFF) | (i5  < 0 ? 0 : 0xFFFF0000)),
+                int((i6  < 0 ? 0 : 0xFFFF) | (i7  < 0 ? 0 : 0xFFFF0000)),
+                int((i8  < 0 ? 0 : 0xFFFF) | (i9  < 0 ? 0 : 0xFFFF0000)),
+                int((i10 < 0 ? 0 : 0xFFFF) | (i11 < 0 ? 0 : 0xFFFF0000)),
+                int((i12 < 0 ? 0 : 0xFFFF) | (i13 < 0 ? 0 : 0xFFFF0000)),
+                int((i14 < 0 ? 0 : 0xFFFF) | (i15 < 0 ? 0 : 0xFFFF0000)) > ();
             return _mm256_and_si256(t1, mask);
         }
         return t1;
@@ -4297,14 +4220,14 @@ static inline Vec16s blend16s(Vec16s const & a, Vec16s const & b) {
         if (mz != 0xFFFF) {
             // zero some elements
             mask = constant8i <
-                (i0  < 0 ? 0 : 0xFFFF) | (i1  < 0 ? 0 : 0xFFFF0000),
-                (i2  < 0 ? 0 : 0xFFFF) | (i3  < 0 ? 0 : 0xFFFF0000),
-                (i4  < 0 ? 0 : 0xFFFF) | (i5  < 0 ? 0 : 0xFFFF0000),
-                (i6  < 0 ? 0 : 0xFFFF) | (i7  < 0 ? 0 : 0xFFFF0000),
-                (i8  < 0 ? 0 : 0xFFFF) | (i9  < 0 ? 0 : 0xFFFF0000),
-                (i10 < 0 ? 0 : 0xFFFF) | (i11 < 0 ? 0 : 0xFFFF0000),
-                (i12 < 0 ? 0 : 0xFFFF) | (i13 < 0 ? 0 : 0xFFFF0000),
-                (i14 < 0 ? 0 : 0xFFFF) | (i15 < 0 ? 0 : 0xFFFF0000) > ();
+                int((i0  < 0 ? 0 : 0xFFFF) | (i1  < 0 ? 0 : 0xFFFF0000)),
+                int((i2  < 0 ? 0 : 0xFFFF) | (i3  < 0 ? 0 : 0xFFFF0000)),
+                int((i4  < 0 ? 0 : 0xFFFF) | (i5  < 0 ? 0 : 0xFFFF0000)),
+                int((i6  < 0 ? 0 : 0xFFFF) | (i7  < 0 ? 0 : 0xFFFF0000)),
+                int((i8  < 0 ? 0 : 0xFFFF) | (i9  < 0 ? 0 : 0xFFFF0000)),
+                int((i10 < 0 ? 0 : 0xFFFF) | (i11 < 0 ? 0 : 0xFFFF0000)),
+                int((i12 < 0 ? 0 : 0xFFFF) | (i13 < 0 ? 0 : 0xFFFF0000)),
+                int((i14 < 0 ? 0 : 0xFFFF) | (i15 < 0 ? 0 : 0xFFFF0000)) > ();
             return _mm256_and_si256(t1, mask);
         }
         return t1;
@@ -4332,14 +4255,14 @@ static inline Vec16s blend16s(Vec16s const & a, Vec16s const & b) {
     }
     // no zeroing, need to blend
     mask = constant8i <
-        ((i0 & 16) ? 0xFFFF : 0) | ((i1 & 16) ? 0xFFFF0000 : 0),
-        ((i2 & 16) ? 0xFFFF : 0) | ((i3 & 16) ? 0xFFFF0000 : 0),
-        ((i4 & 16) ? 0xFFFF : 0) | ((i5 & 16) ? 0xFFFF0000 : 0),
-        ((i6 & 16) ? 0xFFFF : 0) | ((i7 & 16) ? 0xFFFF0000 : 0),
-        ((i8 & 16) ? 0xFFFF : 0) | ((i9 & 16) ? 0xFFFF0000 : 0),
-        ((i10& 16) ? 0xFFFF : 0) | ((i11& 16) ? 0xFFFF0000 : 0),
-        ((i12& 16) ? 0xFFFF : 0) | ((i13& 16) ? 0xFFFF0000 : 0),
-        ((i14& 16) ? 0xFFFF : 0) | ((i15& 16) ? 0xFFFF0000 : 0) > ();
+        int(((i0 & 16) ? 0xFFFF : 0) | ((i1 & 16) ? 0xFFFF0000 : 0)),
+        int(((i2 & 16) ? 0xFFFF : 0) | ((i3 & 16) ? 0xFFFF0000 : 0)),
+        int(((i4 & 16) ? 0xFFFF : 0) | ((i5 & 16) ? 0xFFFF0000 : 0)),
+        int(((i6 & 16) ? 0xFFFF : 0) | ((i7 & 16) ? 0xFFFF0000 : 0)),
+        int(((i8 & 16) ? 0xFFFF : 0) | ((i9 & 16) ? 0xFFFF0000 : 0)),
+        int(((i10& 16) ? 0xFFFF : 0) | ((i11& 16) ? 0xFFFF0000 : 0)),
+        int(((i12& 16) ? 0xFFFF : 0) | ((i13& 16) ? 0xFFFF0000 : 0)),
+        int(((i14& 16) ? 0xFFFF : 0) | ((i15& 16) ? 0xFFFF0000 : 0)) > ();
 
     return _mm256_blendv_epi8(ta, tb, mask);  // blend
 }
@@ -4355,8 +4278,6 @@ template <int i0,  int i1,  int i2,  int i3,  int i4,  int i5,  int i6,  int i7,
           int i16, int i17, int i18, int i19, int i20, int i21, int i22, int i23,
           int i24, int i25, int i26, int i27, int i28, int i29, int i30, int i31 > 
 static inline Vec32c blend32c(Vec32c const & a, Vec32c const & b) {  
-    //  #ifdef __XOP2__  // Possible future 256-bit XOP extension ?
-
     // collect bit 5 of each index
     const int m1 = 
         (i0 &32)>>5  | (i1 &32)>>4  | (i2 &32)>>3  | (i3 &32)>>2  | (i4 &32)>>1  | (i5 &32)     | (i6 &32)<<1  | (i7 &32)<<2  | 
@@ -4399,28 +4320,28 @@ static inline Vec32c blend32c(Vec32c const & a, Vec32c const & b) {
         (i28<0||(i28&31)==28) && (i29<0||(i29&31)==29) && (i30<0||(i30&31)==30) && (i31<0||(i31&31)==31) ) {
 
         mask = constant8i <
-            ((i0 <<2)&0x80) | ((i1 <<10)&0x8000) | ((i2 <<18)&0x800000) | (uint32_t(i3 <<26)&0x80000000) ,
-            ((i4 <<2)&0x80) | ((i5 <<10)&0x8000) | ((i6 <<18)&0x800000) | (uint32_t(i7 <<26)&0x80000000) ,
-            ((i8 <<2)&0x80) | ((i9 <<10)&0x8000) | ((i10<<18)&0x800000) | (uint32_t(i11<<26)&0x80000000) ,
-            ((i12<<2)&0x80) | ((i13<<10)&0x8000) | ((i14<<18)&0x800000) | (uint32_t(i15<<26)&0x80000000) ,
-            ((i16<<2)&0x80) | ((i17<<10)&0x8000) | ((i18<<18)&0x800000) | (uint32_t(i19<<26)&0x80000000) ,
-            ((i20<<2)&0x80) | ((i21<<10)&0x8000) | ((i22<<18)&0x800000) | (uint32_t(i23<<26)&0x80000000) ,
-            ((i24<<2)&0x80) | ((i25<<10)&0x8000) | ((i26<<18)&0x800000) | (uint32_t(i27<<26)&0x80000000) ,
-            ((i28<<2)&0x80) | ((i29<<10)&0x8000) | ((i30<<18)&0x800000) | (uint32_t(i31<<26)&0x80000000) > ();
+            int(((i0 <<2)&0x80) | ((i1 <<10)&0x8000) | ((i2 <<18)&0x800000) | (uint32_t(i3 <<26)&0x80000000)) ,
+            int(((i4 <<2)&0x80) | ((i5 <<10)&0x8000) | ((i6 <<18)&0x800000) | (uint32_t(i7 <<26)&0x80000000)) ,
+            int(((i8 <<2)&0x80) | ((i9 <<10)&0x8000) | ((i10<<18)&0x800000) | (uint32_t(i11<<26)&0x80000000)) ,
+            int(((i12<<2)&0x80) | ((i13<<10)&0x8000) | ((i14<<18)&0x800000) | (uint32_t(i15<<26)&0x80000000)) ,
+            int(((i16<<2)&0x80) | ((i17<<10)&0x8000) | ((i18<<18)&0x800000) | (uint32_t(i19<<26)&0x80000000)) ,
+            int(((i20<<2)&0x80) | ((i21<<10)&0x8000) | ((i22<<18)&0x800000) | (uint32_t(i23<<26)&0x80000000)) ,
+            int(((i24<<2)&0x80) | ((i25<<10)&0x8000) | ((i26<<18)&0x800000) | (uint32_t(i27<<26)&0x80000000)) ,
+            int(((i28<<2)&0x80) | ((i29<<10)&0x8000) | ((i30<<18)&0x800000) | (uint32_t(i31<<26)&0x80000000)) > ();
 
         t1 = _mm256_blendv_epi8(a, b, mask);  // blend
 
         if (mz != -1) {
             // zero some elements
             const __m256i maskz = constant8i <
-                (i0 <0?0:0xFF) | (i1 <0?0:0xFF00) | (i2 <0?0:0xFF0000) | (i3 <0?0:0xFF000000),
-                (i4 <0?0:0xFF) | (i5 <0?0:0xFF00) | (i6 <0?0:0xFF0000) | (i7 <0?0:0xFF000000),
-                (i8 <0?0:0xFF) | (i9 <0?0:0xFF00) | (i10<0?0:0xFF0000) | (i11<0?0:0xFF000000),
-                (i12<0?0:0xFF) | (i13<0?0:0xFF00) | (i14<0?0:0xFF0000) | (i15<0?0:0xFF000000),
-                (i16<0?0:0xFF) | (i17<0?0:0xFF00) | (i18<0?0:0xFF0000) | (i19<0?0:0xFF000000),
-                (i20<0?0:0xFF) | (i21<0?0:0xFF00) | (i22<0?0:0xFF0000) | (i23<0?0:0xFF000000),
-                (i24<0?0:0xFF) | (i25<0?0:0xFF00) | (i26<0?0:0xFF0000) | (i27<0?0:0xFF000000),
-                (i28<0?0:0xFF) | (i29<0?0:0xFF00) | (i30<0?0:0xFF0000) | (i31<0?0:0xFF000000) > ();
+                int((i0 <0?0:0xFF) | (i1 <0?0:0xFF00) | (i2 <0?0:0xFF0000) | (i3 <0?0:0xFF000000)),
+                int((i4 <0?0:0xFF) | (i5 <0?0:0xFF00) | (i6 <0?0:0xFF0000) | (i7 <0?0:0xFF000000)),
+                int((i8 <0?0:0xFF) | (i9 <0?0:0xFF00) | (i10<0?0:0xFF0000) | (i11<0?0:0xFF000000)),
+                int((i12<0?0:0xFF) | (i13<0?0:0xFF00) | (i14<0?0:0xFF0000) | (i15<0?0:0xFF000000)),
+                int((i16<0?0:0xFF) | (i17<0?0:0xFF00) | (i18<0?0:0xFF0000) | (i19<0?0:0xFF000000)),
+                int((i20<0?0:0xFF) | (i21<0?0:0xFF00) | (i22<0?0:0xFF0000) | (i23<0?0:0xFF000000)),
+                int((i24<0?0:0xFF) | (i25<0?0:0xFF00) | (i26<0?0:0xFF0000) | (i27<0?0:0xFF000000)),
+                int((i28<0?0:0xFF) | (i29<0?0:0xFF00) | (i30<0?0:0xFF0000) | (i31<0?0:0xFF000000)) > ();
             return _mm256_and_si256(t1, maskz);
         }
         return t1;
@@ -4443,14 +4364,14 @@ static inline Vec32c blend32c(Vec32c const & a, Vec32c const & b) {
         if (mz != -1) {
             // zero some elements
             const __m256i maskz = constant8i <
-                (i0 <0?0:0xFF) | (i1 <0?0:0xFF00) | (i2 <0?0:0xFF0000) | (i3 <0?0:0xFF000000),
-                (i4 <0?0:0xFF) | (i5 <0?0:0xFF00) | (i6 <0?0:0xFF0000) | (i7 <0?0:0xFF000000),
-                (i8 <0?0:0xFF) | (i9 <0?0:0xFF00) | (i10<0?0:0xFF0000) | (i11<0?0:0xFF000000),
-                (i12<0?0:0xFF) | (i13<0?0:0xFF00) | (i14<0?0:0xFF0000) | (i15<0?0:0xFF000000),
-                (i16<0?0:0xFF) | (i17<0?0:0xFF00) | (i18<0?0:0xFF0000) | (i19<0?0:0xFF000000),
-                (i20<0?0:0xFF) | (i21<0?0:0xFF00) | (i22<0?0:0xFF0000) | (i23<0?0:0xFF000000),
-                (i24<0?0:0xFF) | (i25<0?0:0xFF00) | (i26<0?0:0xFF0000) | (i27<0?0:0xFF000000),
-                (i28<0?0:0xFF) | (i29<0?0:0xFF00) | (i30<0?0:0xFF0000) | (i31<0?0:0xFF000000) > ();
+                int((i0 <0?0:0xFF) | (i1 <0?0:0xFF00) | (i2 <0?0:0xFF0000) | (i3 <0?0:0xFF000000)),
+                int((i4 <0?0:0xFF) | (i5 <0?0:0xFF00) | (i6 <0?0:0xFF0000) | (i7 <0?0:0xFF000000)),
+                int((i8 <0?0:0xFF) | (i9 <0?0:0xFF00) | (i10<0?0:0xFF0000) | (i11<0?0:0xFF000000)),
+                int((i12<0?0:0xFF) | (i13<0?0:0xFF00) | (i14<0?0:0xFF0000) | (i15<0?0:0xFF000000)),
+                int((i16<0?0:0xFF) | (i17<0?0:0xFF00) | (i18<0?0:0xFF0000) | (i19<0?0:0xFF000000)),
+                int((i20<0?0:0xFF) | (i21<0?0:0xFF00) | (i22<0?0:0xFF0000) | (i23<0?0:0xFF000000)),
+                int((i24<0?0:0xFF) | (i25<0?0:0xFF00) | (i26<0?0:0xFF0000) | (i27<0?0:0xFF000000)),
+                int((i28<0?0:0xFF) | (i29<0?0:0xFF00) | (i30<0?0:0xFF0000) | (i31<0?0:0xFF000000)) > ();
             return _mm256_and_si256(t1, maskz);
         }
         return t1;
@@ -4472,14 +4393,14 @@ static inline Vec32c blend32c(Vec32c const & a, Vec32c const & b) {
         if (mz != -1) {
             // zero some elements
             const __m256i maskz = constant8i <
-                (i0 <0?0:0xFF) | (i1 <0?0:0xFF00) | (i2 <0?0:0xFF0000) | (i3 <0?0:0xFF000000),
-                (i4 <0?0:0xFF) | (i5 <0?0:0xFF00) | (i6 <0?0:0xFF0000) | (i7 <0?0:0xFF000000),
-                (i8 <0?0:0xFF) | (i9 <0?0:0xFF00) | (i10<0?0:0xFF0000) | (i11<0?0:0xFF000000),
-                (i12<0?0:0xFF) | (i13<0?0:0xFF00) | (i14<0?0:0xFF0000) | (i15<0?0:0xFF000000),
-                (i16<0?0:0xFF) | (i17<0?0:0xFF00) | (i18<0?0:0xFF0000) | (i19<0?0:0xFF000000),
-                (i20<0?0:0xFF) | (i21<0?0:0xFF00) | (i22<0?0:0xFF0000) | (i23<0?0:0xFF000000),
-                (i24<0?0:0xFF) | (i25<0?0:0xFF00) | (i26<0?0:0xFF0000) | (i27<0?0:0xFF000000),
-                (i28<0?0:0xFF) | (i29<0?0:0xFF00) | (i30<0?0:0xFF0000) | (i31<0?0:0xFF000000) > ();
+                int((i0 <0?0:0xFF) | (i1 <0?0:0xFF00) | (i2 <0?0:0xFF0000) | (i3 <0?0:0xFF000000)),
+                int((i4 <0?0:0xFF) | (i5 <0?0:0xFF00) | (i6 <0?0:0xFF0000) | (i7 <0?0:0xFF000000)),
+                int((i8 <0?0:0xFF) | (i9 <0?0:0xFF00) | (i10<0?0:0xFF0000) | (i11<0?0:0xFF000000)),
+                int((i12<0?0:0xFF) | (i13<0?0:0xFF00) | (i14<0?0:0xFF0000) | (i15<0?0:0xFF000000)),
+                int((i16<0?0:0xFF) | (i17<0?0:0xFF00) | (i18<0?0:0xFF0000) | (i19<0?0:0xFF000000)),
+                int((i20<0?0:0xFF) | (i21<0?0:0xFF00) | (i22<0?0:0xFF0000) | (i23<0?0:0xFF000000)),
+                int((i24<0?0:0xFF) | (i25<0?0:0xFF00) | (i26<0?0:0xFF0000) | (i27<0?0:0xFF000000)),
+                int((i28<0?0:0xFF) | (i29<0?0:0xFF00) | (i30<0?0:0xFF0000) | (i31<0?0:0xFF000000)) > ();
             return _mm256_and_si256(t1, maskz);
         }
         return t1;
@@ -4515,14 +4436,14 @@ static inline Vec32c blend32c(Vec32c const & a, Vec32c const & b) {
     }
     // no zeroing, need to blend
     mask = constant8i <
-        ((i0 <<2)&0x80) | ((i1 <<10)&0x8000) | ((i2 <<18)&0x800000) | (uint32_t(i3 <<26)&0x80000000) ,
-        ((i4 <<2)&0x80) | ((i5 <<10)&0x8000) | ((i6 <<18)&0x800000) | (uint32_t(i7 <<26)&0x80000000) ,
-        ((i8 <<2)&0x80) | ((i9 <<10)&0x8000) | ((i10<<18)&0x800000) | (uint32_t(i11<<26)&0x80000000) ,
-        ((i12<<2)&0x80) | ((i13<<10)&0x8000) | ((i14<<18)&0x800000) | (uint32_t(i15<<26)&0x80000000) ,
-        ((i16<<2)&0x80) | ((i17<<10)&0x8000) | ((i18<<18)&0x800000) | (uint32_t(i19<<26)&0x80000000) ,
-        ((i20<<2)&0x80) | ((i21<<10)&0x8000) | ((i22<<18)&0x800000) | (uint32_t(i23<<26)&0x80000000) ,
-        ((i24<<2)&0x80) | ((i25<<10)&0x8000) | ((i26<<18)&0x800000) | (uint32_t(i27<<26)&0x80000000) ,
-        ((i28<<2)&0x80) | ((i29<<10)&0x8000) | ((i30<<18)&0x800000) | (uint32_t(i31<<26)&0x80000000) > ();
+        int(((i0 <<2)&0x80) | ((i1 <<10)&0x8000) | ((i2 <<18)&0x800000) | (uint32_t(i3 <<26)&0x80000000)) ,
+        int(((i4 <<2)&0x80) | ((i5 <<10)&0x8000) | ((i6 <<18)&0x800000) | (uint32_t(i7 <<26)&0x80000000)) ,
+        int(((i8 <<2)&0x80) | ((i9 <<10)&0x8000) | ((i10<<18)&0x800000) | (uint32_t(i11<<26)&0x80000000)) ,
+        int(((i12<<2)&0x80) | ((i13<<10)&0x8000) | ((i14<<18)&0x800000) | (uint32_t(i15<<26)&0x80000000)) ,
+        int(((i16<<2)&0x80) | ((i17<<10)&0x8000) | ((i18<<18)&0x800000) | (uint32_t(i19<<26)&0x80000000)) ,
+        int(((i20<<2)&0x80) | ((i21<<10)&0x8000) | ((i22<<18)&0x800000) | (uint32_t(i23<<26)&0x80000000)) ,
+        int(((i24<<2)&0x80) | ((i25<<10)&0x8000) | ((i26<<18)&0x800000) | (uint32_t(i27<<26)&0x80000000)) ,
+        int(((i28<<2)&0x80) | ((i29<<10)&0x8000) | ((i30<<18)&0x800000) | (uint32_t(i31<<26)&0x80000000)) > ();
 
     return _mm256_blendv_epi8(ta, tb, mask);  // blend
 }
diff --git a/vectorclass/vectori256e.h b/vectorclass/vectori256e.h
index 8f3c2b5..71d0ffb 100755
--- a/vectorclass/vectori256e.h
+++ b/vectorclass/vectori256e.h
@@ -276,11 +276,11 @@ class Vec32c : public Vec256b {
 public:
     // Default constructor:
     Vec32c(){
-    };
+    }
     // Constructor to broadcast the same value into all elements:
     Vec32c(int i) {
         y1 = y0 = _mm_set1_epi8((char)i);
-    };
+    }
     // Constructor to build from all elements:
     Vec32c(int8_t i0, int8_t i1, int8_t i2, int8_t i3, int8_t i4, int8_t i5, int8_t i6, int8_t i7,
         int8_t i8, int8_t i9, int8_t i10, int8_t i11, int8_t i12, int8_t i13, int8_t i14, int8_t i15,        
@@ -288,7 +288,7 @@ public:
         int8_t i24, int8_t i25, int8_t i26, int8_t i27, int8_t i28, int8_t i29, int8_t i30, int8_t i31) {
         y0 = _mm_setr_epi8(i0,  i1,  i2,  i3,  i4,  i5,  i6,  i7,  i8,  i9,  i10, i11, i12, i13, i14, i15);
         y1 = _mm_setr_epi8(i16, i17, i18, i19, i20, i21, i22, i23, i24, i25, i26, i27, i28, i29, i30, i31);
-    };
+    }
     // Constructor to build from two Vec16c:
     Vec32c(Vec16c const & a0, Vec16c const & a1) {
         y0 = a0;  y1 = a1;
@@ -297,13 +297,13 @@ public:
     Vec32c(Vec256ie const & x) {
         y0 = x.get_low();
         y1 = x.get_high();
-    };
+    }
     // Assignment operator to convert from type Vec256ie
     Vec32c & operator = (Vec256ie const & x) {
         y0 = x.get_low();
         y1 = x.get_high();
         return *this;
-    };
+    }
     // Member function to load from array (unaligned)
     Vec32c & load(void const * p) {
         y0 = _mm_loadu_si128((__m128i const*)p);
@@ -773,11 +773,11 @@ class Vec32uc : public Vec32c {
 public:
     // Default constructor:
     Vec32uc(){
-    };
+    }
     // Constructor to broadcast the same value into all elements:
     Vec32uc(uint32_t i) {
         y1 = y0 = _mm_set1_epi8((char)i);
-    };
+    }
     // Constructor to build from all elements:
     Vec32uc(uint8_t i0, uint8_t i1, uint8_t i2, uint8_t i3, uint8_t i4, uint8_t i5, uint8_t i6, uint8_t i7,
         uint8_t i8, uint8_t i9, uint8_t i10, uint8_t i11, uint8_t i12, uint8_t i13, uint8_t i14, uint8_t i15,        
@@ -785,7 +785,7 @@ public:
         uint8_t i24, uint8_t i25, uint8_t i26, uint8_t i27, uint8_t i28, uint8_t i29, uint8_t i30, uint8_t i31) {
         y0 = _mm_setr_epi8(i0,  i1,  i2,  i3,  i4,  i5,  i6,  i7,  i8,  i9,  i10, i11, i12, i13, i14, i15);
         y1 = _mm_setr_epi8(i16, i17, i18, i19, i20, i21, i22, i23, i24, i25, i26, i27, i28, i29, i30, i31);
-    };
+    }
     // Constructor to build from two Vec16uc:
     Vec32uc(Vec16uc const & a0, Vec16uc const & a1) {
         y0 = a0;  y1 = a1;
@@ -793,12 +793,12 @@ public:
     // Constructor to convert from type Vec256ie
     Vec32uc(Vec256ie const & x) {
         y0 = x.get_low();  y1 = x.get_high();
-    };
+    }
     // Assignment operator to convert from type Vec256ie
     Vec32uc & operator = (Vec256ie const & x) {
         y0 = x.get_low();  y1 = x.get_high();
         return *this;
-    };
+    }
     // Member function to load from array (unaligned)
     Vec32uc & load(void const * p) {
         y0 = _mm_loadu_si128((__m128i const*)p);
@@ -995,17 +995,17 @@ class Vec16s : public Vec256b {
 public:
     // Default constructor:
     Vec16s() {
-    };
+    }
     // Constructor to broadcast the same value into all elements:
     Vec16s(int i) {
         y1 = y0 = _mm_set1_epi16((int16_t)i);
-    };
+    }
     // Constructor to build from all elements:
     Vec16s(int16_t i0, int16_t i1, int16_t i2,  int16_t i3,  int16_t i4,  int16_t i5,  int16_t i6,  int16_t i7,
            int16_t i8, int16_t i9, int16_t i10, int16_t i11, int16_t i12, int16_t i13, int16_t i14, int16_t i15) {
         y0 = _mm_setr_epi16(i0, i1, i2,  i3,  i4,  i5,  i6,  i7);
         y1 = _mm_setr_epi16(i8, i9, i10, i11, i12, i13, i14, i15);
-    };
+    }
     // Constructor to build from two Vec8s:
     Vec16s(Vec8s const & a0, Vec8s const & a1) {
         y0 = a0;  y1 = a1;
@@ -1013,12 +1013,12 @@ public:
     // Constructor to convert from type Vec256ie
     Vec16s(Vec256ie const & x) {
         y0 = x.get_low();  y1 = x.get_high();
-    };
+    }
     // Assignment operator to convert from type Vec256ie
     Vec16s & operator = (Vec256ie const & x) {
         y0 = x.get_low();  y1 = x.get_high();
         return *this;
-    };
+    }
     // Member function to load from array (unaligned)
     Vec16s & load(void const * p) {
         y0 = _mm_loadu_si128((__m128i const*)p);
@@ -1078,7 +1078,7 @@ public:
             y1 = Vec8s(y1).insert(index-8, value);
         }
         return *this;
-    };
+    }
     // Member function extract a single element from vector
     int16_t extract(uint32_t index) const {
         if (index < 8) {
@@ -1478,17 +1478,17 @@ class Vec16us : public Vec16s {
 public:
     // Default constructor:
     Vec16us(){
-    };
+    }
     // Constructor to broadcast the same value into all elements:
     Vec16us(uint32_t i) {
         y1 = y0 = _mm_set1_epi16((int16_t)i);
-    };
+    }
     // Constructor to build from all elements:
     Vec16us(uint16_t i0, uint16_t i1, uint16_t i2,  uint16_t i3,  uint16_t i4,  uint16_t i5,  uint16_t i6,  uint16_t i7,
             uint16_t i8, uint16_t i9, uint16_t i10, uint16_t i11, uint16_t i12, uint16_t i13, uint16_t i14, uint16_t i15) {
         y0 = _mm_setr_epi16(i0, i1, i2,  i3,  i4,  i5,  i6,  i7);
         y1 = _mm_setr_epi16(i8, i9, i10, i11, i12, i13, i14, i15 );
-    };
+    }
     // Constructor to build from two Vec8us:
     Vec16us(Vec8us const & a0, Vec8us const & a1) {
         y0 = a0;  y1 = a1;
@@ -1496,12 +1496,12 @@ public:
     // Constructor to convert from type Vec256ie
     Vec16us(Vec256ie const & x) {
         y0 = x.get_low();  y1 = x.get_high();
-    };
+    }
     // Assignment operator to convert from type Vec256ie
     Vec16us & operator = (Vec256ie const & x) {
         y0 = x.get_low();  y1 = x.get_high();
         return *this;
-    };
+    }
     // Member function to load from array (unaligned)
     Vec16us & load(void const * p) {
         y0 = _mm_loadu_si128((__m128i const*)p);
@@ -1519,7 +1519,7 @@ public:
     Vec16us const & insert(uint32_t index, uint16_t value) {
         Vec16s::insert(index, value);
         return *this;
-    };
+    }
     // Member function extract a single element from vector
     uint16_t extract(uint32_t index) const {
         return Vec16s::extract(index);
@@ -1855,7 +1855,7 @@ public:
     Vec8ib & insert (int index, bool a) {
         Vec8i::insert(index, -(int)a);
         return *this;
-    };
+    }
     // Member function extract a single element from vector
     // Note: This function is inefficient. Use store function if extracting more than one element
     bool extract(uint32_t index) const {
@@ -2179,16 +2179,16 @@ class Vec8ui : public Vec8i {
 public:
     // Default constructor:
     Vec8ui() {
-    };
+    }
     // Constructor to broadcast the same value into all elements:
     Vec8ui(uint32_t i) {
         y1 = y0 = _mm_set1_epi32(i);
-    };
+    }
     // Constructor to build from all elements:
     Vec8ui(uint32_t i0, uint32_t i1, uint32_t i2, uint32_t i3, uint32_t i4, uint32_t i5, uint32_t i6, uint32_t i7) {
         y0 = _mm_setr_epi32(i0, i1, i2, i3);
         y1 = _mm_setr_epi32(i4, i5, i6, i7);
-    };
+    }
     // Constructor to build from two Vec4ui:
     Vec8ui(Vec4ui const & a0, Vec4ui const & a1) {
         y0 = a0;  y1 = a1;
@@ -2196,12 +2196,12 @@ public:
     // Constructor to convert from type Vec256ie
     Vec8ui(Vec256ie const & x) {
         y0 = x.get_low();  y1 = x.get_high();
-    };
+    }
     // Assignment operator to convert from type Vec256ie
     Vec8ui & operator = (Vec256ie const & x) {
         y0 = x.get_low();  y1 = x.get_high();
         return *this;
-    };
+    }
     // Member function to load from array (unaligned)
     Vec8ui & load(void const * p) {
         y0 = _mm_loadu_si128((__m128i const*)p);
@@ -2560,7 +2560,7 @@ public:
     Vec4qb & insert (int index, bool a) {
         Vec4q::insert(index, -(int64_t)a);
         return *this;
-    };    
+    }    
     // Member function extract a single element from vector
     // Note: This function is inefficient. Use store function if extracting more than one element
     bool extract(uint32_t index) const {
@@ -2859,16 +2859,16 @@ class Vec4uq : public Vec4q {
 public:
     // Default constructor:
     Vec4uq() {
-    };
+    }
     // Constructor to broadcast the same value into all elements:
     Vec4uq(uint64_t i) {
         y1 = y0 = Vec2q(i);
-    };
+    }
     // Constructor to build from all elements:
     Vec4uq(uint64_t i0, uint64_t i1, uint64_t i2, uint64_t i3) {
         y0 = Vec2q(i0, i1);
         y1 = Vec2q(i2, i3);
-    };
+    }
     // Constructor to build from two Vec2uq:
     Vec4uq(Vec2uq const & a0, Vec2uq const & a1) {
         y0 = a0;  y1 = a1;
@@ -2876,12 +2876,12 @@ public:
     // Constructor to convert from type Vec256ie
     Vec4uq(Vec256ie const & x) {
         y0 = x.get_low();  y1 = x.get_high();
-    };
+    }
     // Assignment operator to convert from type Vec256ie
     Vec4uq & operator = (Vec256ie const & x) {
         y0 = x.get_low();  y1 = x.get_high();
         return *this;
-    };
+    }
     // Member function to load from array (unaligned)
     Vec4uq & load(void const * p) {
         y0 = _mm_loadu_si128((__m128i const*)p);
diff --git a/vectorclass/vectori512.h b/vectorclass/vectori512.h
index ad8863f..dac51c3 100755
--- a/vectorclass/vectori512.h
+++ b/vectorclass/vectori512.h
@@ -51,13 +51,13 @@
 // Bug fix for missing intrinsics:
 // _mm512_cmpgt_epu32_mask, _mm512_cmpgt_epu64_mask
 // all typecast intrinsics
-// Fix expected in GCC version 4.9.2 but not seen yet https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61878
+// Fix expected in GCC version 4.9.3 or 5.0. https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61878
 
 // questionable
 // _mm512_mask_mov_epi32 check select(). Doc at https://software.intel.com/en-us/node/513888 is wrong. Bug report filed
 
 
-#if defined (GCC_VERSION) && GCC_VERSION < 41102 && !defined(__INTEL_COMPILER) && !defined(__clang__)
+#if defined (GCC_VERSION) && GCC_VERSION < 50000 && !defined(__INTEL_COMPILER) && !defined(__clang__)
 
 static inline  __m512i _mm512_castsi256_si512(__m256i x) {
     union {
diff --git a/vectorclass/vectormath_exp.h b/vectorclass/vectormath_exp.h
index 4669b87..465ada8 100755
--- a/vectorclass/vectormath_exp.h
+++ b/vectorclass/vectormath_exp.h
@@ -1,7 +1,7 @@
 /****************************  vectormath_exp.h   ******************************
 * Author:        Agner Fog
 * Date created:  2014-04-18
-* Last modified: 2014-12-18
+* Last modified: 2015-02-10
 * Version:       1.16
 * Project:       vector classes
 * Description:
@@ -114,15 +114,15 @@ static inline Vec16f vm_pow2n (Vec16f const & n) {
 // The limit of abs(x) is defined by max_x below
 // This function does not produce denormals
 // Template parameters:
-// VTYPE: double vector type
-// BTYPE: boolean vector type
+// VTYPE:  double vector type
+// BVTYPE: boolean vector type
 // M1: 0 for exp, 1 for expm1
 // BA: 0 for exp, 1 for 0.5*exp, 2 for pow(2,x), 10 for pow(10,x)
 
 #if 1  // choose method
 
 // Taylor expansion
-template<class VTYPE, class BTYPE, int M1, int BA> 
+template<class VTYPE, class BVTYPE, int M1, int BA> 
 static inline VTYPE exp_d(VTYPE const & initial_x) {    
 
     // Taylor coefficients, 1/n!
@@ -146,8 +146,8 @@ static inline VTYPE exp_d(VTYPE const & initial_x) {
     double max_x;
 
     // data vectors
-    VTYPE x, r, z, n2;
-    BTYPE inrange;                               // boolean vector
+    VTYPE  x, r, z, n2;
+    BVTYPE inrange;                              // boolean vector
 
     if (BA <= 1) { // exp(x)
         max_x = BA == 0 ? 708.39 : 709.7; // lower limit for 0.5*exp(x) is -707.6, but we are using 0.5*exp(x) only for positive x in hyperbolic functions
@@ -217,7 +217,7 @@ static inline VTYPE exp_d(VTYPE const & initial_x) {
 #else
 
 // Pade expansion uses less code and fewer registers, but is slower
-template<class VTYPE, class BTYPE, int M1, int BA> 
+template<class VTYPE, class BVTYPE, int M1, int BA> 
 static inline VTYPE exp_d(VTYPE const & initial_x) {
 
     // define constants
@@ -234,8 +234,8 @@ static inline VTYPE exp_d(VTYPE const & initial_x) {
     const double Q2exp = 2.52448340349684104192E-3;
     const double Q3exp = 3.00198505138664455042E-6;
 
-    VTYPE x, r, xx, px, qx, y, n2;               // data vectors
-    BTYPE inrange;                               // boolean vector
+    VTYPE  x, r, xx, px, qx, y, n2;              // data vectors
+    BVTYPE inrange;                              // boolean vector
 
     x = initial_x;
     r = round(initial_x*log2e);
@@ -349,12 +349,12 @@ static inline Vec8d exp10(Vec8d const & x) {
 // The limit of abs(x) is defined by max_x below
 // This function does not produce denormals
 // Template parameters:
-// VTYPE: float vector type
-// BTYPE: boolean vector type
+// VTYPE:  float vector type
+// BVTYPE: boolean vector type
 // M1: 0 for exp, 1 for expm1
 // BA: 0 for exp, 1 for 0.5*exp, 2 for pow(2,x), 10 for pow(10,x)
 
-template<class VTYPE, class BTYPE, int M1, int BA> 
+template<class VTYPE, class BVTYPE, int M1, int BA> 
 static inline VTYPE exp_f(VTYPE const & initial_x) {
 
     // Taylor coefficients
@@ -365,8 +365,8 @@ static inline VTYPE exp_f(VTYPE const & initial_x) {
     const float P4expf   =  1.f/720.f; 
     const float P5expf   =  1.f/5040.f; 
 
-    VTYPE x, r, x2, z, n2;                       // data vectors        
-    BTYPE inrange;                               // boolean vector
+    VTYPE  x, r, x2, z, n2;                      // data vectors        
+    BVTYPE inrange;                              // boolean vector
 
     // maximum abs(x), value depends on BA, defined below
     // The lower limit of x is slightly more restrictive than the upper limit.
@@ -660,10 +660,10 @@ static inline Vec8d exponent_f(Vec8d const & x) {
 
 // log function, double precision
 // template parameters:
-// VTYPE: f.p. vector type
-// BTYPE: boolean vector type
+// VTYPE:  f.p. vector type
+// BVTYPE: boolean vector type
 // M1: 0 for log, 1 for log1p
-template<class VTYPE, class BTYPE, int M1> 
+template<class VTYPE, class BVTYPE, int M1> 
 static inline VTYPE log_d(VTYPE const & initial_x) {
 
     // define constants
@@ -681,8 +681,8 @@ static inline VTYPE log_d(VTYPE const & initial_x) {
     const double Q3log  =  4.52279145837532221105E1;
     const double Q4log  =  1.12873587189167450590E1;
 
-    VTYPE x1, x, x2, px, qx, res, fe;            // data vectors
-    BTYPE blend, overflow, underflow;            // boolean vectors
+    VTYPE  x1, x, x2, px, qx, res, fe;           // data vectors
+    BVTYPE blend, overflow, underflow;           // boolean vectors
 
     if (M1 == 0) {
         x1 = initial_x;                          // log(x)
@@ -798,12 +798,12 @@ static inline Vec8d log10(Vec8d const & x) {
 
 // log function, single precision
 // template parameters:
-// VTYPE: f.p. vector type
-// ITYPE: integer vector type with same element size
-// BTYPE: boolean vector type
+// VTYPE:  f.p. vector type
+// ITYPE:  integer vector type with same element size
+// BVTYPE: boolean vector type
 // BTYPEI: boolean vector type for ITYPE
 // M1: 0 for log, 1 for log1p
-template<class VTYPE, class ITYPE, class BTYPE, class BTYPEI, int M1> 
+template<class VTYPE, class ITYPE, class BVTYPE, class BTYPEI, int M1> 
 static inline VTYPE log_f(VTYPE const & initial_x) {
 
     // define constants
@@ -819,9 +819,9 @@ static inline VTYPE log_f(VTYPE const & initial_x) {
     const float P7logf  = -1.1514610310E-1f;
     const float P8logf  =  7.0376836292E-2f;
 
-    VTYPE x1, x, res, x2, fe;                    // data vectors
-    ITYPE e;                                     // integer vector
-    BTYPE blend, overflow, underflow;            // boolean vectors
+    VTYPE  x1, x, res, x2, fe;                   // data vectors
+    ITYPE  e;                                    // integer vector
+    BVTYPE blend, overflow, underflow;           // boolean vectors
 
     if (M1 == 0) {
         x1 = initial_x;                          // log(x)
@@ -845,7 +845,7 @@ static inline VTYPE log_f(VTYPE const & initial_x) {
     }
     else {
         // log(x+1). Avoid loss of precision when adding 1 and later subtracting 1 if exponent = 0
-        x = select(BTYPE(e==0), initial_x, x - 1.0f);
+        x = select(BVTYPE(e==0), initial_x, x - 1.0f);
     }
 
     // Taylor expansion
@@ -941,15 +941,15 @@ static inline Vec16f log10(Vec16f const & x) {
 // VTYPE:  f.p. vector type
 // ITYPE:  uint32_t integer vector type with same total number of bits
 // ITYPE2: uint64_t integer vector type with same total number of bits
-// BTYPE:  boolean vector type
+// BVTYPE: boolean vector type
 // CR:     -1 for reciprocal cube root, 1 for cube root, 2 for cube root squared
-template<class VTYPE, class ITYPE, class ITYPE2, class BTYPE, int CR> 
+template<class VTYPE, class ITYPE, class ITYPE2, class BVTYPE, int CR> 
 static inline VTYPE cbrt_d(VTYPE const & x) {
     const int iter = 7;     // iteration count of x^(-1/3) loop
     int i;
     VTYPE  xa, xa3, a, a2;
     ITYPE  m1, m2;
-    BTYPE  underflow;
+    BVTYPE underflow;
     ITYPE2 q1(0x5540000000000000ULL);            // exponent bias
     ITYPE2 q2(0x0005555500000000ULL);            // exponent multiplier for 1/3
     ITYPE2 q3(0x0010000000000000ULL);            // denormal limit
@@ -963,7 +963,7 @@ static inline VTYPE cbrt_d(VTYPE const & x) {
     m1 = reinterpret_i(xa);
     m2 = ITYPE(q1) - (m1 >> 20) * ITYPE(q2);
     a  = reinterpret_d(m2);
-    underflow = BTYPE(ITYPE2(m1) < q3);          // true if denormal or zero
+    underflow = BVTYPE(ITYPE2(m1) < q3);          // true if denormal or zero
 
     // Newton Raphson iteration
     for (i = 0; i < iter-1; i++) {
@@ -1049,16 +1049,16 @@ static inline Vec8d square_cbrt(Vec8d const & x) {
 // template parameters:
 // VTYPE:  f.p. vector type
 // ITYPE:  uint32_t integer vector type
-// BTYPE:  boolean vector type
+// BVTYPE: boolean vector type
 // CR:     -1 for reciprocal cube root, 1 for cube root, 2 for cube root squared
-template<class VTYPE, class ITYPE, class BTYPE, int CR> 
+template<class VTYPE, class ITYPE, class BVTYPE, int CR> 
 static inline VTYPE cbrt_f(VTYPE const & x) {
 
     const int iter = 6;                          // iteration count of x^(-1/3) loop
     int i;
     VTYPE  xa, xa3, a, a2;
     ITYPE  m1, m2;
-    BTYPE  underflow;
+    BVTYPE underflow;
     ITYPE  q1(0x54800000U);                      // exponent bias
     ITYPE  q2(0x002AAAAAU);                      // exponent multiplier for 1/3
     ITYPE  q3(0x00800000U);                      // denormal limit
@@ -1073,7 +1073,7 @@ static inline VTYPE cbrt_f(VTYPE const & x) {
     m2 = q1 - (m1 >> 23) * q2;
     a  = reinterpret_f(m2);
 
-    underflow = BTYPE(m1 < q3);                  // true if denormal or zero
+    underflow = BVTYPE(m1 < q3);                  // true if denormal or zero
 
     // Newton Raphson iteration
     for (i = 0; i < iter-1; i++) {
@@ -1169,8 +1169,8 @@ static inline Vec16f square_cbrt(Vec16f const & x) {
 // Template parameters:
 // VTYPE:  data vector type
 // ITYPE:  signed integer vector type
-// BTYPE:  boolean vector type
-template <class VTYPE, class ITYPE, class BTYPE>
+// BVTYPE: boolean vector type
+template <class VTYPE, class ITYPE, class BVTYPE>
 static inline VTYPE pow_template_d(VTYPE const & x0, VTYPE const & y) {
 
     // define constants
@@ -1217,8 +1217,8 @@ static inline VTYPE pow_template_d(VTYPE const & x0, VTYPE const & y) {
     // integer vectors
     ITYPE ei, ej, yodd;
     // boolean vectors
-    BTYPE blend, xzero, xnegative;
-    BTYPE overflow, underflow, xfinite, yfinite, efinite;
+    BVTYPE blend, xzero, xnegative;
+    BVTYPE overflow, underflow, xfinite, yfinite, efinite;
 
     // remove sign
     x1 = abs(x0);
@@ -1286,8 +1286,8 @@ static inline VTYPE pow_template_d(VTYPE const & x0, VTYPE const & y) {
     // biased exponent of result:
     ej = ei + (ITYPE(reinterpret_i(z)) >> 52);
     // check exponent for overflow and underflow
-    overflow  = BTYPE(ej >= 0x07FF) | (ee >  3000.);
-    underflow = BTYPE(ej <= 0x0000) | (ee < -3000.);
+    overflow  = BVTYPE(ej >= 0x07FF) | (ee >  3000.);
+    underflow = BVTYPE(ej <= 0x0000) | (ee < -3000.);
 
     // add exponent by integer addition
     z = reinterpret_d(ITYPE(reinterpret_i(z)) + (ei << 52));
@@ -1397,9 +1397,9 @@ inline Vec8d pow<float>(Vec8d const & x, float y) {
 // Template parameters:
 // VTYPE:  data vector type
 // ITYPE:  signed integer vector type
-// BTYPE:  boolean vector type
+// BVTYPE: boolean vector type
 // Calculate x to the power of y
-template <class VTYPE, class ITYPE, class BTYPE>
+template <class VTYPE, class ITYPE, class BVTYPE>
 static inline VTYPE pow_template_f(VTYPE const & x0, VTYPE const & y) {
 
     // define constants
@@ -1436,8 +1436,8 @@ static inline VTYPE pow_template_f(VTYPE const & x0, VTYPE const & y) {
     // integer vectors
     ITYPE ei, ej, yodd;
     // boolean vectors
-    BTYPE blend, xzero, xnegative;
-    BTYPE overflow, underflow, xfinite, yfinite, efinite;
+    BVTYPE blend, xzero, xnegative;
+    BVTYPE overflow, underflow, xfinite, yfinite, efinite;
 
     // remove sign
     x1 = abs(x0);
@@ -1501,8 +1501,8 @@ static inline VTYPE pow_template_f(VTYPE const & x0, VTYPE const & y) {
     // biased exponent of result:
     ej = ei + (ITYPE(reinterpret_i(z)) >> 23);
     // check exponent for overflow and underflow
-    overflow  = BTYPE(ej >= 0x0FF) | (ee >  300.f);
-    underflow = BTYPE(ej <= 0x000) | (ee < -300.f);
+    overflow  = BVTYPE(ej >= 0x0FF) | (ee >  300.f);
+    underflow = BVTYPE(ej <= 0x000) | (ee < -300.f);
 
     // add exponent by integer addition
     z = reinterpret_f(ITYPE(reinterpret_i(z)) + (ei << 23)); // the extra 0x10000 is shifted out here
diff --git a/vectorclass/vectormath_hyp.h b/vectorclass/vectormath_hyp.h
index 27ca7a9..948269b 100755
--- a/vectorclass/vectormath_hyp.h
+++ b/vectorclass/vectormath_hyp.h
@@ -1,7 +1,7 @@
 /****************************  vectormath_hyp.h   ******************************
 * Author:        Agner Fog
 * Date created:  2014-07-09
-* Last modified: 2014-10-16
+* Last modified: 2015-02-10
 * Version:       1.16
 * Project:       vector classes
 * Description:
@@ -24,7 +24,7 @@
 *
 * For detailed instructions, see vectormath_common.h and VectorClass.pdf
 *
-* (c) Copyright 2014 GNU General Public License http://www.gnu.org/licenses
+* (c) Copyright 2015 GNU General Public License http://www.gnu.org/licenses
 ******************************************************************************/
 
 #ifndef VECTORMATH_HYP_H
@@ -40,9 +40,9 @@
 // Template for sinh function, double precision
 // This function does not produce denormals
 // Template parameters:
-// VTYPE: double vector type
-// BTYPE: boolean vector type 
-template<class VTYPE, class BTYPE> 
+// VTYPE:  double vector type
+// BVTYPE: boolean vector type 
+template<class VTYPE, class BVTYPE> 
 static inline VTYPE sinh_d(VTYPE const & x0) {    
 // The limit of abs(x) is 709.7, as defined by max_x in vectormath_exp.h for 0.5*exp(x).
 
@@ -58,8 +58,8 @@ static inline VTYPE sinh_d(VTYPE const & x0) {
     const double q3 =  1.0; 
 
     // data vectors
-    VTYPE x, x2, y1, y2;
-    BTYPE x_small;                               // boolean vector
+    VTYPE  x, x2, y1, y2;
+    BVTYPE x_small;                              // boolean vector
 
     x = abs(x0);
     x_small = x <= 1.0;                          // use Pade approximation if abs(x) <= 1
@@ -72,7 +72,7 @@ static inline VTYPE sinh_d(VTYPE const & x0) {
     }
     if (!horizontal_and(x_small)) {
         // At least one element needs big method
-        y2 =  exp_d<VTYPE, BTYPE, 0, 1>(x);      //   0.5 * exp(x)
+        y2 =  exp_d<VTYPE, BVTYPE, 0, 1>(x);     //   0.5 * exp(x)
         y2 -= 0.25 / y2;                         // - 0.5 * exp(-x)
     }
     y1 = select(x_small, y1, y2);                // choose method
@@ -102,9 +102,9 @@ static inline Vec8d sinh(Vec8d const & x) {
 // Template for sinh function, single precision
 // This function does not produce denormals
 // Template parameters:
-// VTYPE: double vector type
-// BTYPE: boolean vector type 
-template<class VTYPE, class BTYPE> 
+// VTYPE:  double vector type
+// BVTYPE: boolean vector type 
+template<class VTYPE, class BVTYPE> 
 static inline VTYPE sinh_f(VTYPE const & x0) {    
 // The limit of abs(x) is 89.0, as defined by max_x in vectormath_exp.h for 0.5*exp(x).
 
@@ -115,7 +115,7 @@ static inline VTYPE sinh_f(VTYPE const & x0) {
 
     // data vectors
     VTYPE x, x2, y1, y2;
-    BTYPE x_small;                               // boolean vector
+    BVTYPE x_small;                              // boolean vector
 
     x = abs(x0);
     x_small = x <= 1.0f;                         // use polynomial approximation if abs(x) <= 1
@@ -128,7 +128,7 @@ static inline VTYPE sinh_f(VTYPE const & x0) {
     }
     if (!horizontal_and(x_small)) {
         // At least one element needs big method
-        y2 =  exp_f<VTYPE, BTYPE, 0, 1>(x);      //   0.5 * exp(x)
+        y2 =  exp_f<VTYPE, BVTYPE, 0, 1>(x);     //   0.5 * exp(x)
         y2 -= 0.25f / y2;                        // - 0.5 * exp(-x)
     }
     y1 = select(x_small, y1, y2);                // choose method
@@ -158,9 +158,9 @@ static inline Vec16f sinh(Vec16f const & x) {
 // Template for cosh function, double precision
 // This function does not produce denormals
 // Template parameters:
-// VTYPE: double vector type
-// BTYPE: boolean vector type 
-template<class VTYPE, class BTYPE> 
+// VTYPE:  double vector type
+// BVTYPE: boolean vector type 
+template<class VTYPE, class BVTYPE> 
 static inline VTYPE cosh_d(VTYPE const & x0) {    
 // The limit of abs(x) is 709.7, as defined by max_x in vectormath_exp.h for 0.5*exp(x).
 
@@ -168,7 +168,7 @@ static inline VTYPE cosh_d(VTYPE const & x0) {
     VTYPE x, y;
 
     x  = abs(x0);
-    y  = exp_d<VTYPE, BTYPE, 0, 1>(x);           //   0.5 * exp(x)
+    y  = exp_d<VTYPE, BVTYPE, 0, 1>(x);          //   0.5 * exp(x)
     y += 0.25 / y;                               // + 0.5 * exp(-x)
     return y;
 }
@@ -194,9 +194,9 @@ static inline Vec8d cosh(Vec8d const & x) {
 // Template for cosh function, single precision
 // This function does not produce denormals
 // Template parameters:
-// VTYPE: double vector type
-// BTYPE: boolean vector type 
-template<class VTYPE, class BTYPE> 
+// VTYPE:  double vector type
+// BVTYPE: boolean vector type 
+template<class VTYPE, class BVTYPE> 
 static inline VTYPE cosh_f(VTYPE const & x0) {    
 // The limit of abs(x) is 89.0, as defined by max_x in vectormath_exp.h for 0.5*exp(x).
 
@@ -204,7 +204,7 @@ static inline VTYPE cosh_f(VTYPE const & x0) {
     VTYPE x, y;
 
     x  = abs(x0);
-    y  = exp_f<VTYPE, BTYPE, 0, 1>(x);           //   0.5 * exp(x)
+    y  = exp_f<VTYPE, BVTYPE, 0, 1>(x);          //   0.5 * exp(x)
     y += 0.25f / y;                              // + 0.5 * exp(-x)
     return y;
 }
@@ -230,9 +230,9 @@ static inline Vec16f cosh(Vec16f const & x) {
 // Template for tanh function, double precision
 // This function does not produce denormals
 // Template parameters:
-// VTYPE: double vector type
-// BTYPE: boolean vector type 
-template<class VTYPE, class BTYPE> 
+// VTYPE:  double vector type
+// BVTYPE: boolean vector type 
+template<class VTYPE, class BVTYPE> 
 static inline VTYPE tanh_d(VTYPE const & x0) {    
 
     // Coefficients
@@ -246,8 +246,8 @@ static inline VTYPE tanh_d(VTYPE const & x0) {
     const double q3 =  1.0; 
 
     // data vectors
-    VTYPE x, x2, y1, y2;
-    BTYPE x_small, x_big;                        // boolean vectors
+    VTYPE  x, x2, y1, y2;
+    BVTYPE x_small, x_big;                       // boolean vectors
 
     x = abs(x0);
     x_small = x <= 0.625;                        // use Pade approximation if abs(x) <= 5/8
@@ -292,9 +292,9 @@ static inline Vec8d tanh(Vec8d const & x) {
 // Template for tanh function, single precision
 // This function does not produce denormals
 // Template parameters:
-// VTYPE: double vector type
-// BTYPE: boolean vector type 
-template<class VTYPE, class BTYPE> 
+// VTYPE:  double vector type
+// BVTYPE: boolean vector type 
+template<class VTYPE, class BVTYPE> 
 static inline VTYPE tanh_f(VTYPE const & x0) {    
 // The limit of abs(x) is 89.0, as defined by max_x in vectormath_exp.h for 0.5*exp(x).
 
@@ -307,7 +307,7 @@ static inline VTYPE tanh_f(VTYPE const & x0) {
 
     // data vectors
     VTYPE x, x2, y1, y2;
-    BTYPE x_small, x_big;                        // boolean vectors
+    BVTYPE x_small, x_big;                       // boolean vectors
 
     x = abs(x0);
     x_small = x <= 0.625f;                       // use polynomial approximation if abs(x) <= 5/8
@@ -357,9 +357,9 @@ static inline Vec16f tanh(Vec16f const & x) {
 // Template for asinh function, double precision
 // This function does not produce denormals
 // Template parameters:
-// VTYPE: double vector type
-// BTYPE: boolean vector type 
-template<class VTYPE, class BTYPE> 
+// VTYPE:  double vector type
+// BVTYPE: boolean vector type 
+template<class VTYPE, class BVTYPE> 
 static inline VTYPE asinh_d(VTYPE const & x0) {    
 
     // Coefficients
@@ -376,8 +376,8 @@ static inline VTYPE asinh_d(VTYPE const & x0) {
     const double q4 =  1.0;
 
     // data vectors
-    VTYPE x, x2, y1, y2;
-    BTYPE x_small, x_huge;                       // boolean vectors
+    VTYPE  x, x2, y1, y2;
+    BVTYPE x_small, x_huge;                      // boolean vectors
 
     x2 = x0 * x0;
     x  = abs(x0);
@@ -425,9 +425,9 @@ static inline Vec8d asinh(Vec8d const & x) {
 // Template for asinh function, single precision
 // This function does not produce denormals
 // Template parameters:
-// VTYPE: double vector type
-// BTYPE: boolean vector type 
-template<class VTYPE, class BTYPE> 
+// VTYPE:  double vector type
+// BVTYPE: boolean vector type 
+template<class VTYPE, class BVTYPE> 
 static inline VTYPE asinh_f(VTYPE const & x0) {    
 
     // Coefficients
@@ -437,8 +437,8 @@ static inline VTYPE asinh_f(VTYPE const & x0) {
     const float r3 =  2.0122003309E-2f;
 
     // data vectors
-    VTYPE x, x2, y1, y2;
-    BTYPE x_small, x_huge;                       // boolean vectors
+    VTYPE  x, x2, y1, y2;
+    BVTYPE x_small, x_huge;                      // boolean vectors
 
     x2 = x0 * x0;
     x  = abs(x0);
@@ -485,9 +485,9 @@ static inline Vec16f asinh(Vec16f const & x) {
 // Template for acosh function, double precision
 // This function does not produce denormals
 // Template parameters:
-// VTYPE: double vector type
-// BTYPE: boolean vector type 
-template<class VTYPE, class BTYPE> 
+// VTYPE:  double vector type
+// BVTYPE: boolean vector type 
+template<class VTYPE, class BVTYPE> 
 static inline VTYPE acosh_d(VTYPE const & x0) {    
 
     // Coefficients
@@ -505,8 +505,8 @@ static inline VTYPE acosh_d(VTYPE const & x0) {
     const double q5 = 1.0;
 
     // data vectors
-    VTYPE x1, y1, y2;
-    BTYPE x_small, x_huge, undef;                // boolean vectors
+    VTYPE  x1, y1, y2;
+    BVTYPE x_small, x_huge, undef;               // boolean vectors
 
     x1      = x0 - 1.0;
     undef   = x0 < 1.0;                          // result is NAN
@@ -552,9 +552,9 @@ static inline Vec8d acosh(Vec8d const & x) {
 // Template for acosh function, single precision
 // This function does not produce denormals
 // Template parameters:
-// VTYPE: double vector type
-// BTYPE: boolean vector type 
-template<class VTYPE, class BTYPE> 
+// VTYPE:  double vector type
+// BVTYPE: boolean vector type 
+template<class VTYPE, class BVTYPE> 
 static inline VTYPE acosh_f(VTYPE const & x0) {    
 
     // Coefficients
@@ -565,8 +565,8 @@ static inline VTYPE acosh_f(VTYPE const & x0) {
     const float r4 =  1.7596881071E-3f;
 
     // data vectors
-    VTYPE x1, y1, y2;
-    BTYPE x_small, x_huge, undef;                // boolean vectors
+    VTYPE  x1, y1, y2;
+    BVTYPE x_small, x_huge, undef;               // boolean vectors
 
     x1      = x0 - 1.0f;
     undef   = x0 < 1.0f;                         // result is NAN
@@ -612,9 +612,9 @@ static inline Vec16f acosh(Vec16f const & x) {
 // Template for atanh function, double precision
 // This function does not produce denormals
 // Template parameters:
-// VTYPE: double vector type
-// BTYPE: boolean vector type 
-template<class VTYPE, class BTYPE> 
+// VTYPE:  double vector type
+// BVTYPE: boolean vector type 
+template<class VTYPE, class BVTYPE> 
 static inline VTYPE atanh_d(VTYPE const & x0) {    
 
     // Coefficients
@@ -632,8 +632,8 @@ static inline VTYPE atanh_d(VTYPE const & x0) {
     const double q5 =  1.0;
 
     // data vectors
-    VTYPE x, x2, y1, y2, y3;
-    BTYPE x_small;                               // boolean vector
+    VTYPE  x, x2, y1, y2, y3;
+    BVTYPE x_small;                              // boolean vector
 
     x  = abs(x0);
     x_small = x < 0.5;                           // use Pade approximation if abs(x) < 0.5
@@ -678,9 +678,9 @@ static inline Vec8d atanh(Vec8d const & x) {
 // Template for atanh function, single precision
 // This function does not produce denormals
 // Template parameters:
-// VTYPE: double vector type
-// BTYPE: boolean vector type 
-template<class VTYPE, class BTYPE> 
+// VTYPE:  double vector type
+// BVTYPE: boolean vector type 
+template<class VTYPE, class BVTYPE> 
 static inline VTYPE atanh_f(VTYPE const & x0) {    
 
     // Coefficients
@@ -691,8 +691,8 @@ static inline VTYPE atanh_f(VTYPE const & x0) {
     const float r4 = 1.81740078349E-1f;
 
     // data vectors
-    VTYPE x, x2, y1, y2, y3;
-    BTYPE x_small;                               // boolean vector
+    VTYPE  x, x2, y1, y2, y3;
+    BVTYPE x_small;                              // boolean vector
 
     x  = abs(x0);
     x_small = x < 0.5f;                          // use polynomial approximation if abs(x) < 0.5
diff --git a/vectorclass/vectormath_trig.h b/vectorclass/vectormath_trig.h
index ecbceaa..986d2e4 100755
--- a/vectorclass/vectormath_trig.h
+++ b/vectorclass/vectormath_trig.h
@@ -1,7 +1,7 @@
 /****************************  vectormath_trig.h   ******************************
 * Author:        Agner Fog
 * Date created:  2014-04-18
-* Last modified: 2014-10-22
+* Last modified: 2015-02-10
 * Version:       1.16
 * Project:       vector classes
 * Description:
@@ -20,7 +20,7 @@
 *
 * For detailed instructions, see vectormath_common.h and VectorClass.pdf
 *
-* (c) Copyright 2014 GNU General Public License http://www.gnu.org/licenses
+* (c) Copyright 2015 GNU General Public License http://www.gnu.org/licenses
 ******************************************************************************/
 
 #ifndef VECTORMATH_TRIG_H
@@ -112,12 +112,12 @@ inline Vec8q vm_half_int_vector_to_full<Vec8q,Vec8i>(Vec8i const & x) {
 // VTYPE:  f.p. vector type
 // ITYPE:  integer vector type with same element size
 // ITYPEH: integer vector type with half the element size
-// BTYPE:  boolean vector type
+// BVTYPE: boolean vector type
 // SC:     1 = sin, 2 = cos, 3 = sincos
 // Paramterers:
 // xx = input x (radians)
 // cosret = return pointer (only if SC = 3)
-template<class VTYPE, class ITYPE, class ITYPEH, class BTYPE, int SC> 
+template<class VTYPE, class ITYPE, class ITYPEH, class BVTYPE, int SC> 
 static inline VTYPE sincos_d(VTYPE * cosret, VTYPE const & xx) {
 
     // define constants
@@ -145,10 +145,10 @@ static inline VTYPE sincos_d(VTYPE * cosret, VTYPE const & xx) {
     const double DP2sc = 3.77489470793079817668E-8;
     const double DP3sc = 2.69515142907905952645E-15;
     */
-    VTYPE xa, x, y, x2, s, c, sin1, cos1;        // data vectors
+    VTYPE  xa, x, y, x2, s, c, sin1, cos1;       // data vectors
     ITYPEH q;                                    // integer vectors, 32 bit
-    ITYPE qq, signsin, signcos;                  // integer vectors, 64 bit
-    BTYPE swap, overflow;                        // boolean vectors
+    ITYPE  qq, signsin, signcos;                 // integer vectors, 64 bit
+    BVTYPE swap, overflow;                       // boolean vectors
 
     xa = abs(xx);
 
@@ -177,7 +177,7 @@ static inline VTYPE sincos_d(VTYPE * cosret, VTYPE const & xx) {
 
     // correct for quadrant
     qq = vm_half_int_vector_to_full<ITYPE,ITYPEH>(q);
-    swap = BTYPE((qq & 2) != 0);
+    swap = BVTYPE((qq & 2) != 0);
 
     // check for overflow
     if (horizontal_or(q < 0)) {
@@ -251,12 +251,12 @@ static inline Vec8d sincos(Vec8d * cosret, Vec8d const & x) {
 // Template parameters:
 // VTYPE:  f.p. vector type
 // ITYPE:  integer vector type with same element size
-// BTYPE:  boolean vector type
+// BVTYPE: boolean vector type
 // SC:     1 = sin, 2 = cos, 3 = sincos, 4 = tan
 // Paramterers:
 // xx = input x (radians)
 // cosret = return pointer (only if SC = 3)
-template<class VTYPE, class ITYPE, class BTYPE, int SC> 
+template<class VTYPE, class ITYPE, class BVTYPE, int SC> 
 static inline VTYPE sincos_f(VTYPE * cosret, VTYPE const & xx) {
 
     // define constants
@@ -274,9 +274,9 @@ static inline VTYPE sincos_f(VTYPE * cosret, VTYPE const & xx) {
     const float P1cosf = -1.388731625493765E-3f;
     const float P2cosf =  2.443315711809948E-5f;
 
-    VTYPE xa, x, y, x2, s, c, sin1, cos1;  // data vectors
-    ITYPE q, signsin, signcos;             // integer vectors
-    BTYPE swap, overflow;                  // boolean vectors
+    VTYPE  xa, x, y, x2, s, c, sin1, cos1;  // data vectors
+    ITYPE  q, signsin, signcos;             // integer vectors
+    BVTYPE swap, overflow;                  // boolean vectors
 
     xa = abs(xx);
 
@@ -303,10 +303,10 @@ static inline VTYPE sincos_f(VTYPE * cosret, VTYPE const & xx) {
     c = polynomial_2(x2, P0cosf, P1cosf, P2cosf) * (x2*x2) + nmul_add(0.5f, x2, 1.0f);
 
     // correct for quadrant
-    swap = BTYPE((q & 2) != 0);
+    swap = BVTYPE((q & 2) != 0);
 
     // check for overflow
-    overflow = BTYPE(q < 0);  // q = 0x80000000 if overflow
+    overflow = BVTYPE(q < 0);  // q = 0x80000000 if overflow
     if (horizontal_or(overflow & is_finite(xa))) {
         s = select(overflow, 0.f, s);
         c = select(overflow, 1.f, c);
@@ -393,10 +393,10 @@ static inline Vec16f tan(Vec16f const & x) {
 // VTYPE:  f.p. vector type
 // ITYPE:  integer vector type with same element size
 // ITYPEH: integer vector type with half the element size
-// BTYPE:  boolean vector type
+// BVTYPE: boolean vector type
 // Paramterers:
 // x = input x (radians)
-template<class VTYPE, class ITYPE, class ITYPEH, class BTYPE> 
+template<class VTYPE, class ITYPE, class ITYPEH, class BVTYPE> 
 static inline VTYPE tan_d(VTYPE const & x) {
 
     // define constants
@@ -415,10 +415,10 @@ static inline VTYPE tan_d(VTYPE const & x) {
     const double Q1tan = 2.50083801823357915839E7;
     const double Q0tan = -5.38695755929454629881E7;
 
-    VTYPE xa, y, z, zz, px, qx, tn, recip;  // data vectors
+    VTYPE  xa, y, z, zz, px, qx, tn, recip; // data vectors
     ITYPEH q;                               // integer vector, 32 bit
-    ITYPE qq;                               // integer vector, 64 bit
-    BTYPE doinvert, xzero, overflow;        // boolean vectors
+    ITYPE  qq;                              // integer vector, 64 bit
+    BVTYPE doinvert, xzero, overflow;       // boolean vectors
 
     xa = abs(x);
 
@@ -447,7 +447,7 @@ static inline VTYPE tan_d(VTYPE const & x) {
 
     // if (q&2) tn = -1/tn
     qq = vm_half_int_vector_to_full<ITYPE,ITYPEH>(q);
-    doinvert = BTYPE((qq & 2) != 0);
+    doinvert = BVTYPE((qq & 2) != 0);
     xzero = (xa == 0.);
     // avoid division by 0. We will not be using recip anyway if xa == 0.
     // tn never becomes exactly 0 when x = pi/2 so we only have to make 
@@ -494,11 +494,11 @@ It is faster to use tan(x) = sin(x)/cos(x)
 // Template parameters:
 // VTYPE:  f.p. vector type
 // ITYPE:  integer vector type with same element size
-// BTYPE:  boolean vector type
+// BVTYPE: boolean vector type
 // Parameters:
 // x = input x (radians)
 // cosret = return pointer (only if SC = 3)
-template<class VTYPE, class ITYPE, class BTYPE> 
+template<class VTYPE, class ITYPE, class BVTYPE> 
 static inline VTYPE tan_f(VTYPE const & x) {
 
     // define constants
@@ -515,9 +515,9 @@ static inline VTYPE tan_f(VTYPE const & x) {
     const float P1tanf = 1.33387994085E-1f;
     const float P0tanf = 3.33331568548E-1f;
 
-    VTYPE xa, y, z, zz, tn, recip;   // data vectors
-    ITYPE q;                         // integer vector
-    BTYPE doinvert, xzero;           // boolean vectors
+    VTYPE  xa, y, z, zz, tn, recip;  // data vectors
+    ITYPE  q;                        // integer vector
+    BVTYPE doinvert, xzero;          // boolean vectors
 
     xa = abs(x);
 
@@ -569,11 +569,11 @@ static inline Vec8f tan(Vec8f const & x) {
 // *************************************************************
 // Template parameters:
 // VTYPE:  f.p. vector type
-// BTYPE:  boolean vector type
+// BVTYPE: boolean vector type
 // AC: 0 = asin, 1 = acos
 // Parameters:
 // x = input x
-template<class VTYPE, class BTYPE, int AC> 
+template<class VTYPE, class BVTYPE, int AC> 
 static inline VTYPE asin_d(VTYPE const & x) {
 
     // define constants
@@ -601,9 +601,9 @@ static inline VTYPE asin_d(VTYPE const & x) {
     const double Q1asin =  1.395105614657485689735E2;
     const double Q0asin = -4.918853881490881290097E1;
 
-    VTYPE xa, xb, x1, x2, x3, x4, x5, px, qx, rx, sx, vx, wx, y1, yb, z, z1, z2;
-    BTYPE big;
-    bool dobig, dosmall;
+    VTYPE  xa, xb, x1, x2, x3, x4, x5, px, qx, rx, sx, vx, wx, y1, yb, z, z1, z2;
+    BVTYPE big;
+    bool   dobig, dosmall;
 
     xa  = abs(x);
     big = xa >= 0.625;
@@ -719,11 +719,11 @@ static inline Vec8d acos(Vec8d const & x) {
 // *************************************************************
 // Template parameters:
 // VTYPE:  f.p. vector type
-// BTYPE:  boolean vector type
+// BVTYPE: boolean vector type
 // AC: 0 = asin, 1 = acos
 // Parameters:
 // x = input x
-template<class VTYPE, class BTYPE, int AC> 
+template<class VTYPE, class BVTYPE, int AC> 
 static inline VTYPE asin_f(VTYPE const & x) {
 
     // define constants
@@ -733,8 +733,8 @@ static inline VTYPE asin_f(VTYPE const & x) {
     const float P1asinf = 7.4953002686E-2f;
     const float P0asinf = 1.6666752422E-1f;
 
-    VTYPE xa, x1, x2, x3, x4, xb, z, z1, z2;
-    BTYPE big;
+    VTYPE  xa, x1, x2, x3, x4, xb, z, z1, z2;
+    BVTYPE big;
 
     xa  = abs(x);
     big = xa > 0.5f;
@@ -802,14 +802,14 @@ static inline Vec16f acos(Vec16f const & x) {
 // *************************************************************
 // Template parameters:
 // VTYPE:  f.p. vector type
-// BTYPE:  boolean vector type
+// BVTYPE: boolean vector type
 // T2:     0 = atan, 1 = atan2
 // Parameters:
 // y, x. calculate atan(y/x)
 // result is between -pi/2 and +pi/2 when x > 0
 // result is between -pi and -pi/2 or between pi/2 and pi when x < 0 for atan2
 // atan2(0,0) gives NAN. Future versions may give 0
-template<class VTYPE, class BTYPE, int T2> 
+template<class VTYPE, class BVTYPE, int T2> 
 static inline VTYPE atan_d(VTYPE const & y, VTYPE const & x) {
 
     // define constants
@@ -830,8 +830,8 @@ static inline VTYPE atan_d(VTYPE const & y, VTYPE const & x) {
 	const double Q1atan = 4.853903996359136964868E2;
 	const double Q0atan = 1.945506571482613964425E2;
 
-    VTYPE t, x1, x2, y1, y2, s, fac, a, b, z, zz, px, qx, re;  // data vectors
-    BTYPE swapxy, notbig, notsmal;                             // boolean vectors
+    VTYPE  t, x1, x2, y1, y2, s, fac, a, b, z, zz, px, qx, re;  // data vectors
+    BVTYPE swapxy, notbig, notsmal;                             // boolean vectors
 
     if (T2) {  // atan2(y,x)
         // move in first octant
@@ -924,14 +924,14 @@ static inline Vec8d atan(Vec8d const & y) {
 // *************************************************************
 // Template parameters:
 // VTYPE:  f.p. vector type
-// BTYPE:  boolean vector type
+// BVTYPE: boolean vector type
 // T2:     0 = atan, 1 = atan2
 // Parameters:
 // y, x. calculate atan(y/x)
 // result is between -pi/2 and +pi/2 when x > 0
 // result is between -pi and -pi/2 or between pi/2 and pi when x < 0 for atan2
 // atan2(0,0) gives NAN. Future versions may give 0
-template<class VTYPE, class BTYPE, int T2> 
+template<class VTYPE, class BVTYPE, int T2> 
 static inline VTYPE atan_f(VTYPE const & y, VTYPE const & x) {
 
     // define constants
@@ -940,8 +940,8 @@ static inline VTYPE atan_f(VTYPE const & y, VTYPE const & x) {
     const float P1atanf =  1.99777106478E-1f;
     const float P0atanf = -3.33329491539E-1f;
 
-    VTYPE t, x1, x2, y1, y2, s, a, b, z, zz, re;   // data vectors
-    BTYPE swapxy, notbig, notsmal;                 // boolean vectors
+    VTYPE  t, x1, x2, y1, y2, s, a, b, z, zz, re;  // data vectors
+    BVTYPE swapxy, notbig, notsmal;                // boolean vectors
 
     if (T2) {  // atan2(y,x)
         // move in first octant

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/iqtree.git